diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 5566b74b0c197a64b5f22bc02948b283bbbe8b13..2e53294e2320a0994b6279c0e568af1c6dede1b2 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, * interpreting it as a value. Then the available range is 1-losel to hisel. * However, this calculation double-excludes nulls, so really we need * hisel + losel + null_frac - 1.) - * If the calculation yields zero or negative, however, we chicken out and - * use a default estimate; that probably means that one or both - * selectivities is a default estimate rather than an actual range value. + * + * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation + * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation + * yields an impossible (negative) result. * * A free side-effect is that we can recognize redundant inequalities such * as "x < 4 AND x < 5"; only the tighter constraint will be counted. @@ -194,37 +195,51 @@ clauselist_selectivity(Query *root, if (rqlist->have_lobound && rqlist->have_hibound) { /* Successfully matched a pair of range clauses */ - Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; - - /* Adjust for double-exclusion of NULLs */ - s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid); + Selectivity s2; /* - * A zero or slightly negative s2 should be converted into a - * small positive value; we probably are dealing with a very - * tight range and got a bogus result due to roundoff errors. 
- * However, if s2 is very negative, then we probably have - * default selectivity estimates on one or both sides of the - * range. In that case, insert a not-so-wildly-optimistic - * default estimate. + * Exact equality to the default value probably means the + * selectivity function punted. This is not airtight but + * should be good enough. */ - if (s2 <= 0.0) + if (rqlist->hibound == DEFAULT_INEQ_SEL || + rqlist->lobound == DEFAULT_INEQ_SEL) { - if (s2 < -0.01) - { - /* - * No data available --- use a default estimate that - * is small, but not real small. - */ - s2 = 0.005; - } - else + s2 = DEFAULT_RANGE_INEQ_SEL; + } + else + { + s2 = rqlist->hibound + rqlist->lobound - 1.0; + + /* Adjust for double-exclusion of NULLs */ + s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid); + + /* + * A zero or slightly negative s2 should be converted into a + * small positive value; we probably are dealing with a very + * tight range and got a bogus result due to roundoff errors. + * However, if s2 is very negative, then we probably have + * default selectivity estimates on one or both sides of the + * range that we failed to recognize above for some reason. + */ + if (s2 <= 0.0) { - /* - * It's just roundoff error; use a small positive - * value - */ - s2 = 1.0e-10; + if (s2 < -0.01) + { + /* + * No data available --- use a default estimate that + * is small, but not real small. 
+ */ + s2 = DEFAULT_RANGE_INEQ_SEL; + } + else + { + /* + * It's just roundoff error; use a small positive + * value + */ + s2 = 1.0e-10; + } } } /* Merge in the selectivity of the pair of clauses */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 24759bf5c0965d178a9df0d4cd2c9dfce0f516f7..a3f782895b9b7f765045409042adc20cbc43dc64 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -111,45 +111,6 @@ #include "utils/syscache.h" -/* - * Note: the default selectivity estimates are not chosen entirely at random. - * We want them to be small enough to ensure that indexscans will be used if - * available, for typical table densities of ~100 tuples/page. Thus, for - * example, 0.01 is not quite small enough, since that makes it appear that - * nearly all pages will be hit anyway. Also, since we sometimes estimate - * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal - * 1/DEFAULT_EQ_SEL. - */ - -/* default selectivity estimate for equalities such as "A = b" */ -#define DEFAULT_EQ_SEL 0.005 - -/* default selectivity estimate for inequalities such as "A < b" */ -#define DEFAULT_INEQ_SEL (1.0 / 3.0) - -/* default selectivity estimate for pattern-match operators such as LIKE */ -#define DEFAULT_MATCH_SEL 0.005 - -/* default number of distinct values in a table */ -#define DEFAULT_NUM_DISTINCT 200 - -/* default selectivity estimate for boolean and null test nodes */ -#define DEFAULT_UNK_SEL 0.005 -#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) - -/* - * Clamp a computed probability estimate (which may suffer from roundoff or - * estimation errors) to valid range. 
Argument must be a float variable. - */ -#define CLAMP_PROBABILITY(p) \ - do { \ - if (p < 0.0) \ - p = 0.0; \ - else if (p > 1.0) \ - p = 1.0; \ - } while (0) - - /* Return data from examine_variable and friends */ typedef struct { diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 84edb000f478c845278dcf69df8a58678f189a0c..185f848f5844fe598118888902aa6109a11e24b3 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,49 @@ #include "nodes/parsenodes.h" +/* + * Note: the default selectivity estimates are not chosen entirely at random. + * We want them to be small enough to ensure that indexscans will be used if + * available, for typical table densities of ~100 tuples/page. Thus, for + * example, 0.01 is not quite small enough, since that makes it appear that + * nearly all pages will be hit anyway. Also, since we sometimes estimate + * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal + * 1/DEFAULT_EQ_SEL. 
+ */ + +/* default selectivity estimate for equalities such as "A = b" */ +#define DEFAULT_EQ_SEL 0.005 + +/* default selectivity estimate for inequalities such as "A < b"; clauselist_selectivity() compares range bounds against this value with == to detect an estimator that punted, so it is spelled as a single literal */ +#define DEFAULT_INEQ_SEL 0.3333333333333333 + +/* default selectivity estimate for range inequalities "A > b AND A < c"; clauselist_selectivity() uses it when either bound equals DEFAULT_INEQ_SEL or the computed range selectivity is below -0.01 */ +#define DEFAULT_RANGE_INEQ_SEL 0.005 + +/* default selectivity estimate for pattern-match operators such as LIKE */ +#define DEFAULT_MATCH_SEL 0.005 + +/* default number of distinct values in a table */ +#define DEFAULT_NUM_DISTINCT 200 + +/* default selectivity estimate for boolean and null test nodes */ +#define DEFAULT_UNK_SEL 0.005 +#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) + + +/* + * Clamp a computed probability estimate (which may suffer from roundoff or + * estimation errors) to valid range [0.0, 1.0]. Argument must be a float variable: it is expanded several times, unparenthesized, and assigned to, so pass a plain lvalue with no side effects. + */ +#define CLAMP_PROBABILITY(p) \ + do { \ + if (p < 0.0) \ + p = 0.0; \ + else if (p > 1.0) \ + p = 1.0; \ + } while (0) + + typedef enum { Pattern_Type_Like, Pattern_Type_Like_IC,