From 54d20024c1dad339acc8624d7ca902b762fe0844 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Wed, 21 Mar 2007 22:18:12 +0000 Subject: [PATCH] Fix some problems with selectivity estimation for partial indexes. First, genericcostestimate() was being way too liberal about including partial-index conditions in its selectivity estimate, resulting in substantial underestimates for situations such as an indexqual "x = 42" used with an index on x "WHERE x >= 40 AND x < 50". While the code is intentionally set up to favor selecting partial indexes when available, this was too much... Second, choose_bitmap_and() was likewise easily fooled by cases of this type, since it would similarly think that the partial index had selectivity independent of the indexqual. Fixed by using predicate_implied_by() rather than simple equality checks to determine redundancy. This is a good deal more expensive but I don't see much alternative. At least the extra cost is only paid when there's actually a partial index under consideration. Per report from Jeff Davis. I'm not going to risk back-patching this, though. --- src/backend/optimizer/path/indxpath.c | 109 ++++++++++++++++++-------- src/backend/utils/adt/selfuncs.c | 51 ++++++------ src/test/regress/expected/select.out | 9 ++- src/test/regress/sql/select.sql | 11 ++- 4 files changed, 120 insertions(+), 60 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 04e029beb28..7197658ae9b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.217 2007/03/17 00:11:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.218 2007/03/21 22:18:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,8 +57,8 @@ static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, static int bitmap_path_comparator(const void *a, const void *b); static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths, RelOptInfo *outer_rel); -static List *pull_indexpath_quals(Path *bitmapqual); -static bool lists_intersect_ptr(List *list1, List *list2); +static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds); +static bool lists_intersect(List *list1, List *list2); static bool match_clause_to_indexcol(IndexOptInfo *index, int indexcol, Oid opfamily, RestrictInfo *rinfo, @@ -562,6 +562,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, Path **patharray; Cost costsofar; List *qualsofar; + List *firstpred; ListCell *lastcell; int i; ListCell *l; @@ -586,8 +587,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, * consider an index redundant if any of its index conditions were already * used by earlier indexes. (We could use predicate_implied_by to have a * more intelligent, but much more expensive, check --- but in most cases - * simple pointer equality should suffice, since after all the index - * conditions are all coming from the same RestrictInfo lists.) + * simple equality should suffice.) * * You might think the condition for redundancy should be "all index * conditions already used", not "any", but this turns out to be wrong. @@ -597,10 +597,27 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, * non-selective. In any case, we'd surely be drastically misestimating * the selectivity if we count the same condition twice. * - * We include index predicate conditions in the redundancy test. Because - * the test is just for pointer equality and not equal(), the effect is - * that use of the same partial index in two different AND elements is - * considered redundant. (XXX is this too strong?) + * We must also consider index predicate conditions in checking for + * redundancy, because the estimated selectivity of a partial index + * includes its predicate even if the explicit index conditions don't. + * Here we have to work harder than just checking expression equality: + * we check to see if any of the predicate clauses are implied by + * index conditions or predicate clauses of previous paths. This covers + * cases such as a condition "x = 42" used with a plain index, followed + * by a clauseless scan of a partial index "WHERE x >= 40 AND x < 50". + * Also, we reject indexes that have a qual condition matching any + * previously-used index's predicate (by including predicate conditions + * into qualsofar). It should be sufficient to check equality in this + * case, not implication, since we've sorted the paths by selectivity + * and so tighter conditions are seen first --- only for exactly equal + * cases might the partial index come first. + * + * XXX the reason we need all these redundancy checks is that costsize.c + * and clausesel.c aren't very smart about redundant clauses: they will + * usually double-count the redundant clauses, producing a too-small + * selectivity that makes a redundant AND look like it reduces the total + * cost. Perhaps someday that code will be smarter and we can remove + * these heuristics. * * Note: outputting the selected sub-paths in selectivity order is a good * thing even if we weren't using that as part of the selection method, @@ -619,18 +636,38 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, paths = list_make1(patharray[0]); costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel); - qualsofar = pull_indexpath_quals(patharray[0]); + find_indexpath_quals(patharray[0], &qualsofar, &firstpred); + qualsofar = list_concat(qualsofar, firstpred); lastcell = list_head(paths); /* for quick deletions */ for (i = 1; i < npaths; i++) { Path *newpath = patharray[i]; List *newqual; + List *newpred; Cost newcost; - newqual = pull_indexpath_quals(newpath); - if (lists_intersect_ptr(newqual, qualsofar)) + find_indexpath_quals(newpath, &newqual, &newpred); + if (lists_intersect(newqual, qualsofar)) continue; /* consider it redundant */ + if (newpred) + { + bool redundant = false; + + /* we check each predicate clause separately */ + foreach(l, newpred) + { + Node *np = (Node *) lfirst(l); + + if (predicate_implied_by(list_make1(np), qualsofar)) + { + redundant = true; + break; /* out of inner loop */ + } + } + if (redundant) + continue; + } /* tentatively add newpath to paths, so we can estimate cost */ paths = lappend(paths, newpath); newcost = bitmap_and_cost_est(root, rel, paths, outer_rel); @@ -638,7 +675,7 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, { /* keep newpath in paths, update subsidiary variables */ costsofar = newcost; - qualsofar = list_concat(qualsofar, newqual); + qualsofar = list_concat(list_concat(qualsofar, newqual), newpred); lastcell = lnext(lastcell); } else @@ -710,35 +747,39 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, } /* - * pull_indexpath_quals + * find_indexpath_quals * - * Given the Path structure for a plain or bitmap indexscan, extract a list + * Given the Path structure for a plain or bitmap indexscan, extract lists * of all the indexquals and index predicate conditions used in the Path. * * This is sort of a simplified version of make_restrictinfo_from_bitmapqual; * here, we are not trying to produce an accurate representation of the AND/OR * semantics of the Path, but just find out all the base conditions used. * - * The result list contains pointers to the expressions used in the Path, + * The result lists contain pointers to the expressions used in the Path, * but all the list cells are freshly built, so it's safe to destructively - * modify the list (eg, by concat'ing it with other lists). + * modify the lists (eg, by concat'ing with other lists). */ -static List * -pull_indexpath_quals(Path *bitmapqual) +static void +find_indexpath_quals(Path *bitmapqual, List **quals, List **preds) { - List *result = NIL; ListCell *l; + *quals = NIL; + *preds = NIL; + if (IsA(bitmapqual, BitmapAndPath)) { BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; foreach(l, apath->bitmapquals) { - List *sublist; + List *subquals; + List *subpreds; - sublist = pull_indexpath_quals((Path *) lfirst(l)); - result = list_concat(result, sublist); + find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds); + *quals = list_concat(*quals, subquals); + *preds = list_concat(*preds, subpreds); } } else if (IsA(bitmapqual, BitmapOrPath)) @@ -747,36 +788,36 @@ pull_indexpath_quals(Path *bitmapqual) foreach(l, opath->bitmapquals) { - List *sublist; + List *subquals; + List *subpreds; - sublist = pull_indexpath_quals((Path *) lfirst(l)); - result = list_concat(result, sublist); + find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds); + *quals = list_concat(*quals, subquals); + *preds = list_concat(*preds, subpreds); } } else if (IsA(bitmapqual, IndexPath)) { IndexPath *ipath = (IndexPath *) bitmapqual; - result = get_actual_clauses(ipath->indexclauses); - result = list_concat(result, list_copy(ipath->indexinfo->indpred)); + *quals = get_actual_clauses(ipath->indexclauses); + *preds = list_copy(ipath->indexinfo->indpred); } else elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); - - return result; } /* - * lists_intersect_ptr + * lists_intersect * Detect whether two lists have a nonempty intersection, - * using pointer equality to compare members. + * using equal() to compare members. * * This possibly should go into list.c, but it doesn't yet have any use * except in choose_bitmap_and. */ static bool -lists_intersect_ptr(List *list1, List *list2) +lists_intersect(List *list1, List *list2) { ListCell *cell1; @@ -787,7 +828,7 @@ lists_intersect_ptr(List *list1, List *list2) foreach(cell2, list2) { - if (lfirst(cell2) == datum1) + if (equal(lfirst(cell2), datum1)) return true; } } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index df61fea567b..a92317aeac1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.229 2007/03/17 00:11:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.230 2007/03/21 22:18:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -86,6 +86,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" +#include "optimizer/predtest.h" #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "parser/parse_coerce.h" @@ -4775,36 +4776,38 @@ genericcostestimate(PlannerInfo *root, List *selectivityQuals; ListCell *l; - /* + /*---------- * If the index is partial, AND the index predicate with the explicitly * given indexquals to produce a more accurate idea of the index - * selectivity. This may produce redundant clauses. We get rid of exact - * duplicates in the code below. We expect that most cases of partial - * redundancy (such as "x < 4" from the qual and "x < 5" from the - * predicate) will be recognized and handled correctly by - * clauselist_selectivity(). This assumption is somewhat fragile, since - * it depends on predicate_implied_by() and clauselist_selectivity() - * having similar capabilities, and there are certainly many cases where - * we will end up with a too-low selectivity estimate. This will bias the - * system in favor of using partial indexes where possible, which is not - * necessarily a bad thing. But it'd be nice to do better someday. + * selectivity. However, we need to be careful not to insert redundant + * clauses, because clauselist_selectivity() is easily fooled into + * computing a too-low selectivity estimate. Our approach is to add + * only the index predicate clause(s) that cannot be proven to be implied + * by the given indexquals. This successfully handles cases such as a + * qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50". + * There are many other cases where we won't detect redundancy, leading + * to a too-low selectivity estimate, which will bias the system in favor + * of using partial indexes where possible. That is not necessarily bad + * though. * - * Note that index->indpred and indexQuals are both in implicit-AND form, - * so ANDing them together just takes merging the lists. However, - * eliminating duplicates is a bit trickier because indexQuals contains - * RestrictInfo nodes and the indpred does not. It is okay to pass a - * mixed list to clauselist_selectivity, but we have to work a bit to - * generate a list without logical duplicates. (We could just list_union - * indpred and strippedQuals, but then we'd not get caching of per-qual - * selectivity estimates.) + * Note that indexQuals contains RestrictInfo nodes while the indpred + * does not. This is OK for both predicate_implied_by() and + * clauselist_selectivity(). + *---------- */ if (index->indpred != NIL) { - List *strippedQuals; - List *predExtraQuals; + List *predExtraQuals = NIL; + + foreach(l, index->indpred) + { + Node *predQual = (Node *) lfirst(l); + List *oneQual = list_make1(predQual); - strippedQuals = get_actual_clauses(indexQuals); - predExtraQuals = list_difference(index->indpred, strippedQuals); + if (!predicate_implied_by(oneQual, indexQuals)) + predExtraQuals = list_concat(predExtraQuals, oneQual); + } + /* list_concat avoids modifying the passed-in indexQuals list */ selectivityQuals = list_concat(predExtraQuals, indexQuals); } else diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out index 0b3f546bdfb..d58c8d2811b 100644 --- a/src/test/regress/expected/select.out +++ b/src/test/regress/expected/select.out @@ -209,9 +209,13 @@ SELECT onek.unique1, onek.string4 FROM onek -- test partial btree indexes -- -- As of 7.2, planner probably won't pick an indexscan without stats, --- so ANALYZE first. +-- so ANALYZE first. Also, we want to prevent it from picking a bitmapscan +-- followed by sort, because that could hide index ordering problems. -- ANALYZE onek2; +SET enable_seqscan TO off; +SET enable_bitmapscan TO off; +SET enable_sort TO off; -- -- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1 -- @@ -288,6 +292,9 @@ SELECT onek2.unique1, onek2.stringu1 FROM onek2 999 | LMAAAA (19 rows) +RESET enable_seqscan; +RESET enable_bitmapscan; +RESET enable_sort; SELECT two, stringu1, ten, string4 INTO TABLE tmp FROM onek; diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql index f23cccd24f9..8c92168c9b8 100644 --- a/src/test/regress/sql/select.sql +++ b/src/test/regress/sql/select.sql @@ -59,10 +59,15 @@ SELECT onek.unique1, onek.string4 FROM onek -- test partial btree indexes -- -- As of 7.2, planner probably won't pick an indexscan without stats, --- so ANALYZE first. +-- so ANALYZE first. Also, we want to prevent it from picking a bitmapscan +-- followed by sort, because that could hide index ordering problems. -- ANALYZE onek2; +SET enable_seqscan TO off; +SET enable_bitmapscan TO off; +SET enable_sort TO off; + -- -- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1 -- @@ -81,6 +86,10 @@ SELECT onek2.unique1, onek2.stringu1 FROM onek2 SELECT onek2.unique1, onek2.stringu1 FROM onek2 WHERE onek2.unique1 > 980; +RESET enable_seqscan; +RESET enable_bitmapscan; +RESET enable_sort; + SELECT two, stringu1, ten, string4 INTO TABLE tmp -- GitLab