diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index d6402cf911817b1b8c17da91019a1fac19bf051a..825be2cd3a6b8e39f7bbfbe20f4f5ae43e408ce7 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -632,9 +632,39 @@ sort ordering was important; and so using the same PathKey for both sort orderings doesn't create any real problem. +Order of processing for EquivalenceClasses and PathKeys +------------------------------------------------------- + +As alluded to above, there is a specific sequence of phases in the +processing of EquivalenceClasses and PathKeys during planning. During the +initial scanning of the query's quals (deconstruct_jointree followed by +reconsider_outer_join_clauses), we construct EquivalenceClasses based on +mergejoinable clauses found in the quals. At the end of this process, +we know all we can know about equivalence of different variables, so +subsequently there will be no further merging of EquivalenceClasses. +At that point it is possible to consider the EquivalenceClasses as +"canonical" and build canonical PathKeys that reference them. Before +we reach that point (actually, before entering query_planner at all) +we also ensure that we have constructed EquivalenceClasses for all the +expressions used in the query's ORDER BY and related clauses. These +classes might or might not get merged together, depending on what we +find in the quals. + +Because all the EquivalenceClasses are known before we begin path +generation, we can use them as a guide to which indexes are of interest: +if an index's column is not mentioned in any EquivalenceClass then that +index's sort order cannot possibly be helpful for the query. This allows +short-circuiting of much of the processing of create_index_paths() for +irrelevant indexes. + +There are some cases where planner.c constructs additional +EquivalenceClasses and PathKeys after query_planner has completed. +In these cases, the extra ECs/PKs are needed to represent sort orders +that were not considered during query_planner. Such situations should be +minimized since it is impossible for query_planner to return a plan +producing such a sort order, meaning a explicit sort will always be needed. +Currently this happens only for queries involving multiple window functions +with different orderings, for which extra sorts are needed anyway. -Though Bob Devine <bob.devine@worldnet.att.net> was not involved in the -coding of our optimizer, he is available to field questions about -optimizer topics. -- bjm & tgl diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index e44e960b5454d4698ed82e4e857794ffe2a9adf2..b646aa4c9e61e5d1263c14352bc05933e6f6ac2b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -78,6 +78,10 @@ static bool reconsider_full_join_clause(PlannerInfo *root, * join. (This is the reason why we need a failure return. It's more * convenient to check this case here than at the call sites...) * + * On success return, we have also initialized the clause's left_ec/right_ec + * fields to point to the EquivalenceClass representing it. This saves lookup + * effort later. + * * Note: constructing merged EquivalenceClasses is a standard UNION-FIND * problem, for which there exist better data structures than simple lists. * If this code ever proves to be a bottleneck then it could be sped up --- @@ -106,6 +110,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, *em2; ListCell *lc1; + /* Should not already be marked as having generated an eclass */ + Assert(restrictinfo->left_ec == NULL); + Assert(restrictinfo->right_ec == NULL); + /* Extract info from given clause */ Assert(is_opclause(clause)); opno = ((OpExpr *) clause)->opno; @@ -236,8 +244,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, { ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; /* mark the RI as usable with this pair of EMs */ - /* NB: can't set left_ec/right_ec until merging is finished */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; return true; @@ -266,6 +276,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec2->ec_relids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; @@ -276,6 +289,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; @@ -286,6 +302,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec2; + restrictinfo->right_ec = ec2; /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; @@ -311,6 +330,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, root->eq_classes = lappend(root->eq_classes, ec); + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec; + restrictinfo->right_ec = ec; /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; @@ -362,15 +384,19 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, /* * get_eclass_for_sort_expr * Given an expression and opfamily info, find an existing equivalence - * class it is a member of; if none, build a new single-member + * class it is a member of; if none, optionally build a new single-member * EquivalenceClass for it. * * sortref is the SortGroupRef of the originating SortGroupClause, if any, * or zero if not. (It should never be zero if the expression is volatile!) * + * If create_it is TRUE, we'll build a new EquivalenceClass when there is no + * match. If create_it is FALSE, we just return NULL when no match. + * * This can be used safely both before and after EquivalenceClass merging; * since it never causes merging it does not invalidate any existing ECs - * or PathKeys. + * or PathKeys. However, ECs added after path generation has begun are + * of limited usefulness, so usually it's best to create them beforehand. * * Note: opfamilies must be chosen consistently with the way * process_equivalence() would do; that is, generated from a mergejoinable @@ -382,7 +408,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, Oid expr_datatype, List *opfamilies, - Index sortref) + Index sortref, + bool create_it) { EquivalenceClass *newec; EquivalenceMember *newem; @@ -426,8 +453,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, } } + /* No match; does caller want a NULL result? */ + if (!create_it) + return NULL; + /* - * No match, so build a new single-member EC + * OK, build a new single-member EC * * Here, we must be sure that we construct the EC in the right context. We * can assume, however, that the passed expr is long-lived. @@ -1094,8 +1125,8 @@ create_join_clause(PlannerInfo *root, rinfo->parent_ec = parent_ec; /* - * We can set these now, rather than letting them be looked up later, - * since this is only used after EC merging is complete. + * We know the correct values for left_ec/right_ec, ie this particular EC, + * so we can just set them directly instead of forcing another lookup. */ rinfo->left_ec = ec; rinfo->right_ec = ec; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index ab3b9cd394d9af54bcd59e9830a9fbcc0a773fa8..bea134b3f1a02db8c611a6fc46d5bba041961682 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -1041,7 +1041,7 @@ select_mergejoin_clauses(PlannerInfo *root, * mergejoin is not really all that big a deal, and so it's not clear * that improving this is important. */ - cache_mergeclause_eclasses(root, restrictinfo); + update_mergeclause_eclasses(root, restrictinfo); if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 643d57a92dc83f6adf4e2d1da223909528d59805..20c6d73617d507b88fce427a681320a684b47cd0 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -40,6 +40,7 @@ static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, + bool create_it, bool canonicalize); static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno); @@ -230,6 +231,10 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * If the PathKey is being generated from a SortGroupClause, sortref should be * the SortGroupClause's SortGroupRef; otherwise zero. * + * create_it is TRUE if we should create any missing EquivalenceClass + * needed to represent the sort key. If it's FALSE, we return NULL if the + * sort key isn't already present in any EquivalenceClass. + * * canonicalize should always be TRUE after EquivalenceClass merging has * been performed, but FALSE if we haven't done EquivalenceClass merging yet. */ @@ -238,6 +243,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, + bool create_it, bool canonicalize) { Oid opfamily, @@ -300,9 +306,13 @@ make_pathkey_from_sortinfo(PlannerInfo *root, COERCE_DONTCARE); } - /* Now find or create a matching EquivalenceClass */ + /* Now find or (optionally) create a matching EquivalenceClass */ eclass = get_eclass_for_sort_expr(root, expr, opcintype, opfamilies, - sortref); + sortref, create_it); + + /* Fail if no EC and !create_it */ + if (!eclass) + return NULL; /* And finally we can find or create a PathKey node */ if (canonicalize) @@ -478,8 +488,11 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, * The result is canonical, meaning that redundant pathkeys are removed; * it may therefore have fewer entries than there are index columns. * - * We generate the full pathkeys list whether or not all are useful for the - * current query. Caller should do truncate_useless_pathkeys(). + * Another reason for stopping early is that we may be able to tell that + * an index column's sort order is uninteresting for this query. However, + * that test is just based on the existence of an EquivalenceClass and not + * on position in pathkey lists, so it's not complete. Caller should call + * truncate_useless_pathkeys() to possibly remove more pathkeys. */ List * build_index_pathkeys(PlannerInfo *root, @@ -527,14 +540,23 @@ build_index_pathkeys(PlannerInfo *root, indexprs_item = lnext(indexprs_item); } - /* OK, make a canonical pathkey for this sort key */ + /* OK, try to make a canonical pathkey for this sort key */ cpathkey = make_pathkey_from_sortinfo(root, indexkey, sortop, nulls_first, 0, + false, true); + /* + * If the sort key isn't already present in any EquivalenceClass, + * then it's not an interesting sort order for this query. So + * we can stop now --- lower-order sort keys aren't useful either. + */ + if (!cpathkey) + break; + /* Add to list unless redundant */ if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); @@ -644,13 +666,20 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, outer_expr, sub_member->em_datatype, sub_eclass->ec_opfamilies, - 0); - best_pathkey = - make_canonical_pathkey(root, - outer_ec, - sub_pathkey->pk_opfamily, - sub_pathkey->pk_strategy, - sub_pathkey->pk_nulls_first); + 0, + false); + + /* + * If we don't find a matching EC, sub-pathkey isn't + * interesting to the outer query + */ + if (outer_ec) + best_pathkey = + make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); } } else @@ -738,7 +767,16 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, outer_expr, sub_member->em_datatype, sub_eclass->ec_opfamilies, - 0); + 0, + false); + + /* + * If we don't find a matching EC, this sub-pathkey isn't + * interesting to the outer query + */ + if (!outer_ec) + continue; + outer_pk = make_canonical_pathkey(root, outer_ec, sub_pathkey->pk_opfamily, @@ -859,6 +897,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, sortcl->sortop, sortcl->nulls_first, sortcl->tleSortGroupRef, + true, canonicalize); /* Canonical form eliminates redundant ordering keys */ @@ -878,46 +917,81 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, ****************************************************************************/ /* - * cache_mergeclause_eclasses - * Make the cached EquivalenceClass links valid in a mergeclause - * restrictinfo. + * initialize_mergeclause_eclasses + * Set the EquivalenceClass links in a mergeclause restrictinfo. * * RestrictInfo contains fields in which we may cache pointers to * EquivalenceClasses for the left and right inputs of the mergeclause. * (If the mergeclause is a true equivalence clause these will be the - * same EquivalenceClass, otherwise not.) + * same EquivalenceClass, otherwise not.) If the mergeclause is either + * used to generate an EquivalenceClass, or derived from an EquivalenceClass, + * then it's easy to set up the left_ec and right_ec members --- otherwise, + * this function should be called to set them up. We will generate new + * EquivalenceClauses if necessary to represent the mergeclause's left and + * right sides. + * + * Note this is called before EC merging is complete, so the links won't + * necessarily point to canonical ECs. Before they are actually used for + * anything, update_mergeclause_eclasses must be called to ensure that + * they've been updated to point to canonical ECs. */ void -cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) +initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) { + Expr *clause = restrictinfo->clause; + Oid lefttype, + righttype; + + /* Should be a mergeclause ... */ Assert(restrictinfo->mergeopfamilies != NIL); + /* ... with links not yet set */ + Assert(restrictinfo->left_ec == NULL); + Assert(restrictinfo->right_ec == NULL); + + /* Need the declared input types of the operator */ + op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); + + /* Find or create a matching EquivalenceClass for each side */ + restrictinfo->left_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_leftop(clause), + lefttype, + restrictinfo->mergeopfamilies, + 0, + true); + restrictinfo->right_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_rightop(clause), + righttype, + restrictinfo->mergeopfamilies, + 0, + true); +} - /* the cached values should be either both set or both not */ - if (restrictinfo->left_ec == NULL) - { - Expr *clause = restrictinfo->clause; - Oid lefttype, - righttype; - - /* Need the declared input types of the operator */ - op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); - - /* Find or create a matching EquivalenceClass for each side */ - restrictinfo->left_ec = - get_eclass_for_sort_expr(root, - (Expr *) get_leftop(clause), - lefttype, - restrictinfo->mergeopfamilies, - 0); - restrictinfo->right_ec = - get_eclass_for_sort_expr(root, - (Expr *) get_rightop(clause), - righttype, - restrictinfo->mergeopfamilies, - 0); - } - else - Assert(restrictinfo->right_ec != NULL); +/* + * update_mergeclause_eclasses + * Make the cached EquivalenceClass links valid in a mergeclause + * restrictinfo. + * + * These pointers should have been set by process_equivalence or + * initialize_mergeclause_eclasses, but they might have been set to + * non-canonical ECs that got merged later. Chase up to the canonical + * merged parent if so. + */ +void +update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) +{ + /* Should be a merge clause ... */ + Assert(restrictinfo->mergeopfamilies != NIL); + /* ... with pointers already set */ + Assert(restrictinfo->left_ec != NULL); + Assert(restrictinfo->right_ec != NULL); + + /* Chase up to the top as needed */ + while (restrictinfo->left_ec->ec_merged) + restrictinfo->left_ec = restrictinfo->left_ec->ec_merged; + while (restrictinfo->right_ec->ec_merged) + restrictinfo->right_ec = restrictinfo->right_ec->ec_merged; } /* @@ -953,7 +1027,7 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); - cache_mergeclause_eclasses(root, rinfo); + update_mergeclause_eclasses(root, rinfo); } foreach(i, pathkeys) @@ -1089,7 +1163,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, ListCell *lc2; /* get the outer eclass */ - cache_mergeclause_eclasses(root, rinfo); + update_mergeclause_eclasses(root, rinfo); if (rinfo->outer_is_left) oeclass = rinfo->left_ec; @@ -1250,7 +1324,7 @@ make_inner_pathkeys_for_merge(PlannerInfo *root, EquivalenceClass *ieclass; PathKey *pathkey; - cache_mergeclause_eclasses(root, rinfo); + update_mergeclause_eclasses(root, rinfo); if (rinfo->outer_is_left) { @@ -1366,7 +1440,7 @@ pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) if (restrictinfo->mergeopfamilies == NIL) continue; - cache_mergeclause_eclasses(root, restrictinfo); + update_mergeclause_eclasses(root, restrictinfo); if (pathkey->pk_eclass == restrictinfo->left_ec || pathkey->pk_eclass == restrictinfo->right_ec) diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index a2ff314679daa30003d59d04ec01066049e1cb13..77ebadd5cc6a634f5e07886c19a3e4c71dbe3c31 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1066,6 +1066,12 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * * If none of the above hold, pass it off to * distribute_restrictinfo_to_rels(). + * + * In all cases, it's important to initialize the left_ec and right_ec + * fields of a mergejoinable clause, so that all possibly mergejoinable + * expressions have representations in EquivalenceClasses. If + * process_equivalence is successful, it will take care of that; + * otherwise, we have to call initialize_mergeclause_eclasses to do it. */ if (restrictinfo->mergeopfamilies) { @@ -1073,10 +1079,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, { if (process_equivalence(root, restrictinfo, below_outer_join)) return; - /* EC rejected it, so pass to distribute_restrictinfo_to_rels */ + /* EC rejected it, so set left_ec/right_ec the hard way ... */ + initialize_mergeclause_eclasses(root, restrictinfo); + /* ... and fall through to distribute_restrictinfo_to_rels */ } else if (maybe_outer_join && restrictinfo->can_join) { + /* we need to set up left_ec/right_ec the hard way */ + initialize_mergeclause_eclasses(root, restrictinfo); + /* now see if it should go to any outer-join lists */ if (bms_is_subset(restrictinfo->left_relids, outerjoin_nonnullable) && !bms_overlap(restrictinfo->right_relids, @@ -1104,6 +1115,12 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, restrictinfo); return; } + /* nope, so fall through to distribute_restrictinfo_to_rels */ + } + else + { + /* we still need to set up left_ec/right_ec */ + initialize_mergeclause_eclasses(root, restrictinfo); } } @@ -1404,6 +1421,9 @@ process_implied_equality(PlannerInfo *root, * * This overlaps the functionality of process_implied_equality(), but we * must return the RestrictInfo, not push it into the joininfo tree. + * + * Note: we do not do initialize_mergeclause_eclasses() here. It is + * caller's responsibility that left_ec/right_ec be set as necessary. */ RestrictInfo * build_implied_join_equality(Oid opno, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 5f628eeeb96174a3c9051257e235baff43efcacd..6bce53cce57d5c3d8551ad12c371e36100c009ff 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -116,7 +116,8 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, Oid expr_datatype, List *opfamilies, - Index sortref); + Index sortref, + bool create_it); extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, RelOptInfo *joinrel, @@ -172,7 +173,9 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist, bool canonicalize); -extern void cache_mergeclause_eclasses(PlannerInfo *root, +extern void initialize_mergeclause_eclasses(PlannerInfo *root, + RestrictInfo *restrictinfo); +extern void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern List *find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys,