diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index c438d974c259926ceaceffcf9a4372c3fda5ef78..1bc6d15a3e83eaa1e70de86eb553e7436e3ec44c 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.8 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.9 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,10 +52,10 @@ static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceMember *leftem, EquivalenceMember *rightem, EquivalenceClass *parent_ec); -static void reconsider_outer_join_clause(PlannerInfo *root, +static bool reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, bool outer_on_left); -static void reconsider_full_join_clause(PlannerInfo *root, +static bool reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo); @@ -1094,8 +1094,9 @@ create_join_clause(PlannerInfo *root, /* * reconsider_outer_join_clauses * Re-examine any outer-join clauses that were set aside by - * distribute_qual_to_rels(), and either create EquivalenceClasses - * to replace them or push them out into the regular join-clause lists. + * distribute_qual_to_rels(), and see if we can derive any + * EquivalenceClasses from them. Then, if they were not made + * redundant, push them out into the regular join-clause lists. * * When we have mergejoinable clauses A = B that are outer-join clauses, * we can't blindly combine them with other clauses A = C to deduce B = C, @@ -1112,8 +1113,8 @@ create_join_clause(PlannerInfo *root, * equivalence clause.) * * Note that the above rule does not work for full outer joins; nor is it - * very interesting to consider cases where the equivalence clause involves - * relations entirely outside the outer join, since such clauses couldn't + * very interesting to consider cases where the generated equivalence clause + * would involve relations outside the outer join, since such clauses couldn't * be pushed into the inner side's scan anyway. So the restriction to * outervar = pseudoconstant is not really giving up anything. * @@ -1141,57 +1142,161 @@ create_join_clause(PlannerInfo *root, * that could result in propagating constant restrictions from * INNERVAR to OUTERVAR, which would be very wrong. * + * It's possible that the INNERVAR is also an OUTERVAR for some other + * outer-join clause, in which case the process can be repeated. So we repeat + * looping over the lists of clauses until no further deductions can be made. + * Whenever we do make a deduction, we remove the generating clause from the + * lists, since we don't want to make the same deduction twice. + * * If we don't find any match for a set-aside outer join clause, we must * throw it back into the regular joinclause processing by passing it to - * distribute_restrictinfo_to_rels(). + * distribute_restrictinfo_to_rels(). If we do generate a derived clause, + * however, the outer-join clause is redundant. We still throw it back, + * because otherwise the join will be seen as a clauseless join and avoided + * during join order searching; but we mark it as redundant to keep from + * messing up the joinrel's size estimate. (This behavior means that the + * API for this routine is uselessly complex: we could have just put all + * the clauses into the regular processing initially. We keep it because + * someday we might want to do something else, such as inserting "dummy" + * joinclauses instead of real ones.) + * + * Outer join clauses that are marked outerjoin_delayed are special: this + * condition means that one or both VARs might go to null due to a lower + * outer join. We can still push a constant through the clause, but only + * if its operator is strict; and we *have to* throw the clause back into + * regular joinclause processing. By keeping the strict join clause, + * we ensure that any null-extended rows that are mistakenly generated due + * to suppressing rows not matching the constant will be rejected at the + * upper outer join. (This doesn't work for full-join clauses.) */ void reconsider_outer_join_clauses(PlannerInfo *root) { - ListCell *lc; + bool found; + ListCell *cell; + ListCell *prev; + ListCell *next; + + /* Outer loop repeats until we find no more deductions */ + do + { + found = false; + + /* Process the LEFT JOIN clauses */ + prev = NULL; + for (cell = list_head(root->left_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_outer_join_clause(root, rinfo, true)) + { + found = true; + /* remove it from the list */ + root->left_join_clauses = + list_delete_cell(root->left_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + + /* Process the RIGHT JOIN clauses */ + prev = NULL; + for (cell = list_head(root->right_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_outer_join_clause(root, rinfo, false)) + { + found = true; + /* remove it from the list */ + root->right_join_clauses = + list_delete_cell(root->right_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + + /* Process the FULL JOIN clauses */ + prev = NULL; + for (cell = list_head(root->full_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_full_join_clause(root, rinfo)) + { + found = true; + /* remove it from the list */ + root->full_join_clauses = + list_delete_cell(root->full_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + } while (found); - /* Process the LEFT JOIN clauses */ - foreach(lc, root->left_join_clauses) + /* Now, any remaining clauses have to be thrown back */ + foreach(cell, root->left_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_outer_join_clause(root, rinfo, true); + distribute_restrictinfo_to_rels(root, rinfo); } - /* And the RIGHT JOIN clauses */ - foreach(lc, root->right_join_clauses) + foreach(cell, root->right_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_outer_join_clause(root, rinfo, false); + distribute_restrictinfo_to_rels(root, rinfo); } - /* And the FULL JOIN clauses */ - foreach(lc, root->full_join_clauses) + foreach(cell, root->full_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_full_join_clause(root, rinfo); + distribute_restrictinfo_to_rels(root, rinfo); } } /* * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause + * + * Returns TRUE if we were able to propagate a constant through the clause. */ -static void +static bool reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, bool outer_on_left) { Expr *outervar, *innervar; - Oid left_type, + Oid opno, + left_type, right_type, inner_datatype; Relids inner_relids; ListCell *lc1; - /* Extract needed info from the clause */ Assert(is_opclause(rinfo->clause)); - op_input_types(((OpExpr *) rinfo->clause)->opno, - &left_type, &right_type); + opno = ((OpExpr *) rinfo->clause)->opno; + + /* If clause is outerjoin_delayed, operator must be strict */ + if (rinfo->outerjoin_delayed && !op_strict(opno)) + return false; + + /* Extract needed info from the clause */ + op_input_types(opno, &left_type, &right_type); if (outer_on_left) { outervar = (Expr *) get_leftop(rinfo->clause); @@ -1266,41 +1371,46 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, } /* - * If we were able to equate INNERVAR to any constant, we're done, and - * we can throw away the outer-join clause as redundant. Otherwise, - * fall out of the search loop, since we know the OUTERVAR appears in - * at most one EC. + * If we were able to equate INNERVAR to any constant, report success. + * Otherwise, fall out of the search loop, since we know the OUTERVAR + * appears in at most one EC. */ if (match) - return; + return true; else break; } - /* We did not find a match, so throw it back into regular processing */ - distribute_restrictinfo_to_rels(root, rinfo); + return false; /* failed to make any deduction */ } /* * reconsider_outer_join_clauses for a single FULL JOIN clause + * + * Returns TRUE if we were able to propagate a constant through the clause. */ -static void +static bool reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) { Expr *leftvar; Expr *rightvar; - Oid left_type, + Oid opno, + left_type, right_type; Relids left_relids, right_relids; ListCell *lc1; + /* Can't use an outerjoin_delayed clause here */ + if (rinfo->outerjoin_delayed) + return false; + /* Extract needed info from the clause */ Assert(is_opclause(rinfo->clause)); + opno = ((OpExpr *) rinfo->clause)->opno; + op_input_types(opno, &left_type, &right_type); leftvar = (Expr *) get_leftop(rinfo->clause); rightvar = (Expr *) get_rightop(rinfo->clause); - op_input_types(((OpExpr *) rinfo->clause)->opno, - &left_type, &right_type); left_relids = rinfo->left_relids; right_relids = rinfo->right_relids; @@ -1412,7 +1522,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) if (matchleft && matchright) { cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em); - return; + return true; } /* @@ -1423,8 +1533,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) break; } - /* We did not find a match, so throw it back into regular processing */ - distribute_restrictinfo_to_rels(root, rinfo); + return false; /* failed to make any deduction */ } @@ -1651,16 +1760,22 @@ have_relevant_eclass_joinclause(PlannerInfo *root, ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) */ - if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + if (list_length(ec->ec_members) <= 1) continue; /* * Note we don't test ec_broken; if we did, we'd need a separate code * path to look through ec_sources. Checking the members anyway is OK * as a possibly-overoptimistic heuristic. + * + * We don't test ec_has_const either, even though a const eclass + * won't generate real join clauses. This is because if we had + * "WHERE a.x = b.y and a.x = 42", it is worth considering a join + * between a and b, since the join result is likely to be small even + * though it'll end up being an unqualified nestloop. */ /* Needn't scan if it couldn't contain members from each rel */ @@ -1674,8 +1789,8 @@ have_relevant_eclass_joinclause(PlannerInfo *root, { EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); - if (cur_em->em_is_child) - continue; /* ignore children here */ + if (cur_em->em_is_const || cur_em->em_is_child) + continue; /* ignore consts and children here */ if (bms_is_subset(cur_em->em_relids, rel1->relids)) { has_rel1 = true; @@ -1719,16 +1834,22 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) */ - if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + if (list_length(ec->ec_members) <= 1) continue; /* * Note we don't test ec_broken; if we did, we'd need a separate code * path to look through ec_sources. Checking the members anyway is OK * as a possibly-overoptimistic heuristic. + * + * We don't test ec_has_const either, even though a const eclass + * won't generate real join clauses. This is because if we had + * "WHERE a.x = b.y and a.x = 42", it is worth considering a join + * between a and b, since the join result is likely to be small even + * though it'll end up being an unqualified nestloop. */ /* Needn't scan if it couldn't contain members from each rel */ @@ -1742,8 +1863,8 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) { EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); - if (cur_em->em_is_child) - continue; /* ignore children here */ + if (cur_em->em_is_const || cur_em->em_is_child) + continue; /* ignore consts and children here */ if (bms_is_subset(cur_em->em_relids, rel1->relids)) { has_rel1 = true; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 5d595ce2151c7ece9bf65e6804042a2bf5401fa8..d5e2506d7a0866890597f074a2cef423b379d6cf 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.114 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.115 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,7 +34,8 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, List *restrictlist, JoinType jointype); static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, JoinType jointype); -static List *select_mergejoin_clauses(RelOptInfo *joinrel, +static List *select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, @@ -69,7 +70,8 @@ add_paths_to_joinrel(PlannerInfo *root, * disable if it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) - mergeclause_list = select_mergejoin_clauses(joinrel, + mergeclause_list = select_mergejoin_clauses(root, + joinrel, outerrel, innerrel, restrictlist, @@ -883,7 +885,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * currently of interest. */ static List * -select_mergejoin_clauses(RelOptInfo *joinrel, +select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, @@ -937,6 +940,35 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; /* no good for these input relations */ } + /* + * Insist that each side have a non-redundant eclass. This + * restriction is needed because various bits of the planner expect + * that each clause in a merge be associatable with some pathkey in a + * canonical pathkey list, but redundant eclasses can't appear in + * canonical sort orderings. (XXX it might be worth relaxing this, + * but not enough time to address it for 8.3.) + * + * Note: it would be bad if this condition failed for an otherwise + * mergejoinable FULL JOIN clause, since that would result in + * undesirable planner failure. I believe that is not possible + * however; a variable involved in a full join could only appear + * in below_outer_join eclasses, which aren't considered redundant. + * + * This case *can* happen for left/right join clauses: the + * outer-side variable could be equated to a constant. Because we + * will propagate that constant across the join clause, the loss of + * ability to do a mergejoin is not really all that big a deal, and + * so it's not clear that improving this is important. + */ + cache_mergeclause_eclasses(root, restrictinfo); + + if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || + EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) + { + have_nonmergeable_joinclause = true; + continue; /* can't handle redundant eclasses */ + } + result_list = lappend(result_list, restrictinfo); } diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index ba332b5eadf827bd9e7d169aff896574b7d6e96b..1556bf74df659ad560305ab513730302f9dc606d 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.83 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,16 +90,18 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) /* * Find potentially interesting OR joinclauses. Note we must ignore any - * joinclauses that are marked outerjoin_delayed, because they cannot be - * pushed down to the per-relation level due to outer-join rules. (XXX in - * some cases it might be possible to allow this, but it would require - * substantially more bookkeeping about where the clause came from.) + * joinclauses that are marked outerjoin_delayed or !is_pushed_down, + * because they cannot be pushed down to the per-relation level due to + * outer-join rules. (XXX in some cases it might be possible to allow + * this, but it would require substantially more bookkeeping about where + * the clause came from.) */ foreach(i, rel->joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); if (restriction_is_or_clause(rinfo) && + rinfo->is_pushed_down && !rinfo->outerjoin_delayed) { /* diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1ef44ddfa414eea43cc82c325e765117e2bf5cf6..f0141b236681ba27fd90cc1b3849e0319a1cb919 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.92 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,13 +30,6 @@ #include "utils/lsyscache.h" -/* - * If an EC contains a const and isn't below-outer-join, any PathKey depending - * on it must be redundant, since there's only one possible value of the key. - */ -#define MUST_BE_REDUNDANT(eclass) \ - ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) - static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); static PathKey *make_canonical_pathkey(PlannerInfo *root, @@ -164,7 +157,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) Assert(!new_ec->ec_merged); /* Check for EC containing a constant --- unconditionally redundant */ - if (MUST_BE_REDUNDANT(new_ec)) + if (EC_MUST_BE_REDUNDANT(new_ec)) return true; /* If same EC already used in list, then redundant */ @@ -211,7 +204,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * pathkey_is_redundant would notice that, but we needn't even bother * constructing the node... */ - if (MUST_BE_REDUNDANT(eclass)) + if (EC_MUST_BE_REDUNDANT(eclass)) continue; /* OK, build a canonicalized PathKey struct */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 42a202b95038501d440ccef3c6b35a6aeba12336..0255b646a3bdaa429b89a6f66639a786e9bcf591 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.137 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.138 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -778,13 +778,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (ojscope) { /* clause is attached to outer join, eval it there */ - relids = ojscope; + relids = bms_copy(ojscope); /* mustn't use as gating qual, so don't mark pseudoconstant */ } else { /* eval at original syntactic level */ - relids = qualscope; + relids = bms_copy(qualscope); if (!contain_volatile_functions(clause)) { /* mark as gating qual */ @@ -849,12 +849,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * We can't use such a clause to deduce equivalence (the left and * right sides might be unequal above the join because one of them has * gone to NULL) ... but we might be able to use it for more limited - * deductions, if there are no lower outer joins that delay its - * application. If so, consider adding it to the lists of set-aside - * clauses. + * deductions, if it is mergejoinable. So consider adding it to the + * lists of set-aside outer-join clauses. */ + is_pushed_down = false; maybe_equivalence = false; - maybe_outer_join = !check_outerjoin_delay(root, &relids, false); + maybe_outer_join = true; + + /* Check to see if must be delayed by lower outer join */ + outerjoin_delayed = check_outerjoin_delay(root, &relids, false); /* * Now force the qual to be evaluated exactly at the level of joining @@ -868,8 +871,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ Assert(ojscope); relids = ojscope; - is_pushed_down = false; - outerjoin_delayed = true; Assert(!pseudoconstant); } else @@ -880,7 +881,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ is_pushed_down = true; - /* Check to see if must be delayed by outer join */ + /* Check to see if must be delayed by lower outer join */ outerjoin_delayed = check_outerjoin_delay(root, &relids, true); if (outerjoin_delayed) @@ -1052,8 +1053,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * mentioning only C cannot be applied below the join to A. * * For a non-pushed-down qual, this isn't going to determine where we place the - * qual, but we need to determine outerjoin_delayed anyway so we can decide - * whether the qual is potentially useful for equivalence deductions. + * qual, but we need to determine outerjoin_delayed anyway for possible use + * in reconsider_outer_join_clauses(). * * Lastly, a pushed-down qual that references the nullable side of any current * oj_info_list member and has to be evaluated above that OJ (because its diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 204bcb7f24ce49aa2f9779cfc4553da34c30458f..f01f1e9e51f5d9132d3ccbd205f1f159862e6c6a 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.152 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.153 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -479,6 +479,13 @@ typedef struct EquivalenceClass struct EquivalenceClass *ec_merged; /* set if merged into another EC */ } EquivalenceClass; +/* + * If an EC contains a const and isn't below-outer-join, any PathKey depending + * on it must be redundant, since there's only one possible value of the key. + */ +#define EC_MUST_BE_REDUNDANT(eclass) \ + ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + /* * EquivalenceMember - one member expression of an EquivalenceClass * @@ -856,17 +863,17 @@ typedef struct HashPath * * When dealing with outer joins we have to be very careful about pushing qual * clauses up and down the tree. An outer join's own JOIN/ON conditions must - * be evaluated exactly at that join node, and any quals appearing in WHERE or - * in a JOIN above the outer join cannot be pushed down below the outer join. - * Otherwise the outer join will produce wrong results because it will see the - * wrong sets of input rows. All quals are stored as RestrictInfo nodes - * during planning, but there's a flag to indicate whether a qual has been + * be evaluated exactly at that join node, unless they are "degenerate" + * conditions that reference only Vars from the nullable side of the join. + * Quals appearing in WHERE or in a JOIN above the outer join cannot be pushed + * down below the outer join, if they reference any nullable Vars. + * RestrictInfo nodes contain a flag to indicate whether a qual has been * pushed down to a lower level than its original syntactic placement in the * join tree would suggest. If an outer join prevents us from pushing a qual * down to its "natural" semantic level (the level associated with just the * base rels used in the qual) then we mark the qual with a "required_relids" * value including more than just the base rels it actually uses. By - * pretending that the qual references all the rels appearing in the outer + * pretending that the qual references all the rels required to form the outer * join, we prevent it from being evaluated below the outer join's joinrel. * When we do form the outer join's joinrel, we still need to distinguish * those quals that are actually in that join's JOIN/ON condition from those @@ -878,11 +885,13 @@ typedef struct HashPath * It's possible for an OUTER JOIN clause to be marked is_pushed_down too, * if we decide that it can be pushed down into the nullable side of the join. * In that case it acts as a plain filter qual for wherever it gets evaluated. + * (In short, is_pushed_down is only false for non-degenerate outer join + * conditions. Possibly we should rename it to reflect that meaning?) * - * When application of a qual must be delayed by outer join, we also mark it - * with outerjoin_delayed = true. This isn't redundant with required_relids - * because that might equal clause_relids whether or not it's an outer-join - * clause. + * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true + * if the clause's applicability must be delayed due to any outer joins + * appearing below its own syntactic level (ie, it references any Vars from + * the nullable side of any lower outer join). * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, @@ -932,7 +941,7 @@ typedef struct RestrictInfo bool is_pushed_down; /* TRUE if clause was pushed down in level */ - bool outerjoin_delayed; /* TRUE if delayed by outer join */ + bool outerjoin_delayed; /* TRUE if delayed by lower outer join */ bool can_join; /* see comment above */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 3dd4f5ed7d01218181c764e5cd1c67969f0de514..bd33cb5c12646b7df4d4e54a9f5b045c4f0a8879 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2290,3 +2290,34 @@ from yy 301 | | | | 1 (3 rows) +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- +create temp table zt1 (f1 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1" +create temp table zt2 (f2 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2" +create temp table zt3 (f3 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3" +insert into zt1 values(53); +insert into zt2 values(53); +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 +----+----+---- + 53 | | +(1 row) + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 | junk +----+----+----+------ + 53 | | | +(1 row) + diff --git a/src/test/regress/expected/join_1.out b/src/test/regress/expected/join_1.out index 9e10c73acb7b96b1b9353d60b185fa63e3142464..4690b711402199bb7c5bf395010a560e10854ebe 100644 --- a/src/test/regress/expected/join_1.out +++ b/src/test/regress/expected/join_1.out @@ -2290,3 +2290,34 @@ from yy 301 | | | | 1 (3 rows) +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- +create temp table zt1 (f1 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1" +create temp table zt2 (f2 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2" +create temp table zt3 (f3 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3" +insert into zt1 values(53); +insert into zt2 values(53); +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 +----+----+---- + 53 | | +(1 row) + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 | junk +----+----+----+------ + 53 | | | +(1 row) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 4efb3c7283626153d9555667eb83a0e93fc8614c..96e15b526c2370a4ac46cb5adc090db59cdc6489 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -462,3 +462,26 @@ from yy left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy left join xx xxa on yya.pkxx = xxa.pkxx left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx; + +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- + +create temp table zt1 (f1 int primary key); +create temp table zt2 (f2 int primary key); +create temp table zt3 (f3 int primary key); +insert into zt1 values(53); +insert into zt2 values(53); + +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; + +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53;