diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 0f04033166588adc157d18f5d50ce1c1f07ddb30..53d8fdd6e5bb28c34b3867580c6f037b818c548c 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -254,54 +254,6 @@ allow_star_schema_join(PlannerInfo *root, bms_nonempty_difference(innerparams, outerrelids)); } -/* - * There's a pitfall for creating parameterized nestloops: suppose the inner - * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's - * minimum eval_at set includes the outer rel (B) and some third rel (C). - * We might think we could create a B/A nestloop join that's parameterized by - * C. But we would end up with a plan in which the PHV's expression has to be - * evaluated as a nestloop parameter at the B/A join; and the executor is only - * set up to handle simple Vars as NestLoopParams. Rather than add complexity - * and overhead to the executor for such corner cases, it seems better to - * forbid the join. (Note that existence of such a PHV probably means there - * is a join order constraint that will cause us to consider joining B and C - * directly; so we can still make use of A's parameterized path with B+C.) - * So we check whether any PHVs used in the query could pose such a hazard. - * We don't have any simple way of checking whether a risky PHV would actually - * be used in the inner plan, and the case is so unusual that it doesn't seem - * worth working very hard on it. - * - * This case can occur whether or not the join's remaining parameterization - * overlaps param_source_rels, so we have to check for it separately from - * allow_star_schema_join, even though it looks much like a star-schema case. - */ -static inline bool -check_hazardous_phv(PlannerInfo *root, - Path *outer_path, - Path *inner_path) -{ - Relids innerparams = PATH_REQ_OUTER(inner_path); - Relids outerrelids = outer_path->parent->relids; - ListCell *lc; - - foreach(lc, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); - - if (!bms_is_subset(phinfo->ph_eval_at, innerparams)) - continue; /* ignore, could not be a nestloop param */ - if (!bms_overlap(phinfo->ph_eval_at, outerrelids)) - continue; /* ignore, not relevant to this join */ - if (bms_is_subset(phinfo->ph_eval_at, outerrelids)) - continue; /* safe, it can be eval'd within outerrel */ - /* Otherwise, it's potentially unsafe, so reject the join */ - return false; - } - - /* OK to perform the join */ - return true; -} - /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into @@ -322,31 +274,24 @@ try_nestloop_path(PlannerInfo *root, /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible --- unless allow_star_schema_join - * says to allow it anyway. Also, we must reject if check_hazardous_phv - * doesn't like the look of it. + * says to allow it anyway. Also, we must reject if have_dangerous_phv + * doesn't like the look of it, which could only happen if the nestloop is + * still parameterized. */ required_outer = calc_nestloop_required_outer(outer_path, inner_path); if (required_outer && ((!bms_overlap(required_outer, extra->param_source_rels) && !allow_star_schema_join(root, outer_path, inner_path)) || - !check_hazardous_phv(root, outer_path, inner_path))) + have_dangerous_phv(root, + outer_path->parent->relids, + PATH_REQ_OUTER(inner_path)))) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return; } - /* - * Independently of that, add parameterization needed for any - * PlaceHolderVars that need to be computed at the join. We can handle - * that just by adding joinrel->lateral_relids; that might include some - * rels that are already in required_outer, but no harm done. (Note that - * lateral_relids is exactly NULL if empty, so this will not break the - * property that required_outer is too.) - */ - required_outer = bms_add_members(required_outer, joinrel->lateral_relids); - /* * Do a precheck to quickly eliminate obviously-inferior paths. We * calculate a cheap lower bound on the path's cost and then use @@ -418,12 +363,6 @@ try_mergejoin_path(PlannerInfo *root, return; } - /* - * Independently of that, add parameterization needed for any - * PlaceHolderVars that need to be computed at the join. - */ - required_outer = bms_add_members(required_outer, joinrel->lateral_relids); - /* * If the given paths are already well enough ordered, we can skip doing * an explicit sort. @@ -500,12 +439,6 @@ try_hashjoin_path(PlannerInfo *root, return; } - /* - * Independently of that, add parameterization needed for any - * PlaceHolderVars that need to be computed at the join. - */ - required_outer = bms_add_members(required_outer, joinrel->lateral_relids); - /* * See comments in try_nestloop_path(). Also note that hashjoin paths * never have any output pathkeys, per comments in create_hashjoin_path. diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 9f0212fad23b0cefc05ee7a8c8d5a038206defe4..47837e1eda8762635166a290aa38b975af6920f0 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -332,9 +332,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, bool reversed; bool unique_ified; bool must_be_leftjoin; - bool lateral_fwd; - bool lateral_rev; - Relids join_lateral_rels; ListCell *l; /* @@ -527,75 +524,126 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* * We also have to check for constraints imposed by LATERAL references. - * The proposed rels could each contain lateral references to the other, - * in which case the join is impossible. If there are lateral references - * in just one direction, then the join has to be done with a nestloop - * with the lateral referencer on the inside. If the join matches an SJ - * that cannot be implemented by such a nestloop, the join is impossible. */ - lateral_fwd = lateral_rev = false; - foreach(l, root->lateral_info_list) + if (root->hasLateralRTEs) { - LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); + bool lateral_fwd; + bool lateral_rev; + Relids join_lateral_rels; - if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) && - bms_overlap(ljinfo->lateral_lhs, rel1->relids)) + /* + * The proposed rels could each contain lateral references to the + * other, in which case the join is impossible. If there are lateral + * references in just one direction, then the join has to be done with + * a nestloop with the lateral referencer on the inside. If the join + * matches an SJ that cannot be implemented by such a nestloop, the + * join is impossible. + * + * Also, if the lateral reference is only indirect, we should reject + * the join; whatever rel(s) the reference chain goes through must be + * joined to first. + * + * Another case that might keep us from building a valid plan is the + * implementation restriction described by have_dangerous_phv(). + */ + lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids); + lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids); + if (lateral_fwd && lateral_rev) + return false; /* have lateral refs in both directions */ + if (lateral_fwd) { /* has to be implemented as nestloop with rel1 on left */ - if (lateral_rev) - return false; /* have lateral refs in both directions */ - lateral_fwd = true; - if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids)) - return false; /* rel1 can't compute the required parameter */ if (match_sjinfo && (reversed || unique_ified || match_sjinfo->jointype == JOIN_FULL)) return false; /* not implementable as nestloop */ + /* check there is a direct reference from rel2 to rel1 */ + foreach(l, root->lateral_info_list) + { + LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); + + if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) && + bms_is_subset(ljinfo->lateral_lhs, rel1->relids)) + break; + } + if (l == NULL) + return false; /* only indirect refs, so reject */ + /* check we won't have a dangerous PHV */ + if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids)) + return false; /* might be unable to handle required PHV */ } - if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) && - bms_overlap(ljinfo->lateral_lhs, rel2->relids)) + else if (lateral_rev) { /* has to be implemented as nestloop with rel2 on left */ - if (lateral_fwd) - return false; /* have lateral refs in both directions */ - lateral_rev = true; - if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids)) - return false; /* rel2 can't compute the required parameter */ if (match_sjinfo && (!reversed || unique_ified || match_sjinfo->jointype == JOIN_FULL)) return false; /* not implementable as nestloop */ + /* check there is a direct reference from rel1 to rel2 */ + foreach(l, root->lateral_info_list) + { + LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); + + if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) && + bms_is_subset(ljinfo->lateral_lhs, rel2->relids)) + break; + } + if (l == NULL) + return false; /* only indirect refs, so reject */ + /* check we won't have a dangerous PHV */ + if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids)) + return false; /* might be unable to handle required PHV */ } - } - /* - * LATERAL references could also cause problems later on if we accept this - * join: if the join's minimum parameterization includes any rels that - * would have to be on the inside of an outer join with this join rel, - * then it's never going to be possible to build the complete query using - * this join. We should reject this join not only because it'll save - * work, but because if we don't, the clauseless-join heuristics might - * think that legality of this join means that some other join rel need - * not be formed, and that could lead to failure to find any plan at all. - * It seems best not to merge this check into the main loop above, because - * it is concerned with SJs that are not otherwise relevant to this join. - */ - join_lateral_rels = min_join_parameterization(root, joinrelids); - if (join_lateral_rels) - { - foreach(l, root->join_info_list) + /* + * LATERAL references could also cause problems later on if we accept + * this join: if the join's minimum parameterization includes any rels + * that would have to be on the inside of an outer join with this join + * rel, then it's never going to be possible to build the complete + * query using this join. We should reject this join not only because + * it'll save work, but because if we don't, the clauseless-join + * heuristics might think that legality of this join means that some + * other join rel need not be formed, and that could lead to failure + * to find any plan at all. We have to consider not only rels that + * are directly on the inner side of an OJ with the joinrel, but also + * ones that are indirectly so, so search to find all such rels. + */ + join_lateral_rels = min_join_parameterization(root, joinrelids, + rel1, rel2); + if (join_lateral_rels) { - SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); - - if (bms_overlap(sjinfo->min_righthand, join_lateral_rels) && - bms_overlap(sjinfo->min_lefthand, joinrelids)) - return false; /* will not be able to join to min_righthand */ - if (sjinfo->jointype == JOIN_FULL && - bms_overlap(sjinfo->min_lefthand, join_lateral_rels) && - bms_overlap(sjinfo->min_righthand, joinrelids)) - return false; /* will not be able to join to min_lefthand */ + Relids join_plus_rhs = bms_copy(joinrelids); + bool more; + + do + { + more = false; + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) && + !bms_is_subset(sjinfo->min_righthand, join_plus_rhs)) + { + join_plus_rhs = bms_add_members(join_plus_rhs, + sjinfo->min_righthand); + more = true; + } + /* full joins constrain both sides symmetrically */ + if (sjinfo->jointype == JOIN_FULL && + bms_overlap(sjinfo->min_righthand, join_plus_rhs) && + !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs)) + { + join_plus_rhs = bms_add_members(join_plus_rhs, + sjinfo->min_lefthand); + more = true; + } + } + } while (more); + if (bms_overlap(join_plus_rhs, join_lateral_rels)) + return false; /* will not be able to join to some RHS rel */ } } @@ -852,8 +900,9 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) * could be merged with that function, but it seems clearer to separate the * two concerns. We need this test because there are degenerate cases where * a clauseless join must be performed to satisfy join-order restrictions. - * Also, if one rel has a lateral reference to the other, we should consider - * joining them even if the join would be clauseless. + * Also, if one rel has a lateral reference to the other, or both are needed + * to compute some PHV, we should consider joining them even if the join would + * be clauseless. * * Note: this is only a problem if one side of a degenerate outer join * contains multiple rels, or a clauseless join is required within an @@ -870,8 +919,8 @@ have_join_order_restriction(PlannerInfo *root, ListCell *l; /* - * If either side has a lateral reference to the other, attempt the join - * regardless of outer-join considerations. + * If either side has a direct lateral reference to the other, attempt the + * join regardless of outer-join considerations. */ foreach(l, root->lateral_info_list) { @@ -885,6 +934,22 @@ have_join_order_restriction(PlannerInfo *root, return true; } + /* + * Likewise, if both rels are needed to compute some PlaceHolderVar, + * attempt the join regardless of outer-join considerations. (This is not + * very desirable, because a PHV with a large eval_at set will cause a lot + * of probably-useless joins to be considered, but failing to do this can + * cause us to fail to construct a plan at all.) + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) && + bms_is_subset(rel2->relids, phinfo->ph_eval_at)) + return true; + } + /* * It's possible that the rels correspond to the left and right sides of a * degenerate outer join, that is, one with no joinclause mentioning the @@ -960,7 +1025,7 @@ have_join_order_restriction(PlannerInfo *root, * has_join_restriction * Detect whether the specified relation has join-order restrictions, * due to being inside an outer join or an IN (sub-SELECT), - * or participating in any LATERAL references. + * or participating in any LATERAL references or multi-rel PHVs. * * Essentially, this tests whether have_join_order_restriction() could * succeed with this rel and some other one. It's OK if we sometimes @@ -972,12 +1037,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel) { ListCell *l; - foreach(l, root->lateral_info_list) + if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL) + return true; + + foreach(l, root->placeholder_list) { - LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) || - bms_overlap(ljinfo->lateral_lhs, rel->relids)) + if (bms_is_subset(rel->relids, phinfo->ph_eval_at) && + !bms_equal(rel->relids, phinfo->ph_eval_at)) return true; } @@ -1058,6 +1126,57 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel) } +/* + * There's a pitfall for creating parameterized nestloops: suppose the inner + * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's + * minimum eval_at set includes the outer rel (B) and some third rel (C). + * We might think we could create a B/A nestloop join that's parameterized by + * C. But we would end up with a plan in which the PHV's expression has to be + * evaluated as a nestloop parameter at the B/A join; and the executor is only + * set up to handle simple Vars as NestLoopParams. Rather than add complexity + * and overhead to the executor for such corner cases, it seems better to + * forbid the join. (Note that we can still make use of A's parameterized + * path with pre-joined B+C as the outer rel. have_join_order_restriction() + * ensures that we will consider making such a join even if there are not + * other reasons to do so.) + * + * So we check whether any PHVs used in the query could pose such a hazard. + * We don't have any simple way of checking whether a risky PHV would actually + * be used in the inner plan, and the case is so unusual that it doesn't seem + * worth working very hard on it. + * + * This needs to be checked in two places. If the inner rel's minimum + * parameterization would trigger the restriction, then join_is_legal() should + * reject the join altogether, because there will be no workable paths for it. + * But joinpath.c has to check again for every proposed nestloop path, because + * the inner path might have more than the minimum parameterization, causing + * some PHV to be dangerous for it that otherwise wouldn't be. + */ +bool +have_dangerous_phv(PlannerInfo *root, + Relids outer_relids, Relids inner_params) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + if (!bms_is_subset(phinfo->ph_eval_at, inner_params)) + continue; /* ignore, could not be a nestloop param */ + if (!bms_overlap(phinfo->ph_eval_at, outer_relids)) + continue; /* ignore, not relevant to this join */ + if (bms_is_subset(phinfo->ph_eval_at, outer_relids)) + continue; /* safe, it can be eval'd within outerrel */ + /* Otherwise, it's potentially unsafe, so reject the join */ + return true; + } + + /* OK to perform the join */ + return false; +} + + /* * is_dummy_rel --- has relation been proven empty? */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 2f4e8181eb5a70bc4183ec9270c2215acf9f8d4e..dded896064d05667832cbf353527b4a51ab95703 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -449,9 +449,7 @@ create_lateral_join_info(PlannerInfo *root) Assert(false); } - /* We now know all the relids needed for lateral refs in this rel */ - if (bms_is_empty(lateral_relids)) - continue; /* ensure lateral_relids is NULL if empty */ + /* We now have all the direct lateral refs from this rel */ brel->lateral_relids = lateral_relids; } @@ -459,6 +457,14 @@ create_lateral_join_info(PlannerInfo *root) * Now check for lateral references within PlaceHolderVars, and make * LateralJoinInfos describing each such reference. Unlike references in * unflattened LATERAL RTEs, the referencing location could be a join. + * + * For a PHV that is due to be evaluated at a join, we mark each of the + * join's member baserels as having the PHV's lateral references too. Even + * though the baserels could be scanned without considering those lateral + * refs, we will never be able to form the join except as a path + * parameterized by the lateral refs, so there is no point in considering + * unparameterized paths for the baserels; and we mustn't try to join any + * of those baserels to the lateral refs too soon, either. */ foreach(lc, root->placeholder_list) { @@ -471,6 +477,7 @@ create_lateral_join_info(PlannerInfo *root) PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); ListCell *lc2; + int ev_at; foreach(lc2, vars) { @@ -500,6 +507,15 @@ create_lateral_join_info(PlannerInfo *root) } list_free(vars); + + ev_at = -1; + while ((ev_at = bms_next_member(eval_at, ev_at)) >= 0) + { + RelOptInfo *brel = find_base_rel(root, ev_at); + + brel->lateral_relids = bms_add_members(brel->lateral_relids, + phinfo->ph_lateral); + } } } @@ -508,46 +524,103 @@ create_lateral_join_info(PlannerInfo *root) return; /* - * Now that we've identified all lateral references, make a second pass in - * which we mark each baserel with the set of relids of rels that - * reference it laterally (essentially, the inverse mapping of - * lateral_relids). We'll need this for join_clause_is_movable_to(). + * At this point the lateral_relids sets represent only direct lateral + * references. Replace them by their transitive closure, so that they + * describe both direct and indirect lateral references. If relation X + * references Y laterally, and Y references Z laterally, then we will have + * to scan X on the inside of a nestloop with Z, so for all intents and + * purposes X is laterally dependent on Z too. * - * Also, propagate lateral_relids and lateral_referencers from appendrel - * parent rels to their child rels. We intentionally give each child rel - * the same minimum parameterization, even though it's quite possible that - * some don't reference all the lateral rels. This is because any append - * path for the parent will have to have the same parameterization for - * every child anyway, and there's no value in forcing extra - * reparameterize_path() calls. Similarly, a lateral reference to the - * parent prevents use of otherwise-movable join rels for each child. + * This code is essentially Warshall's algorithm for transitive closure. + * The outer loop considers each baserel, and propagates its lateral + * dependencies to those baserels that have a lateral dependency on it. */ for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; - Relids lateral_referencers; + Relids outer_lateral_relids; + Index rti2; - if (brel == NULL) + if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) continue; - if (brel->reloptkind != RELOPT_BASEREL) + + /* need not consider baserel further if it has no lateral refs */ + outer_lateral_relids = brel->lateral_relids; + if (outer_lateral_relids == NULL) continue; - /* Compute lateral_referencers using the finished lateral_info_list */ - lateral_referencers = NULL; - foreach(lc, root->lateral_info_list) + /* else scan all baserels */ + for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++) { - LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); + RelOptInfo *brel2 = root->simple_rel_array[rti2]; + + if (brel2 == NULL || brel2->reloptkind != RELOPT_BASEREL) + continue; - if (bms_is_member(brel->relid, ljinfo->lateral_lhs)) - lateral_referencers = bms_add_members(lateral_referencers, - ljinfo->lateral_rhs); + /* if brel2 has lateral ref to brel, propagate brel's refs */ + if (bms_is_member(rti, brel2->lateral_relids)) + brel2->lateral_relids = bms_add_members(brel2->lateral_relids, + outer_lateral_relids); } - brel->lateral_referencers = lateral_referencers; + } + + /* + * Now that we've identified all lateral references, mark each baserel + * with the set of relids of rels that reference it laterally (possibly + * indirectly) --- that is, the inverse mapping of lateral_relids. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + Relids lateral_relids; + int rti2; + + if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) + continue; + + /* Nothing to do at rels with no lateral refs */ + lateral_relids = brel->lateral_relids; + if (lateral_relids == NULL) + continue; /* - * If it's an appendrel parent, copy its lateral_relids and - * lateral_referencers to each child rel. + * We should not have broken the invariant that lateral_relids is + * exactly NULL if empty. */ + Assert(!bms_is_empty(lateral_relids)); + + /* Also, no rel should have a lateral dependency on itself */ + Assert(!bms_is_member(rti, lateral_relids)); + + /* Mark this rel's referencees */ + rti2 = -1; + while ((rti2 = bms_next_member(lateral_relids, rti2)) >= 0) + { + RelOptInfo *brel2 = root->simple_rel_array[rti2]; + + Assert(brel2 != NULL && brel2->reloptkind == RELOPT_BASEREL); + brel2->lateral_referencers = + bms_add_member(brel2->lateral_referencers, rti); + } + } + + /* + * Lastly, propagate lateral_relids and lateral_referencers from appendrel + * parent rels to their child rels. We intentionally give each child rel + * the same minimum parameterization, even though it's quite possible that + * some don't reference all the lateral rels. This is because any append + * path for the parent will have to have the same parameterization for + * every child anyway, and there's no value in forcing extra + * reparameterize_path() calls. Similarly, a lateral reference to the + * parent prevents use of otherwise-movable join rels for each child. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) + continue; + if (root->simple_rte_array[rti]->inh) { foreach(lc, root->append_rel_list) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index b197f144117f3b0e479919f33c203266c55b89b7..a2be2ede85e1dbf6512137bd59156e9a47f33908 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -373,7 +373,8 @@ build_join_rel(PlannerInfo *root, joinrel->cheapest_total_path = NULL; joinrel->cheapest_unique_path = NULL; joinrel->cheapest_parameterized_paths = NIL; - joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids); + joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids, + outer_rel, inner_rel); joinrel->relid = 0; /* indicates not a baserel */ joinrel->rtekind = RTE_JOIN; joinrel->min_attr = 0; @@ -508,29 +509,27 @@ build_join_rel(PlannerInfo *root, * because join_is_legal() needs the value to check a prospective join. */ Relids -min_join_parameterization(PlannerInfo *root, Relids joinrelids) +min_join_parameterization(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) { Relids result; - ListCell *lc; - - /* Easy if there are no lateral references */ - if (root->lateral_info_list == NIL) - return NULL; /* - * Scan lateral_info_list to find all the lateral references occurring in - * or below this join. + * Basically we just need the union of the inputs' lateral_relids, less + * whatever is already in the join. + * + * It's not immediately obvious that this is a valid way to compute the + * result, because it might seem that we're ignoring possible lateral refs + * of PlaceHolderVars that are due to be computed at the join but not in + * either input. However, because create_lateral_join_info() already + * charged all such PHV refs to each member baserel of the join, they'll + * be accounted for already in the inputs' lateral_relids. Likewise, we + * do not need to worry about doing transitive closure here, because that + * was already accounted for in the original baserel lateral_relids. */ - result = NULL; - foreach(lc, root->lateral_info_list) - { - LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); - - if (bms_is_subset(ljinfo->lateral_rhs, joinrelids)) - result = bms_add_members(result, ljinfo->lateral_lhs); - } - - /* Remove any rels that are already included in the join */ + result = bms_union(outer_rel->lateral_relids, inner_rel->lateral_relids); result = bms_del_members(result, joinrelids); /* Maintain invariant that result is exactly NULL if empty */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 32321238ae4c4f6b8412de17cd65ad5f95ef3dc8..f7796f8efbeaef07ccff4ef65793d52772da3508 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -358,6 +358,7 @@ typedef struct PlannerInfo * cheapest_parameterized_paths - best paths for their parameterizations; * always includes cheapest_total_path, even if that's unparameterized * lateral_relids - required outer rels for LATERAL, as a Relids set + * (includes both direct and indirect lateral references) * * If the relation is a base relation it will have these fields set: * @@ -373,6 +374,7 @@ typedef struct PlannerInfo * lateral_vars - lateral cross-references of rel, if any (list of * Vars and PlaceHolderVars) * lateral_referencers - relids of rels that reference this one laterally + * (includes both direct and indirect lateral references) * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index f41847f4093ba19bb11cb97fa71a2265a1810ed0..8fb9eda01b67e2b529124f41f9a50117f05a48a0 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -148,7 +148,10 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List **restrictlist_ptr); -extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids); +extern Relids min_join_parameterization(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); extern RelOptInfo *build_empty_join_rel(PlannerInfo *root); extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root, RelOptInfo *rel); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 87123a57295d271f9b69a7581190721f31e41ace..7757741c10467ad621f5be3d13ae086f4b8144e3 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -98,6 +98,8 @@ extern RelOptInfo *make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); +extern bool have_dangerous_phv(PlannerInfo *root, + Relids outer_relids, Relids inner_params); /* * equivclass.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index dba5d2dbab383c2e4578fea12324982bd279ceb1..64f046ee0bb83235cc7708eee24893411b08a1f4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3620,6 +3620,159 @@ where t1.f1 = ss.f1; doh! | 4567890123456789 | 123 | 4567890123456789 | doh! (1 row) +explain (verbose, costs off) +select * from + text_tbl t1 + left join int8_tbl i8 + on i8.q2 = 123, + lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1, + lateral (select ss1.* from text_tbl t3 limit 1) as ss2 +where t1.f1 = ss2.f1; + QUERY PLAN +------------------------------------------------------------------- + Nested Loop + Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1) + Join Filter: (t1.f1 = (t2.f1)) + -> Nested Loop + Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1 + -> Nested Loop Left Join + Output: t1.f1, i8.q1, i8.q2 + -> Seq Scan on public.text_tbl t1 + Output: t1.f1 + -> Materialize + Output: i8.q1, i8.q2 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 123) + -> Limit + Output: (i8.q1), t2.f1 + -> Seq Scan on public.text_tbl t2 + Output: i8.q1, t2.f1 + -> Limit + Output: ((i8.q1)), (t2.f1) + -> Seq Scan on public.text_tbl t3 + Output: (i8.q1), t2.f1 +(22 rows) + +select * from + text_tbl t1 + left join int8_tbl i8 + on i8.q2 = 123, + lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1, + lateral (select ss1.* from text_tbl t3 limit 1) as ss2 +where t1.f1 = ss2.f1; + f1 | q1 | q2 | q1 | f1 | q1 | f1 +------+------------------+-----+------------------+------+------------------+------ + doh! | 4567890123456789 | 123 | 4567890123456789 | doh! | 4567890123456789 | doh! +(1 row) + +explain (verbose, costs off) +select 1 from + text_tbl as tt1 + inner join text_tbl as tt2 on (tt1.f1 = 'foo') + left join text_tbl as tt3 on (tt3.f1 = 'foo') + left join text_tbl as tt4 on (tt3.f1 = tt4.f1), + lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1 +where tt1.f1 = ss1.c0; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + Output: 1 + -> Nested Loop Left Join + Output: tt1.f1, tt4.f1 + -> Nested Loop + Output: tt1.f1 + -> Seq Scan on public.text_tbl tt1 + Output: tt1.f1 + Filter: (tt1.f1 = 'foo'::text) + -> Seq Scan on public.text_tbl tt2 + Output: tt2.f1 + -> Materialize + Output: tt4.f1 + -> Nested Loop Left Join + Output: tt4.f1 + Join Filter: (tt3.f1 = tt4.f1) + -> Seq Scan on public.text_tbl tt3 + Output: tt3.f1 + Filter: (tt3.f1 = 'foo'::text) + -> Seq Scan on public.text_tbl tt4 + Output: tt4.f1 + Filter: (tt4.f1 = 'foo'::text) + -> Subquery Scan on ss1 + Output: ss1.c0 + Filter: (ss1.c0 = 'foo'::text) + -> Limit + Output: (tt4.f1) + -> Seq Scan on public.text_tbl tt5 + Output: tt4.f1 +(29 rows) + +select 1 from + text_tbl as tt1 + inner join text_tbl as tt2 on (tt1.f1 = 'foo') + left join text_tbl as tt3 on (tt3.f1 = 'foo') + left join text_tbl as tt4 on (tt3.f1 = tt4.f1), + lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1 +where tt1.f1 = ss1.c0; + ?column? +---------- +(0 rows) + +-- +-- check a case in which a PlaceHolderVar forces join order +-- +explain (verbose, costs off) +select ss2.* from + int4_tbl i41 + left join int8_tbl i8 + join (select i42.f1 as c1, i43.f1 as c2, 42 as c3 + from int4_tbl i42, int4_tbl i43) ss1 + on i8.q1 = ss1.c2 + on i41.f1 = ss1.c1, + lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2 +where ss1.c2 = 0; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop + Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) + -> Hash Join + Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42 + Hash Cond: (i41.f1 = i42.f1) + -> Nested Loop + Output: i8.q1, i8.q2, i43.f1, i41.f1 + -> Nested Loop + Output: i8.q1, i8.q2, i43.f1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q1 = 0) + -> Seq Scan on public.int4_tbl i43 + Output: i43.f1 + Filter: (i43.f1 = 0) + -> Seq Scan on public.int4_tbl i41 + Output: i41.f1 + -> Hash + Output: i42.f1 + -> Seq Scan on public.int4_tbl i42 + Output: i42.f1 + -> Limit + Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) + -> Seq Scan on public.text_tbl + Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42) +(25 rows) + +select ss2.* from + int4_tbl i41 + left join int8_tbl i8 + join (select i42.f1 as c1, i43.f1 as c2, 42 as c3 + from int4_tbl i42, int4_tbl i43) ss1 + on i8.q1 = ss1.c2 + on i41.f1 = ss1.c1, + lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2 +where ss1.c2 = 0; + f1 | q1 | q2 | c1 | c2 | c3 +----+----+----+----+----+---- +(0 rows) + -- -- test ability to push constants through outer join clauses -- @@ -4741,14 +4894,12 @@ select * from Output: a.q1, a.q2 -> Nested Loop Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1) - Join Filter: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 - -> Materialize - Output: c.q1 - -> Seq Scan on public.int8_tbl c - Output: c.q1 -(13 rows) + Filter: (a.q2 = b.q1) + -> Seq Scan on public.int8_tbl c + Output: c.q1, c.q2 +(11 rows) select * from int8_tbl a left join lateral diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index fdd4e78cf281da3a009fcc5b1daebceef2aabf5a..0358d00e4045055dae952686bfd19a1a204014f6 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1134,6 +1134,65 @@ select * from lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss where t1.f1 = ss.f1; +explain (verbose, costs off) +select * from + text_tbl t1 + left join int8_tbl i8 + on i8.q2 = 123, + lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1, + lateral (select ss1.* from text_tbl t3 limit 1) as ss2 +where t1.f1 = ss2.f1; + +select * from + text_tbl t1 + left join int8_tbl i8 + on i8.q2 = 123, + lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1, + lateral (select ss1.* from text_tbl t3 limit 1) as ss2 +where t1.f1 = ss2.f1; + +explain (verbose, costs off) +select 1 from + text_tbl as tt1 + inner join text_tbl as tt2 on (tt1.f1 = 'foo') + left join text_tbl as tt3 on (tt3.f1 = 'foo') + left join text_tbl as tt4 on (tt3.f1 = tt4.f1), + lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1 +where tt1.f1 = ss1.c0; + +select 1 from + text_tbl as tt1 + inner join text_tbl as tt2 on (tt1.f1 = 'foo') + left join text_tbl as tt3 on (tt3.f1 = 'foo') + left join text_tbl as tt4 on (tt3.f1 = tt4.f1), + lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1 +where tt1.f1 = ss1.c0; + +-- +-- check a case in which a PlaceHolderVar forces join order +-- + +explain (verbose, costs off) +select ss2.* from + int4_tbl i41 + left join int8_tbl i8 + join (select i42.f1 as c1, i43.f1 as c2, 42 as c3 + from int4_tbl i42, int4_tbl i43) ss1 + on i8.q1 = ss1.c2 + on i41.f1 = ss1.c1, + lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2 +where ss1.c2 = 0; + +select ss2.* from + int4_tbl i41 + left join int8_tbl i8 + join (select i42.f1 as c1, i43.f1 as c2, 42 as c3 + from int4_tbl i42, int4_tbl i43) ss1 + on i8.q1 = ss1.c2 + on i41.f1 = ss1.c1, + lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2 +where ss1.c2 = 0; + -- -- test ability to push constants through outer join clauses --