From edca44b1525b3d591263d032dc4fe500ea771e0e Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 7 Dec 2015 18:56:14 -0500 Subject: [PATCH] Simplify LATERAL-related calculations within add_paths_to_joinrel(). While convincing myself that commit 7e19db0c09719d79 would solve both of the problems recently reported by Andreas Seltenreich, I realized that add_paths_to_joinrel's handling of LATERAL restrictions could be made noticeably simpler and faster if we were to retain the minimum possible parameterization for each joinrel (that is, the set of relids supplying unsatisfied lateral references in it). We already retain that for baserels, in RelOptInfo.lateral_relids, so we can use that field for joinrels too. I re-pgindent'd the files touched here, which affects some unrelated comments. This is, I believe, just a minor optimization not a bug fix, so no back-patch. --- src/backend/nodes/outfuncs.c | 2 +- src/backend/optimizer/path/joinpath.c | 73 ++++++--------------------- src/backend/optimizer/util/relnode.c | 22 ++++---- src/include/nodes/relation.h | 18 +++---- 4 files changed, 36 insertions(+), 79 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 012c14bf29a..c709105d4fe 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1891,13 +1891,13 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_NODE_FIELD(cheapest_total_path); WRITE_NODE_FIELD(cheapest_unique_path); WRITE_NODE_FIELD(cheapest_parameterized_paths); + WRITE_BITMAPSET_FIELD(lateral_relids); WRITE_UINT_FIELD(relid); WRITE_OID_FIELD(reltablespace); WRITE_ENUM_FIELD(rtekind, RTEKind); WRITE_INT_FIELD(min_attr); WRITE_INT_FIELD(max_attr); WRITE_NODE_FIELD(lateral_vars); - WRITE_BITMAPSET_FIELD(lateral_relids); WRITE_BITMAPSET_FIELD(lateral_referencers); WRITE_NODE_FIELD(indexlist); WRITE_UINT_FIELD(pages); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index a35c881fd92..0f040331665 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -86,7 +86,6 @@ add_paths_to_joinrel(PlannerInfo *root, extra.mergeclause_list = NIL; extra.sjinfo = sjinfo; extra.param_source_rels = NULL; - extra.extra_lateral_rels = NULL; /* * Find potential mergejoin clauses. We can skip this if we are not @@ -151,60 +150,14 @@ add_paths_to_joinrel(PlannerInfo *root, } /* - * However, when a LATERAL subquery is involved, we have to be a bit - * laxer, because there will simply not be any paths for the joinrel that - * aren't parameterized by whatever the subquery is parameterized by, - * unless its parameterization is resolved within the joinrel. Hence, add - * to param_source_rels anything that is laterally referenced in either - * input and is not in the join already. + * However, when a LATERAL subquery is involved, there will simply not be + * any paths for the joinrel that aren't parameterized by whatever the + * subquery is parameterized by, unless its parameterization is resolved + * within the joinrel. So we might as well allow additional dependencies + * on whatever residual lateral dependencies the joinrel will have. */ - foreach(lc, root->lateral_info_list) - { - LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); - - if (bms_is_subset(ljinfo->lateral_rhs, joinrel->relids)) - extra.param_source_rels = bms_join(extra.param_source_rels, - bms_difference(ljinfo->lateral_lhs, - joinrel->relids)); - } - - /* - * Another issue created by LATERAL references is that PlaceHolderVars - * that need to be computed at this join level might contain lateral - * references to rels not in the join, meaning that the paths for the join - * would need to be marked as parameterized by those rels, independently - * of all other considerations. Set extra_lateral_rels to the set of such - * rels. This will not affect our decisions as to which paths to - * generate; we merely add these rels to their required_outer sets. - */ - foreach(lc, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); - - /* PHVs without lateral refs can be skipped over quickly */ - if (phinfo->ph_lateral == NULL) - continue; - /* Is it due to be evaluated at this join, and not in either input? */ - if (bms_is_subset(phinfo->ph_eval_at, joinrel->relids) && - !bms_is_subset(phinfo->ph_eval_at, outerrel->relids) && - !bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) - { - /* Yes, remember its lateral rels */ - extra.extra_lateral_rels = bms_add_members(extra.extra_lateral_rels, - phinfo->ph_lateral); - } - } - - /* - * Make sure extra_lateral_rels doesn't list anything within the join, and - * that it's NULL if empty. (This allows us to use bms_add_members to add - * it to required_outer below, while preserving the property that - * required_outer is exactly NULL if empty.) - */ - extra.extra_lateral_rels = bms_del_members(extra.extra_lateral_rels, - joinrel->relids); - if (bms_is_empty(extra.extra_lateral_rels)) - extra.extra_lateral_rels = NULL; + extra.param_source_rels = bms_add_members(extra.param_source_rels, + joinrel->lateral_relids); /* * 1. Consider mergejoin paths where both relations must be explicitly @@ -386,9 +339,13 @@ try_nestloop_path(PlannerInfo *root, /* * Independently of that, add parameterization needed for any - * PlaceHolderVars that need to be computed at the join. + * PlaceHolderVars that need to be computed at the join. We can handle + * that just by adding joinrel->lateral_relids; that might include some + * rels that are already in required_outer, but no harm done. (Note that + * lateral_relids is exactly NULL if empty, so this will not break the + * property that required_outer is too.) */ - required_outer = bms_add_members(required_outer, extra->extra_lateral_rels); + required_outer = bms_add_members(required_outer, joinrel->lateral_relids); /* * Do a precheck to quickly eliminate obviously-inferior paths. We @@ -465,7 +422,7 @@ try_mergejoin_path(PlannerInfo *root, * Independently of that, add parameterization needed for any * PlaceHolderVars that need to be computed at the join. */ - required_outer = bms_add_members(required_outer, extra->extra_lateral_rels); + required_outer = bms_add_members(required_outer, joinrel->lateral_relids); /* * If the given paths are already well enough ordered, we can skip doing @@ -547,7 +504,7 @@ try_hashjoin_path(PlannerInfo *root, * Independently of that, add parameterization needed for any * PlaceHolderVars that need to be computed at the join. */ - required_outer = bms_add_members(required_outer, extra->extra_lateral_rels); + required_outer = bms_add_members(required_outer, joinrel->lateral_relids); /* * See comments in try_nestloop_path(). Also note that hashjoin paths diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8cc7bd771b3..b197f144117 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -103,7 +103,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) /* cheap startup cost is interesting iff not all tuples to be retrieved */ rel->consider_startup = (root->tuple_fraction > 0); rel->consider_param_startup = false; /* might get changed later */ - rel->consider_parallel = false; /* might get changed later */ + rel->consider_parallel = false; /* might get changed later */ rel->reltargetlist = NIL; rel->pathlist = NIL; rel->ppilist = NIL; @@ -111,11 +111,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->cheapest_total_path = NULL; rel->cheapest_unique_path = NULL; rel->cheapest_parameterized_paths = NIL; + rel->lateral_relids = NULL; rel->relid = relid; rel->rtekind = rte->rtekind; /* min_attr, max_attr, attr_needed, attr_widths are set below */ rel->lateral_vars = NIL; - rel->lateral_relids = NULL; rel->lateral_referencers = NULL; rel->indexlist = NIL; rel->pages = 0; @@ -373,6 +373,7 @@ build_join_rel(PlannerInfo *root, joinrel->cheapest_total_path = NULL; joinrel->cheapest_unique_path = NULL; joinrel->cheapest_parameterized_paths = NIL; + joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids); joinrel->relid = 0; /* indicates not a baserel */ joinrel->rtekind = RTE_JOIN; joinrel->min_attr = 0; @@ -380,7 +381,6 @@ build_join_rel(PlannerInfo *root, joinrel->attr_needed = NULL; joinrel->attr_widths = NULL; joinrel->lateral_vars = NIL; - joinrel->lateral_relids = NULL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; joinrel->pages = 0; @@ -448,15 +448,15 @@ build_join_rel(PlannerInfo *root, * Set the consider_parallel flag if this joinrel could potentially be * scanned within a parallel worker. If this flag is false for either * inner_rel or outer_rel, then it must be false for the joinrel also. - * Even if both are true, there might be parallel-restricted quals at - * our level. + * Even if both are true, there might be parallel-restricted quals at our + * level. * - * Note that if there are more than two rels in this relation, they - * could be divided between inner_rel and outer_rel in any arbitary - * way. We assume this doesn't matter, because we should hit all the - * same baserels and joinclauses while building up to this joinrel no - * matter which we take; therefore, we should make the same decision - * here however we get here. + * Note that if there are more than two rels in this relation, they could + * be divided between inner_rel and outer_rel in any arbitary way. We + * assume this doesn't matter, because we should hit all the same baserels + * and joinclauses while building up to this joinrel no matter which we + * take; therefore, we should make the same decision here however we get + * here. */ if (inner_rel->consider_parallel && outer_rel->consider_parallel && !has_parallel_hazard((Node *) restrictlist, false)) diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 9a0dd28195f..6de07a1fbd0 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -99,15 +99,15 @@ typedef struct PlannerGlobal Index lastRowMarkId; /* highest PlanRowMark ID assigned */ - int lastPlanNodeId; /* highest plan node ID assigned */ + int lastPlanNodeId; /* highest plan node ID assigned */ bool transientPlan; /* redo plan when TransactionXmin changes? */ bool hasRowSecurity; /* row security applied? */ - bool parallelModeOK; /* parallel mode potentially OK? */ + bool parallelModeOK; /* parallel mode potentially OK? */ - bool parallelModeNeeded; /* parallel mode actually required? */ + bool parallelModeNeeded; /* parallel mode actually required? */ } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -357,6 +357,7 @@ typedef struct PlannerInfo * (no duplicates) output from relation; NULL if not yet requested * cheapest_parameterized_paths - best paths for their parameterizations; * always includes cheapest_total_path, even if that's unparameterized + * lateral_relids - required outer rels for LATERAL, as a Relids set * * If the relation is a base relation it will have these fields set: * @@ -371,8 +372,6 @@ typedef struct PlannerInfo * zero means not computed yet * lateral_vars - lateral cross-references of rel, if any (list of * Vars and PlaceHolderVars) - * lateral_relids - required outer rels for LATERAL, as a Relids set - * (for child rels this can be more than lateral_vars) * lateral_referencers - relids of rels that reference this one laterally * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) @@ -388,7 +387,7 @@ typedef struct PlannerInfo * set_subquery_pathlist processes the object. * * For otherrels that are appendrel members, these fields are filled - * in just as for a baserel. + * in just as for a baserel, except we don't bother with lateral_vars. * * If the relation is either a foreign table or a join of foreign tables that * all belong to the same foreign server, these fields will be set: @@ -463,6 +462,10 @@ typedef struct RelOptInfo struct Path *cheapest_unique_path; List *cheapest_parameterized_paths; + /* parameterization information needed for both base rels and join rels */ + /* (see also lateral_vars and lateral_referencers) */ + Relids lateral_relids; /* minimum parameterization of rel */ + /* information about a base rel (not set for join rels!) */ Index relid; Oid reltablespace; /* containing tablespace */ @@ -472,7 +475,6 @@ typedef struct RelOptInfo Relids *attr_needed; /* array indexed [min_attr .. max_attr] */ int32 *attr_widths; /* array indexed [min_attr .. max_attr] */ List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */ - Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ BlockNumber pages; /* size estimates derived from pg_class */ @@ -1717,7 +1719,6 @@ typedef struct SemiAntiJoinFactors * sjinfo is extra info about special joins for selectivity estimation * semifactors is as shown above (only valid for SEMI or ANTI joins) * param_source_rels are OK targets for parameterization of result paths - * extra_lateral_rels are additional parameterization for result paths */ typedef struct JoinPathExtraData { @@ -1726,7 +1727,6 @@ typedef struct JoinPathExtraData SpecialJoinInfo *sjinfo; SemiAntiJoinFactors semifactors; Relids param_source_rels; - Relids extra_lateral_rels; } JoinPathExtraData; /* -- GitLab