diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b6754478dcf3e8fd6ae69d4efe6cb95f07864ff5..b0a28f515f19e3e8f615935a4c2e1f1b05fec630 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1809,6 +1809,7 @@ _outProjectionPath(StringInfo str, const ProjectionPath *node) _outPathInfo(str, (const Path *) node); WRITE_NODE_FIELD(subpath); + WRITE_BOOL_FIELD(dummypp); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ab8df76a6edd09282c934ea86663d7d495e6a2dd..b2db6e8d0355c8f9e0ebe8c351fab985c1623d6b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1409,8 +1409,9 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) /* * create_projection_plan * - * Create a Result node to do a projection step and (recursively) plans - * for its subpaths. + * Create a plan tree to do a projection step and (recursively) plans + * for its subpaths. We may need a Result node for the projection, + * but sometimes we can just let the subplan do the work. */ static Plan * create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) @@ -1425,31 +1426,37 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) tlist = build_path_tlist(root, &best_path->path); /* - * We might not really need a Result node here. There are several ways - * that this can happen. For example, MergeAppend doesn't project, so we - * would have thought that we needed a projection to attach resjunk sort - * columns to its output ... but create_merge_append_plan might have added - * those same resjunk sort columns to both MergeAppend and its children. - * Alternatively, apply_projection_to_path might have created a projection - * path as the subpath of a Gather node even though the subpath was - * projection-capable. So, if the subpath is capable of projection or the - * desired tlist is the same expression-wise as the subplan's, just jam it - * in there. We'll have charged for a Result that doesn't actually appear - * in the plan, but that's better than having a Result we don't need. + * We might not really need a Result node here, either because the subplan + * can project or because it's returning the right list of expressions + * anyway. Usually create_projection_path will have detected that and set + * dummypp if we don't need a Result; but its decision can't be final, + * because some createplan.c routines change the tlists of their nodes. + * (An example is that create_merge_append_plan might add resjunk sort + * columns to a MergeAppend.) So we have to recheck here. If we do + * arrive at a different answer than create_projection_path did, we'll + * have made slightly wrong cost estimates; but label the plan with the + * cost estimates we actually used, not "corrected" ones. (XXX this could + * be cleaned up if we moved more of the sortcolumn setup logic into Path + * creation, but that would add expense to creating Paths we might end up + * not using.) */ if (is_projection_capable_path(best_path->subpath) || tlist_same_exprs(tlist, subplan->targetlist)) { + /* Don't need a separate Result, just assign tlist to subplan */ plan = subplan; plan->targetlist = tlist; - /* Adjust cost to match what we thought during planning */ + /* Label plan with the estimated costs we actually used */ plan->startup_cost = best_path->path.startup_cost; plan->total_cost = best_path->path.total_cost; + plan->plan_rows = best_path->path.rows; + plan->plan_width = best_path->path.pathtarget->width; /* ... but be careful not to munge subplan's parallel-aware flag */ } else { + /* We need a Result node */ plan = (Plan *) make_result(tlist, NULL, subplan); copy_generic_path_info(plan, (Path *) best_path); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 0434a5a8e73e16e1a79cef46fa3758f5000004e2..cefec7bdf1083cbc3f0ee3b47db63893ff41f755 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -465,8 +465,7 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, * cheapest path.) */ sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, - create_pathtarget(subroot, tlist), - false); + create_pathtarget(subroot, tlist)); /* * Determine cost to get just the first row of the presorted path. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 094c7082b141faf6956337a21b9561e6f7ac0030..2372311d40996a68e64cff0c457f603720a694ff 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1500,7 +1500,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, PathTarget *grouping_target; PathTarget *scanjoin_target; bool have_grouping; - bool scanjoin_target_parallel_safe = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; List *rollup_lists = NIL; @@ -1730,14 +1729,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, else scanjoin_target = grouping_target; - /* - * Check whether scan/join target is parallel safe ... unless there - * are no partial paths, in which case we don't care. - */ - if (current_rel->partial_pathlist && - !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) - scanjoin_target_parallel_safe = true; - /* * Forcibly apply scan/join target to all the Paths for the scan/join * rel. @@ -1756,8 +1747,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, Assert(subpath->param_info == NULL); path = apply_projection_to_path(root, current_rel, - subpath, scanjoin_target, - scanjoin_target_parallel_safe); + subpath, scanjoin_target); /* If we had to add a Result, path is different from subpath */ if (path != subpath) { @@ -1774,15 +1764,13 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * partial pathlist will expect partial paths for that rel to produce * the same output as complete paths ... and we just changed the * output for the complete paths, so we'll need to do the same thing - * for partial paths. + * for partial paths. But only parallel-safe expressions can be + * computed by partial paths. */ - if (scanjoin_target_parallel_safe) + if (current_rel->partial_pathlist && + !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) { - /* - * Apply the scan/join target to each partial path. Otherwise, - * anything that attempts to use the partial paths for further - * upper planning may go wrong. - */ + /* Apply the scan/join target to each partial path */ foreach(lc, current_rel->partial_pathlist) { Path *subpath = (Path *) lfirst(lc); @@ -1792,36 +1780,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, Assert(subpath->param_info == NULL); /* - * We can't use apply_projection_to_path() here, because there - * could already be pointers to these paths, and therefore we - * dare not modify them in place. Instead, we must use - * create_projection_path() unconditionally. + * Don't use apply_projection_to_path() here, because there + * could be other pointers to these paths, and therefore we + * mustn't modify them in place. */ newpath = (Path *) create_projection_path(root, current_rel, subpath, scanjoin_target); - - /* - * Although create_projection_path() inserts a ProjectionPath - * unconditionally, create_projection_plan() will only insert - * a Result node if the subpath is not projection-capable, so - * we should discount the cost of that node if it will not - * actually get inserted. (This is pretty grotty but we can - * improve it later if it seems important.) - */ - if (equal(scanjoin_target->exprs, subpath->pathtarget->exprs)) - { - /* at most we need a relabeling of the subpath */ - newpath->startup_cost = subpath->startup_cost; - newpath->total_cost = subpath->total_cost; - } - else if (is_projection_capable_path(subpath)) - { - /* need to project, but we don't need a Result */ - newpath->total_cost -= cpu_tuple_cost * subpath->rows; - } - lfirst(lc) = newpath; } } @@ -4231,7 +4197,7 @@ create_ordered_paths(PlannerInfo *root, /* Add projection step if needed */ if (path->pathtarget != target) path = apply_projection_to_path(root, ordered_rel, - path, target, false); + path, target); add_path(ordered_rel, path); } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 30975e0106a564a0f2282405f1529e650691daed..552b756b8b1b5da51445c02183a304889b439ee1 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -325,8 +325,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, refnames_tlist); path = apply_projection_to_path(root, rel, path, - create_pathtarget(root, tlist), - false); + create_pathtarget(root, tlist)); /* Return the fully-fledged tlist to caller, too */ *pTargetList = tlist; @@ -395,8 +394,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, path->parent, path, create_pathtarget(root, - *pTargetList), - false); + *pTargetList)); } return path; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 0ff353fa11d0e39b40d644baa27ee987b6000b16..8fd933fd6bdff4efa87b899b6e0792ec04d21afb 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2168,6 +2168,7 @@ create_projection_path(PlannerInfo *root, PathTarget *target) { ProjectionPath *pathnode = makeNode(ProjectionPath); + PathTarget *oldtarget = subpath->pathtarget; pathnode->path.pathtype = T_Result; pathnode->path.parent = rel; @@ -2184,13 +2185,46 @@ create_projection_path(PlannerInfo *root, pathnode->subpath = subpath; /* - * The Result node's cost is cpu_tuple_cost per row, plus the cost of - * evaluating the tlist. There is no qual to worry about. + * We might not need a separate Result node. If the input plan node type + * can project, we can just tell it to project something else. Or, if it + * can't project but the desired target has the same expression list as + * what the input will produce anyway, we can still give it the desired + * tlist (possibly changing its ressortgroupref labels, but nothing else). + * Note: in the latter case, create_projection_plan has to recheck our + * conclusion; see comments therein. */ - pathnode->path.rows = subpath->rows; - pathnode->path.startup_cost = subpath->startup_cost + target->cost.startup; - pathnode->path.total_cost = subpath->total_cost + target->cost.startup + - (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows; + if (is_projection_capable_path(subpath) || + equal(oldtarget->exprs, target->exprs)) + { + /* No separate Result node needed */ + pathnode->dummypp = true; + + /* + * Set cost of plan as subpath's cost, adjusted for tlist replacement. + */ + pathnode->path.rows = subpath->rows; + pathnode->path.startup_cost = subpath->startup_cost + + (target->cost.startup - oldtarget->cost.startup); + pathnode->path.total_cost = subpath->total_cost + + (target->cost.startup - oldtarget->cost.startup) + + (target->cost.per_tuple - oldtarget->cost.per_tuple) * subpath->rows; + } + else + { + /* We really do need the Result node */ + pathnode->dummypp = false; + + /* + * The Result node's cost is cpu_tuple_cost per row, plus the cost of + * evaluating the tlist. There is no qual to worry about. + */ + pathnode->path.rows = subpath->rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows; + } return pathnode; } @@ -2199,38 +2233,37 @@ create_projection_path(PlannerInfo *root, * apply_projection_to_path * Add a projection step, or just apply the target directly to given path. * - * Most plan types include ExecProject, so we can implement a new projection - * without an extra plan node: just replace the given path's pathtarget with - * the desired one. If the given path can't project, add a ProjectionPath. + * This has the same net effect as create_projection_path(), except that if + * a separate Result plan node isn't needed, we just replace the given path's + * pathtarget with the desired one. This must be used only when the caller + * knows that the given path isn't referenced elsewhere and so can be modified + * in-place. * - * We can also short-circuit cases where the targetlist expressions are - * actually equal; this is not an uncommon case, since it may arise from - * trying to apply a PathTarget with sortgroupref labeling to a derived - * path without such labeling. + * If the input path is a GatherPath, we try to push the new target down to + * its input as well; this is a yet more invasive modification of the input + * path, which create_projection_path() can't do. * - * This requires knowing that the source path won't be referenced for other - * purposes (e.g., other possible paths), since we modify it in-place. Note - * also that we mustn't change the source path's parent link; so when it is + * Note that we mustn't change the source path's parent link; so when it is * add_path'd to "rel" things will be a bit inconsistent. So far that has * not caused any trouble. * * 'rel' is the parent relation associated with the result * 'path' is the path representing the source of data * 'target' is the PathTarget to be computed - * 'target_parallel' indicates that target expressions are all parallel-safe */ Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target, - bool target_parallel) + PathTarget *target) { QualCost oldcost; - /* Make a separate ProjectionPath if needed */ - if (!is_projection_capable_path(path) && - !equal(path->pathtarget->exprs, target->exprs)) + /* + * If given path can't project, we might need a Result node, so make a + * separate ProjectionPath. + */ + if (!is_projection_capable_path(path)) return (Path *) create_projection_path(root, rel, path, target); /* @@ -2247,10 +2280,11 @@ apply_projection_to_path(PlannerInfo *root, /* * If the path happens to be a Gather path, we'd like to arrange for the * subpath to return the required target list so that workers can help - * project. But if there is something that is not parallel-safe in the + * project. But if there is something that is not parallel-safe in the * target expressions, then we can't. */ - if (IsA(path, GatherPath) &&target_parallel) + if (IsA(path, GatherPath) && + !has_parallel_hazard((Node *) target->exprs, false)) { GatherPath *gpath = (GatherPath *) path; @@ -2258,14 +2292,12 @@ apply_projection_to_path(PlannerInfo *root, * We always use create_projection_path here, even if the subpath is * projection-capable, so as to avoid modifying the subpath in place. * It seems unlikely at present that there could be any other - * references to the subpath anyway, but better safe than sorry. - * (create_projection_plan will only insert a Result node if the - * subpath is not projection-capable, so we only include the cost of - * that node if it will actually be inserted. This is a bit grotty - * but we can improve it later if it seems important.) + * references to the subpath, but better safe than sorry. + * + * Note that we don't change the GatherPath's cost estimates; it might + * be appropriate to do so, to reflect the fact that the bulk of the + * target evaluation will happen in workers. */ - if (!is_projection_capable_path(gpath->subpath)) - gpath->path.total_cost += cpu_tuple_cost * gpath->subpath->rows; gpath->subpath = (Path *) create_projection_path(root, gpath->subpath->parent, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 57747fc4e556f86b3963bc1a12d23b244304fca7..9470df626cc86c45417951c138b104e3930e8f74 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1274,15 +1274,22 @@ typedef struct HashPath /* * ProjectionPath represents a projection (that is, targetlist computation) * - * This path node represents using a Result plan node to do a projection. - * It's only needed atop a node that doesn't support projection (such as - * Sort); otherwise we just jam the new desired PathTarget into the lower - * path node, and adjust that node's estimated cost accordingly. + * Nominally, this path node represents using a Result plan node to do a + * projection step. However, if the input plan node supports projection, + * we can just modify its output targetlist to do the required calculations + * directly, and not need a Result. In some places in the planner we can just + * jam the desired PathTarget into the input path node (and adjust its cost + * accordingly), so we don't need a ProjectionPath. But in other places + * it's necessary to not modify the input path node, so we need a separate + * ProjectionPath node, which is marked dummy to indicate that we intend to + * assign the work to the input plan node. The estimated cost for the + * ProjectionPath node will account for whether a Result will be used or not. */ typedef struct ProjectionPath { Path path; Path *subpath; /* path representing input source */ + bool dummypp; /* true if no separate Result is needed */ } ProjectionPath; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 586ecdd9b8714a9fc4953e0ffb93282b017af3ef..5de4c34a2b73c2fa26fd3541f8592ff1d66ac7f4 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -143,8 +143,7 @@ extern ProjectionPath *create_projection_path(PlannerInfo *root, extern Path *apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target, - bool target_parallel); + PathTarget *target); extern SortPath *create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,