diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index cca5db88e2f1fd747c7839ce2794a97db08e8cae..5728f70c8b07cd6c3188a521939da98ef4f5e815 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -20,10 +20,21 @@ #include "optimizer/tlist.h" +/* Test if an expression node represents a SRF call. Beware multiple eval! */ +#define IS_SRF_CALL(node) \ + ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \ + (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset)) + +/* Workspace for split_pathtarget_walker */ typedef struct { - List *nextlevel_tlist; - bool nextlevel_contains_srfs; + List *input_target_exprs; /* exprs available from input */ + List *level_srfs; /* list of lists of SRF exprs */ + List *level_input_vars; /* vars needed by SRFs of each level */ + List *level_input_srfs; /* SRFs needed by SRFs of each level */ + List *current_input_vars; /* vars needed in current subexpr */ + List *current_input_srfs; /* SRFs needed in current subexpr */ + int current_depth; /* max SRF depth in current subexpr */ } split_pathtarget_context; static bool split_pathtarget_walker(Node *node, @@ -799,24 +810,27 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) * x, y, z * which satisfy the desired property. * + * Another example is + * srf1(x), srf2(srf3(y)) + * That must appear as-is in the top PathTarget, but below that we need + * srf1(x), srf3(y) + * That is, each SRF must be computed at a level corresponding to the nesting + * depth of SRFs within its arguments. + * * In some cases, a SRF has already been evaluated in some previous plan level * and we shouldn't expand it again (that is, what we see in the target is * already meant as a reference to a lower subexpression). So, don't expand * any tlist expressions that appear in input_target, if that's not NULL. - * In principle we might need to consider matching subexpressions to - * input_target, but for now it's not necessary because only ORDER BY and - * GROUP BY expressions are at issue and those will look the same at both - * plan levels. * * The outputs of this function are two parallel lists, one a list of * PathTargets and the other an integer list of bool flags indicating - * whether the corresponding PathTarget contains any top-level SRFs. + * whether the corresponding PathTarget contains any evaluatable SRFs. * The lists are given in the order they'd need to be evaluated in, with * the "lowest" PathTarget first. So the last list entry is always the * originally given PathTarget, and any entries before it indicate evaluation * levels that must be inserted below it. The first list entry must not - * contain any SRFs, since it will typically be attached to a plan node - * that cannot evaluate SRFs. + * contain any SRFs (other than ones duplicating input_target entries), since + * it will typically be attached to a plan node that cannot evaluate SRFs. * * Note: using a list for the flags may seem like overkill, since there * are only a few possible patterns for which levels contain SRFs. @@ -827,133 +841,283 @@ split_pathtarget_at_srfs(PlannerInfo *root, PathTarget *target, PathTarget *input_target, List **targets, List **targets_contain_srfs) { - /* Initialize output lists to empty; we prepend to them within loop */ - *targets = *targets_contain_srfs = NIL; + split_pathtarget_context context; + int max_depth; + bool need_extra_projection; + List *prev_level_tlist; + ListCell *lc, + *lc1, + *lc2, + *lc3; - /* Loop to consider each level of PathTarget we need */ - for (;;) + /* + * It's not unusual for planner.c to pass us two physically identical + * targets, in which case we can conclude without further ado that all + * expressions are available from the input. (The logic below would + * arrive at the same conclusion, but much more tediously.) + */ + if (target == input_target) { - bool target_contains_srfs = false; - split_pathtarget_context context; - ListCell *lc; + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* Pass any input_target exprs down to split_pathtarget_walker() */ + context.input_target_exprs = input_target ? input_target->exprs : NIL; + + /* + * Initialize with empty level-zero lists, and no levels after that. + * (Note: we could dispense with representing level zero explicitly, since + * it will never receive any SRFs, but then we'd have to special-case that + * level when we get to building result PathTargets. Level zero describes + * the SRF-free PathTarget that will be given to the input plan node.) + */ + context.level_srfs = list_make1(NIL); + context.level_input_vars = list_make1(NIL); + context.level_input_srfs = list_make1(NIL); - context.nextlevel_tlist = NIL; - context.nextlevel_contains_srfs = false; + /* Initialize data we'll accumulate across all the target expressions */ + context.current_input_vars = NIL; + context.current_input_srfs = NIL; + max_depth = 0; + need_extra_projection = false; + + /* Scan each expression in the PathTarget looking for SRFs */ + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); /* - * Scan the PathTarget looking for SRFs. Top-level SRFs are handled - * in this loop, ones lower down are found by split_pathtarget_walker. + * Find all SRFs and Vars (and Var-like nodes) in this expression, and + * enter them into appropriate lists within the context struct. */ - foreach(lc, target->exprs) + context.current_depth = 0; + split_pathtarget_walker(node, &context); + + /* An expression containing no SRFs is of no further interest */ + if (context.current_depth == 0) + continue; + + /* + * Track max SRF nesting depth over the whole PathTarget. Also, if + * this expression establishes a new max depth, we no longer care + * whether previous expressions contained nested SRFs; we can handle + * any required projection for them in the final ProjectSet node. + */ + if (max_depth < context.current_depth) { - Node *node = (Node *) lfirst(lc); + max_depth = context.current_depth; + need_extra_projection = false; + } + + /* + * If any maximum-depth SRF is not at the top level of its expression, + * we'll need an extra Result node to compute the top-level scalar + * expression. + */ + if (max_depth == context.current_depth && !IS_SRF_CALL(node)) + need_extra_projection = true; + } + + /* + * If we found no SRFs needing evaluation (maybe they were all present in + * input_target, or maybe they were all removed by const-simplification), + * then no ProjectSet is needed; fall out. + */ + if (max_depth == 0) + { + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* + * The Vars and SRF outputs needed at top level can be added to the last + * level_input lists if we don't need an extra projection step. If we do + * need one, add a SRF-free level to the lists. + */ + if (need_extra_projection) + { + context.level_srfs = lappend(context.level_srfs, NIL); + context.level_input_vars = lappend(context.level_input_vars, + context.current_input_vars); + context.level_input_srfs = lappend(context.level_input_srfs, + context.current_input_srfs); + } + else + { + lc = list_nth_cell(context.level_input_vars, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_vars); + lc = list_nth_cell(context.level_input_srfs, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_srfs); + } + + /* + * Now construct the output PathTargets. The original target can be used + * as-is for the last one, but we need to construct a new SRF-free target + * representing what the preceding plan node has to emit, as well as a + * target for each intermediate ProjectSet node. + */ + *targets = *targets_contain_srfs = NIL; + prev_level_tlist = NIL; + + forthree(lc1, context.level_srfs, + lc2, context.level_input_vars, + lc3, context.level_input_srfs) + { + List *level_srfs = (List *) lfirst(lc1); + PathTarget *ntarget; + + if (lnext(lc1) == NULL) + { + ntarget = target; + } + else + { + ntarget = create_empty_pathtarget(); /* - * A tlist item that is just a reference to an expression already - * computed in input_target need not be evaluated here, so just - * make sure it's included in the next PathTarget. + * This target should actually evaluate any SRFs of the current + * level, and it needs to propagate forward any Vars needed by + * later levels, as well as SRFs computed earlier and needed by + * later levels. We rely on add_new_columns_to_pathtarget() to + * remove duplicate items. Also, for safety, make a separate copy + * of each item for each PathTarget. */ - if (input_target && list_member(input_target->exprs, node)) + add_new_columns_to_pathtarget(ntarget, copyObject(level_srfs)); + for_each_cell(lc, lnext(lc2)) { - context.nextlevel_tlist = lappend(context.nextlevel_tlist, node); - continue; - } + List *input_vars = (List *) lfirst(lc); - /* Else, we need to compute this expression. */ - if (IsA(node, FuncExpr) && - ((FuncExpr *) node)->funcretset) - { - /* Top-level SRF: it can be evaluated here */ - target_contains_srfs = true; - /* Recursively examine SRF's inputs */ - split_pathtarget_walker((Node *) ((FuncExpr *) node)->args, - &context); + add_new_columns_to_pathtarget(ntarget, copyObject(input_vars)); } - else if (IsA(node, OpExpr) && - ((OpExpr *) node)->opretset) + for_each_cell(lc, lnext(lc3)) { - /* Same as above, but for set-returning operator */ - target_contains_srfs = true; - split_pathtarget_walker((Node *) ((OpExpr *) node)->args, - &context); - } - else - { - /* Not a top-level SRF, so recursively examine expression */ - split_pathtarget_walker(node, &context); + List *input_srfs = (List *) lfirst(lc); + ListCell *lcx; + + foreach(lcx, input_srfs) + { + Node *srf = (Node *) lfirst(lcx); + + if (list_member(prev_level_tlist, srf)) + add_new_column_to_pathtarget(ntarget, copyObject(srf)); + } } + set_pathtarget_cost_width(root, ntarget); } /* - * Prepend current target and associated flag to output lists. + * Add current target and does-it-compute-SRFs flag to output lists. */ - *targets = lcons(target, *targets); - *targets_contain_srfs = lcons_int(target_contains_srfs, - *targets_contain_srfs); + *targets = lappend(*targets, ntarget); + *targets_contain_srfs = lappend_int(*targets_contain_srfs, + (level_srfs != NIL)); - /* - * Done if we found no SRFs anywhere in this target; the tentative - * tlist we built for the next level can be discarded. - */ - if (!target_contains_srfs && !context.nextlevel_contains_srfs) - break; - - /* - * Else build the next PathTarget down, and loop back to process it. - * Copy the subexpressions to make sure PathTargets don't share - * substructure (might be unnecessary, but be safe); and drop any - * duplicate entries in the sub-targetlist. - */ - target = create_empty_pathtarget(); - add_new_columns_to_pathtarget(target, - (List *) copyObject(context.nextlevel_tlist)); - set_pathtarget_cost_width(root, target); + /* Remember this level's output for next pass */ + prev_level_tlist = ntarget->exprs; } } -/* Recursively examine expressions for split_pathtarget_at_srfs */ +/* + * Recursively examine expressions for split_pathtarget_at_srfs. + * + * Note we make no effort here to prevent duplicate entries in the output + * lists. Duplicates will be gotten rid of later. + */ static bool split_pathtarget_walker(Node *node, split_pathtarget_context *context) { if (node == NULL) return false; + + /* + * A subexpression that matches an expression already computed in + * input_target can be treated like a Var (which indeed it will be after + * setrefs.c gets done with it), even if it's actually a SRF. Record it + * as being needed for the current expression, and ignore any + * substructure. + */ + if (list_member(context->input_target_exprs, node)) + { + context->current_input_vars = lappend(context->current_input_vars, + node); + return false; + } + + /* + * Vars and Var-like constructs are expected to be gotten from the input, + * too. We assume that these constructs cannot contain any SRFs (if one + * does, there will be an executor failure from a misplaced SRF). + */ if (IsA(node, Var) || IsA(node, PlaceHolderVar) || IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, WindowFunc)) { - /* - * Pass these items down to the child plan level for evaluation. - * - * We assume that these constructs cannot contain any SRFs (if one - * does, there will be an executor failure from a misplaced SRF). - */ - context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); - - /* Having done that, we need not examine their sub-structure */ + context->current_input_vars = lappend(context->current_input_vars, + node); return false; } - else if ((IsA(node, FuncExpr) && - ((FuncExpr *) node)->funcretset) || - (IsA(node, OpExpr) && - ((OpExpr *) node)->opretset)) + + /* + * If it's a SRF, recursively examine its inputs, determine its level, and + * make appropriate entries in the output lists. + */ + if (IS_SRF_CALL(node)) { + List *save_input_vars = context->current_input_vars; + List *save_input_srfs = context->current_input_srfs; + int save_current_depth = context->current_depth; + int srf_depth; + ListCell *lc; + + context->current_input_vars = NIL; + context->current_input_srfs = NIL; + context->current_depth = 0; + + (void) expression_tree_walker(node, split_pathtarget_walker, + (void *) context); + + /* Depth is one more than any SRF below it */ + srf_depth = context->current_depth + 1; + + /* If new record depth, initialize another level of output lists */ + if (srf_depth >= list_length(context->level_srfs)) + { + context->level_srfs = lappend(context->level_srfs, NIL); + context->level_input_vars = lappend(context->level_input_vars, NIL); + context->level_input_srfs = lappend(context->level_input_srfs, NIL); + } + + /* Record this SRF as needing to be evaluated at appropriate level */ + lc = list_nth_cell(context->level_srfs, srf_depth); + lfirst(lc) = lappend(lfirst(lc), node); + + /* Record its inputs as being needed at the same level */ + lc = list_nth_cell(context->level_input_vars, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_vars); + lc = list_nth_cell(context->level_input_srfs, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_srfs); + /* - * Pass SRFs down to the child plan level for evaluation, and mark - * that it contains SRFs. (We are not at top level of our own tlist, - * else this would have been picked up by split_pathtarget_at_srfs.) + * Restore caller-level state and update it for presence of this SRF. + * Notice we report the SRF itself as being needed for evaluation of + * surrounding expression. */ - context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); - context->nextlevel_contains_srfs = true; + context->current_input_vars = save_input_vars; + context->current_input_srfs = lappend(save_input_srfs, node); + context->current_depth = Max(save_current_depth, srf_depth); - /* Inputs to the SRF need not be considered here, so we're done */ + /* We're done here */ return false; } /* - * Otherwise, the node is evaluatable within the current PathTarget, so - * recurse to examine its inputs. + * Otherwise, the node is a scalar (non-set) expression, so recurse to + * examine its inputs. */ return expression_tree_walker(node, split_pathtarget_walker, (void *) context); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 47afdc335e1d562d8a510e84fa2ae2553e92fbb3..1ad4ad47b2c011c770f8ac823d05e04f16c63952 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -853,7 +853,7 @@ select * from int4_tbl o where (f1, f1) in -> Result Output: i.f1, ((generate_series(1, 2)) / 10) -> ProjectSet - Output: i.f1, generate_series(1, 2) + Output: generate_series(1, 2), i.f1 -> HashAggregate Output: i.f1 Group Key: i.f1 diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out index dca51a842faab6c82df11ad2de69d5bcb93238e6..0eeaf9e8300d335003369abf52f714311d5d7554 100644 --- a/src/test/regress/expected/tsrf.out +++ b/src/test/regress/expected/tsrf.out @@ -53,6 +53,29 @@ SELECT generate_series(generate_series(1,3), generate_series(2, 4)); 4 (6 rows) +-- check proper nesting of SRFs in different expressions +explain (verbose, costs off) +SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4); + QUERY PLAN +-------------------------------------------------------------------------------- + ProjectSet + Output: generate_series(1, (generate_series(1, 3))), (generate_series(2, 4)) + -> ProjectSet + Output: generate_series(1, 3), generate_series(2, 4) + -> Result +(5 rows) + +SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4); + generate_series | generate_series +-----------------+----------------- + 1 | 2 + 1 | 3 + 2 | 3 + 1 | 4 + 2 | 4 + 3 | 4 +(6 rows) + CREATE TABLE few(id int, dataa text, datab text); INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar'); -- SRF output order of sorting is maintained, if SRF is not referenced @@ -518,6 +541,69 @@ SELECT |@|ARRAY[1,2,3]; 3 (3 rows) +-- Some fun cases involving duplicate SRF calls +explain (verbose, costs off) +select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1; + QUERY PLAN +------------------------------------------------------------------ + Result + Output: (generate_series(1, 3)), ((generate_series(1, 3)) + 1) + -> ProjectSet + Output: generate_series(1, 3) + -> Result +(5 rows) + +select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1; + x | xp1 +---+----- + 1 | 2 + 2 | 3 + 3 | 4 +(3 rows) + +explain (verbose, costs off) +select generate_series(1,3)+1 order by generate_series(1,3); + QUERY PLAN +------------------------------------------------------------------------ + Sort + Output: (((generate_series(1, 3)) + 1)), (generate_series(1, 3)) + Sort Key: (generate_series(1, 3)) + -> Result + Output: ((generate_series(1, 3)) + 1), (generate_series(1, 3)) + -> ProjectSet + Output: generate_series(1, 3) + -> Result +(8 rows) + +select generate_series(1,3)+1 order by generate_series(1,3); + ?column? +---------- + 2 + 3 + 4 +(3 rows) + +-- Check that SRFs of same nesting level run in lockstep +explain (verbose, costs off) +select generate_series(1,3) as x, generate_series(3,6) + 1 as y; + QUERY PLAN +------------------------------------------------------------------ + Result + Output: (generate_series(1, 3)), ((generate_series(3, 6)) + 1) + -> ProjectSet + Output: generate_series(1, 3), generate_series(3, 6) + -> Result +(5 rows) + +select generate_series(1,3) as x, generate_series(3,6) + 1 as y; + x | y +---+--- + 1 | 4 + 2 | 5 + 3 | 6 + | 7 +(4 rows) + -- Clean up DROP TABLE few; DROP TABLE fewmore; diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql index ff2fa3eb9a97608b131d0f1e3bf0ecf8587eea13..e627bb99ed963352bf8158de512b1d7705ba2b83 100644 --- a/src/test/regress/sql/tsrf.sql +++ b/src/test/regress/sql/tsrf.sql @@ -17,6 +17,11 @@ SELECT generate_series(1, generate_series(1, 3)); -- srf, with two SRF arguments SELECT generate_series(generate_series(1,3), generate_series(2, 4)); +-- check proper nesting of SRFs in different expressions +explain (verbose, costs off) +SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4); +SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4); + CREATE TABLE few(id int, dataa text, datab text); INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar'); @@ -129,6 +134,19 @@ SELECT (SELECT generate_series(1,3) LIMIT 1 OFFSET g.i) FROM generate_series(0,3 CREATE OPERATOR |@| (PROCEDURE = unnest, RIGHTARG = ANYARRAY); SELECT |@|ARRAY[1,2,3]; +-- Some fun cases involving duplicate SRF calls +explain (verbose, costs off) +select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1; +select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1; +explain (verbose, costs off) +select generate_series(1,3)+1 order by generate_series(1,3); +select generate_series(1,3)+1 order by generate_series(1,3); + +-- Check that SRFs of same nesting level run in lockstep +explain (verbose, costs off) +select generate_series(1,3) as x, generate_series(3,6) + 1 as y; +select generate_series(1,3) as x, generate_series(3,6) + 1 as y; + -- Clean up DROP TABLE few; DROP TABLE fewmore;