diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 6abe3f0770a119e2389c7fdb166872e2486cd0dc..c4d3ee50148efd0455bbf1e9edc1f340264645da 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2267,6 +2267,7 @@ JumbleQuery(pgssJumbleState *jstate, Query *query) JumbleExpr(jstate, (Node *) query->onConflict); JumbleExpr(jstate, (Node *) query->returningList); JumbleExpr(jstate, (Node *) query->groupClause); + JumbleExpr(jstate, (Node *) query->groupingSets); JumbleExpr(jstate, query->havingQual); JumbleExpr(jstate, (Node *) query->windowClause); JumbleExpr(jstate, (Node *) query->distinctClause); @@ -2397,6 +2398,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) JumbleExpr(jstate, (Node *) expr->aggfilter); } break; + case T_GroupingFunc: + { + GroupingFunc *grpnode = (GroupingFunc *) node; + + JumbleExpr(jstate, (Node *) grpnode->refs); + } + break; case T_WindowFunc: { WindowFunc *expr = (WindowFunc *) node; @@ -2698,6 +2706,12 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) JumbleExpr(jstate, (Node *) lfirst(temp)); } break; + case T_IntList: + foreach(temp, (List *) node) + { + APP_JUMB(lfirst_int(temp)); + } + break; case T_SortGroupClause: { SortGroupClause *sgc = (SortGroupClause *) node; @@ -2708,6 +2722,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) APP_JUMB(sgc->nulls_first); } break; + case T_GroupingSet: + { + GroupingSet *gsnode = (GroupingSet *) node; + + JumbleExpr(jstate, (Node *) gsnode->content); + } + break; case T_WindowClause: { WindowClause *wc = (WindowClause *) node; diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index b1e94d7b9e202efe5572d251993270086b047fe6..89a609f61c185a2fad448805d7e5ccff63a654ee 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -12228,7 +12228,9 @@ NULL baz</literallayout>(3 rows)</entry> <xref linkend="functions-aggregate-statistics-table">. 
The built-in ordered-set aggregate functions are listed in <xref linkend="functions-orderedset-table"> and - <xref linkend="functions-hypothetical-table">. + <xref linkend="functions-hypothetical-table">. Grouping operations, + which are closely related to aggregate functions, are listed in + <xref linkend="functions-grouping-table">. The special syntax considerations for aggregate functions are explained in <xref linkend="syntax-aggregates">. Consult <xref linkend="tutorial-agg"> for additional introductory @@ -13326,6 +13328,72 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; to the rule specified in the <literal>ORDER BY</> clause. </para> + <table id="functions-grouping-table"> + <title>Grouping Operations</title> + + <tgroup cols="3"> + <thead> + <row> + <entry>Function</entry> + <entry>Return Type</entry> + <entry>Description</entry> + </row> + </thead> + + <tbody> + + <row> + <entry> + <indexterm> + <primary>GROUPING</primary> + </indexterm> + <function>GROUPING(<replaceable class="parameter">args...</replaceable>)</function> + </entry> + <entry> + <type>integer</type> + </entry> + <entry> + Integer bitmask indicating which arguments are not being included in the current + grouping set + </entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + Grouping operations are used in conjunction with grouping sets (see + <xref linkend="queries-grouping-sets">) to distinguish result rows. The + arguments to the <literal>GROUPING</> operation are not actually evaluated, + but they must exactly match expressions given in the <literal>GROUP BY</> + clause of the associated query level. Bits are assigned with the rightmost + argument being the least-significant bit; each bit is 0 if the corresponding + expression is included in the grouping criteria of the grouping set generating + the result row, and 1 if it is not. 
For example: +<screen> +<prompt>=></> <userinput>SELECT * FROM items_sold;</> + make | model | sales +-------+-------+------- + Foo | GT | 10 + Foo | Tour | 20 + Bar | City | 15 + Bar | Sport | 5 +(4 rows) + +<prompt>=></> <userinput>SELECT make, model, GROUPING(make,model), sum(sales) FROM items_sold GROUP BY ROLLUP(make,model);</> + make | model | grouping | sum +-------+-------+----------+----- + Foo | GT | 0 | 10 + Foo | Tour | 0 | 20 + Bar | City | 0 | 15 + Bar | Sport | 0 | 5 + Foo | | 1 | 30 + Bar | | 1 | 20 + | | 3 | 50 +(7 rows) +</screen> + </para> + </sect1> <sect1 id="functions-window"> diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 7dbad462a5dd5ab01cb3d05e1197446393a93e05..ab49bd7e91f035aa15dbc0f00ee3e470e5a2e769 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -1183,6 +1183,181 @@ SELECT product_id, p.name, (sum(s.units) * (p.price - p.cost)) AS profit </para> </sect2> + <sect2 id="queries-grouping-sets"> + <title><literal>GROUPING SETS</>, <literal>CUBE</>, and <literal>ROLLUP</></title> + + <indexterm zone="queries-grouping-sets"> + <primary>GROUPING SETS</primary> + </indexterm> + <indexterm zone="queries-grouping-sets"> + <primary>CUBE</primary> + </indexterm> + <indexterm zone="queries-grouping-sets"> + <primary>ROLLUP</primary> + </indexterm> + + <para> + More complex grouping operations than those described above are possible + using the concept of <firstterm>grouping sets</>. The data selected by + the <literal>FROM</> and <literal>WHERE</> clauses is grouped separately + by each specified grouping set, aggregates computed for each group just as + for simple <literal>GROUP BY</> clauses, and then the results returned. 
+ For example: +<screen> +<prompt>=></> <userinput>SELECT * FROM items_sold;</> + brand | size | sales +-------+------+------- + Foo | L | 10 + Foo | M | 20 + Bar | M | 15 + Bar | L | 5 +(4 rows) + +<prompt>=></> <userinput>SELECT brand, size, sum(sales) FROM items_sold GROUP BY GROUPING SETS ((brand), (size), ());</> + brand | size | sum +-------+------+----- + Foo | | 30 + Bar | | 20 + | L | 15 + | M | 35 + | | 50 +(5 rows) +</screen> + </para> + + <para> + Each sublist of <literal>GROUPING SETS</> may specify zero or more columns + or expressions and is interpreted the same way as though it were directly + in the <literal>GROUP BY</> clause. An empty grouping set means that all + rows are aggregated down to a single group (which is output even if no + input rows were present), as described above for the case of aggregate + functions with no <literal>GROUP BY</> clause. + </para> + + <para> + References to the grouping columns or expressions are replaced + by <literal>NULL</> values in result rows for grouping sets in which those + columns do not appear. To distinguish which grouping a particular output + row resulted from, see <xref linkend="functions-grouping-table">. + </para> + + <para> + A shorthand notation is provided for specifying two common types of grouping set. + A clause of the form +<programlisting> +ROLLUP ( <replaceable>e1</>, <replaceable>e2</>, <replaceable>e3</>, ... ) +</programlisting> + represents the given list of expressions and all prefixes of the list including + the empty list; thus it is equivalent to +<programlisting> +GROUPING SETS ( + ( <replaceable>e1</>, <replaceable>e2</>, <replaceable>e3</>, ... ), + ... + ( <replaceable>e1</>, <replaceable>e2</> ), + ( <replaceable>e1</> ), + ( ) +) +</programlisting> + This is commonly used for analysis over hierarchical data; e.g. total + salary by department, division, and company-wide total. 
+ </para> + + <para> + A clause of the form +<programlisting> +CUBE ( <replaceable>e1</>, <replaceable>e2</>, ... ) +</programlisting> + represents the given list and all of its possible subsets (i.e. the power + set). Thus +<programlisting> +CUBE ( a, b, c ) +</programlisting> + is equivalent to +<programlisting> +GROUPING SETS ( + ( a, b, c ), + ( a, b ), + ( a, c ), + ( a ), + ( b, c ), + ( b ), + ( c ), + ( ) +) +</programlisting> + </para> + + <para> + The individual elements of a <literal>CUBE</> or <literal>ROLLUP</> + clause may be either individual expressions, or sub-lists of elements in + parentheses. In the latter case, the sub-lists are treated as single + units for the purposes of generating the individual grouping sets. + For example: +<programlisting> +CUBE ( (a,b), (c,d) ) +</programlisting> + is equivalent to +<programlisting> +GROUPING SETS ( + ( a, b, c, d ), + ( a, b ), + ( c, d ), + ( ) +) +</programlisting> + and +<programlisting> +ROLLUP ( a, (b,c), d ) +</programlisting> + is equivalent to +<programlisting> +GROUPING SETS ( + ( a, b, c, d ), + ( a, b, c ), + ( a ), + ( ) +) +</programlisting> + </para> + + <para> + The <literal>CUBE</> and <literal>ROLLUP</> constructs can be used either + directly in the <literal>GROUP BY</> clause, or nested inside a + <literal>GROUPING SETS</> clause. If one <literal>GROUPING SETS</> clause + is nested inside another, the effect is the same as if all the elements of + the inner clause had been written directly in the outer clause. + </para> + + <para> + If multiple grouping items are specified in a single <literal>GROUP BY</> + clause, then the final list of grouping sets is the cross product of the + individual items. 
For example: +<programlisting> +GROUP BY a, CUBE(b,c), GROUPING SETS ((d), (e)) +</programlisting> + is equivalent to +<programlisting> +GROUP BY GROUPING SETS ( + (a,b,c,d), (a,b,c,e), + (a,b,d), (a,b,e), + (a,c,d), (a,c,e), + (a,d), (a,e) +) +</programlisting> + </para> + + <note> + <para> + The construct <literal>(a,b)</> is normally recognized in expressions as + a <link linkend="sql-syntax-row-constructors">row constructor</link>. + Within the <literal>GROUP BY</> clause, this does not apply at the top + levels of expressions, and <literal>(a,b)</> is parsed as a list of + expressions as described above. If for some reason you <emphasis>need</> + a row constructor in a grouping expression, use <literal>ROW(a,b)</>. + </para> + </note> + </sect2> + <sect2 id="queries-window"> <title>Window Function Processing</title> diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 42e04660a19eb00fc5f2dc08e57d294e9fca8b28..632d7935cb41fe946cbbd6d356ba927af6c1cf27 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac [ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ] [ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ] [ WHERE <replaceable class="parameter">condition</replaceable> ] - [ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] ] + [ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ] [ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ] [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] 
] [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ] @@ -60,6 +60,15 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac [ WITH ORDINALITY ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] <replaceable class="parameter">from_item</replaceable> [ NATURAL ] <replaceable class="parameter">join_type</replaceable> <replaceable class="parameter">from_item</replaceable> [ ON <replaceable class="parameter">join_condition</replaceable> | USING ( <replaceable class="parameter">join_column</replaceable> [, ...] ) ] +<phrase>and <replaceable class="parameter">grouping_element</replaceable> can be one of:</phrase> + + ( ) + <replaceable class="parameter">expression</replaceable> + ( <replaceable class="parameter">expression</replaceable> [, ...] ) + ROLLUP ( { <replaceable class="parameter">expression</replaceable> | ( <replaceable class="parameter">expression</replaceable> [, ...] ) } [, ...] ) + CUBE ( { <replaceable class="parameter">expression</replaceable> | ( <replaceable class="parameter">expression</replaceable> [, ...] ) } [, ...] ) + GROUPING SETS ( <replaceable class="parameter">grouping_element</replaceable> [, ...] ) + <phrase>and <replaceable class="parameter">with_query</replaceable> is:</phrase> <replaceable class="parameter">with_query_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] 
) ] AS ( <replaceable class="parameter">select</replaceable> | <replaceable class="parameter">values</replaceable> | <replaceable class="parameter">insert</replaceable> | <replaceable class="parameter">update</replaceable> | <replaceable class="parameter">delete</replaceable> ) @@ -665,22 +674,34 @@ WHERE <replaceable class="parameter">condition</replaceable> <para> The optional <literal>GROUP BY</literal> clause has the general form <synopsis> -GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] +GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] </synopsis> </para> <para> <literal>GROUP BY</literal> will condense into a single row all selected rows that share the same values for the grouped - expressions. <replaceable - class="parameter">expression</replaceable> can be an input column - name, or the name or ordinal number of an output column - (<command>SELECT</command> list item), or an arbitrary + expressions. An <replaceable + class="parameter">expression</replaceable> used inside a + <replaceable class="parameter">grouping_element</replaceable> + can be an input column name, or the name or ordinal number of an + output column (<command>SELECT</command> list item), or an arbitrary expression formed from input-column values. In case of ambiguity, a <literal>GROUP BY</literal> name will be interpreted as an input-column name rather than an output column name. </para> + <para> + If any of <literal>GROUPING SETS</>, <literal>ROLLUP</> or + <literal>CUBE</> are present as grouping elements, then the + <literal>GROUP BY</> clause as a whole defines some number of + independent <replaceable>grouping sets</>. The effect of this is + equivalent to constructing a <literal>UNION ALL</> between + subqueries with the individual grouping sets as their + <literal>GROUP BY</> clauses. For further details on the handling + of grouping sets see <xref linkend="queries-grouping-sets">. 
+ </para> + <para> Aggregate functions, if any are used, are computed across all rows making up each group, producing a separate value for each group. diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index e14ae0605ce64c38fecf7d66b6b0b3674c503e90..3da6010b9fdd676f7f00530fd866d976dcc5de41 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -467,9 +467,9 @@ T331 Basic roles YES T332 Extended roles NO mostly supported T341 Overloading of SQL-invoked functions and procedures YES T351 Bracketed SQL comments (/*...*/ comments) YES -T431 Extended grouping capabilities NO -T432 Nested and concatenated GROUPING SETS NO -T433 Multiargument GROUPING function NO +T431 Extended grouping capabilities YES +T432 Nested and concatenated GROUPING SETS YES +T433 Multiargument GROUPING function YES T434 GROUP BY DISTINCT NO T441 ABS and MOD functions YES T461 Symmetric BETWEEN predicate YES diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 2e4df5fcfefafbe8ec786a2f55b2f96fa2c442fb..232f41df65a29479ae31f98979624e59ad0b7ff6 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -82,6 +82,12 @@ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, ExplainState *es); static void show_agg_keys(AggState *astate, List *ancestors, ExplainState *es); +static void show_grouping_sets(PlanState *planstate, Agg *agg, + List *ancestors, ExplainState *es); +static void show_grouping_set_keys(PlanState *planstate, + Agg *aggnode, Sort *sortnode, + List *context, bool useprefix, + List *ancestors, ExplainState *es); static void show_group_keys(GroupState *gstate, List *ancestors, ExplainState *es); static void show_sort_group_keys(PlanState *planstate, const char *qlabel, @@ -1851,18 +1857,116 @@ show_agg_keys(AggState *astate, List *ancestors, { Agg *plan = (Agg *) astate->ss.ps.plan; - if (plan->numCols > 0) + if (plan->numCols > 0 
|| plan->groupingSets) { /* The key columns refer to the tlist of the child plan */ ancestors = lcons(astate, ancestors); - show_sort_group_keys(outerPlanState(astate), "Group Key", - plan->numCols, plan->grpColIdx, - NULL, NULL, NULL, - ancestors, es); + + if (plan->groupingSets) + show_grouping_sets(outerPlanState(astate), plan, ancestors, es); + else + show_sort_group_keys(outerPlanState(astate), "Group Key", + plan->numCols, plan->grpColIdx, + NULL, NULL, NULL, + ancestors, es); + ancestors = list_delete_first(ancestors); } } +static void +show_grouping_sets(PlanState *planstate, Agg *agg, + List *ancestors, ExplainState *es) +{ + List *context; + bool useprefix; + ListCell *lc; + + /* Set up deparsing context */ + context = set_deparse_context_planstate(es->deparse_cxt, + (Node *) planstate, + ancestors); + useprefix = (list_length(es->rtable) > 1 || es->verbose); + + ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es); + + show_grouping_set_keys(planstate, agg, NULL, + context, useprefix, ancestors, es); + + foreach(lc, agg->chain) + { + Agg *aggnode = lfirst(lc); + Sort *sortnode = (Sort *) aggnode->plan.lefttree; + + show_grouping_set_keys(planstate, aggnode, sortnode, + context, useprefix, ancestors, es); + } + + ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es); +} + +static void +show_grouping_set_keys(PlanState *planstate, + Agg *aggnode, Sort *sortnode, + List *context, bool useprefix, + List *ancestors, ExplainState *es) +{ + Plan *plan = planstate->plan; + char *exprstr; + ListCell *lc; + List *gsets = aggnode->groupingSets; + AttrNumber *keycols = aggnode->grpColIdx; + + ExplainOpenGroup("Grouping Set", NULL, true, es); + + if (sortnode) + { + show_sort_group_keys(planstate, "Sort Key", + sortnode->numCols, sortnode->sortColIdx, + sortnode->sortOperators, sortnode->collations, + sortnode->nullsFirst, + ancestors, es); + if (es->format == EXPLAIN_FORMAT_TEXT) + es->indent++; + } + + ExplainOpenGroup("Group Keys", "Group Keys", 
false, es); + + foreach(lc, gsets) + { + List *result = NIL; + ListCell *lc2; + + foreach(lc2, (List *) lfirst(lc)) + { + Index i = lfirst_int(lc2); + AttrNumber keyresno = keycols[i]; + TargetEntry *target = get_tle_by_resno(plan->targetlist, + keyresno); + + if (!target) + elog(ERROR, "no tlist entry for key %d", keyresno); + /* Deparse the expression, showing any top-level cast */ + exprstr = deparse_expression((Node *) target->expr, context, + useprefix, true); + + result = lappend(result, exprstr); + } + + if (!result && es->format == EXPLAIN_FORMAT_TEXT) + ExplainPropertyText("Group Key", "()", es); + else + ExplainPropertyListNested("Group Key", result, es); + } + + ExplainCloseGroup("Group Keys", "Group Keys", false, es); + + if (sortnode && es->format == EXPLAIN_FORMAT_TEXT) + es->indent--; + + ExplainCloseGroup("Grouping Set", NULL, true, es); +} + /* * Show the grouping keys for a Group node. */ @@ -2612,6 +2716,52 @@ ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) } } +/* + * Explain a property that takes the form of a list of unlabeled items within + * another list. "data" is a list of C strings. 
+ */ +void +ExplainPropertyListNested(const char *qlabel, List *data, ExplainState *es) +{ + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + case EXPLAIN_FORMAT_XML: + ExplainPropertyList(qlabel, data, es); + return; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoChar(es->str, '['); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfoString(es->str, "- ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_yaml(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + } +} + /* * Explain a simple property. * diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index e5994112a42ca1940544679b2208b7ade5dcc782..d414e20f1209c44782f1c3c39df716852527cc67 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -181,6 +181,9 @@ static Datum ExecEvalArrayCoerceExpr(ArrayCoerceExprState *astate, bool *isNull, ExprDoneCond *isDone); static Datum ExecEvalCurrentOfExpr(ExprState *exprstate, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); +static Datum ExecEvalGroupingFuncExpr(GroupingFuncExprState *gstate, + ExprContext *econtext, + bool *isNull, ExprDoneCond *isDone); /* ---------------------------------------------------------------- @@ -3016,6 +3019,44 @@ ExecEvalCaseTestExpr(ExprState *exprstate, return econtext->caseValue_datum; } +/* + * ExecEvalGroupingFuncExpr + * + * Return a bitmask with a bit for each (unevaluated) argument expression + * (rightmost arg is least significant bit). 
+ * + * A bit is set if the corresponding expression is NOT part of the set of + * grouping expressions in the current grouping set. + */ +static Datum +ExecEvalGroupingFuncExpr(GroupingFuncExprState *gstate, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone) +{ + int result = 0; + int attnum = 0; + Bitmapset *grouped_cols = gstate->aggstate->grouped_cols; + ListCell *lc; + + if (isDone) + *isDone = ExprSingleResult; + + *isNull = false; + + foreach(lc, (gstate->clauses)) + { + attnum = lfirst_int(lc); + + result = result << 1; + + if (!bms_is_member(attnum, grouped_cols)) + result = result | 1; + } + + return (Datum) result; +} + /* ---------------------------------------------------------------- * ExecEvalArray - ARRAY[] expressions * ---------------------------------------------------------------- @@ -4482,6 +4523,28 @@ ExecInitExpr(Expr *node, PlanState *parent) state = (ExprState *) astate; } break; + case T_GroupingFunc: + { + GroupingFunc *grp_node = (GroupingFunc *) node; + GroupingFuncExprState *grp_state = makeNode(GroupingFuncExprState); + Agg *agg = NULL; + + if (!parent || !IsA(parent, AggState) || !IsA(parent->plan, Agg)) + elog(ERROR, "parent of GROUPING is not Agg node"); + + grp_state->aggstate = (AggState *) parent; + + agg = (Agg *) (parent->plan); + + if (agg->groupingSets) + grp_state->clauses = grp_node->cols; + else + grp_state->clauses = NIL; + + state = (ExprState *) grp_state; + state->evalfunc = (ExprStateEvalFunc) ExecEvalGroupingFuncExpr; + } + break; case T_WindowFunc: { WindowFunc *wfunc = (WindowFunc *) node; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 0da8e53e816c68aed95e6f0ada981b6db581828e..3963408b18c1771cf90876e9e4e3c5bc6275fcd6 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -642,9 +642,10 @@ get_last_attnums(Node *node, ProjectionInfo *projInfo) /* * Don't examine the arguments or filters of Aggrefs or WindowFuncs, * because those 
do not represent expressions to be evaluated within the - * overall targetlist's econtext. + * overall targetlist's econtext. GroupingFunc arguments are never + * evaluated at all. */ - if (IsA(node, Aggref)) + if (IsA(node, Aggref) || IsA(node, GroupingFunc)) return false; if (IsA(node, WindowFunc)) return false; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index fcb61177c5169b1715e5a7110b73a426ee31adc8..01a1e67f09e840b9e0150c838bec09d9dffb0125 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -45,15 +45,19 @@ * needed to allow resolution of a polymorphic aggregate's result type. * * We compute aggregate input expressions and run the transition functions - * in a temporary econtext (aggstate->tmpcontext). This is reset at - * least once per input tuple, so when the transvalue datatype is + * in a temporary econtext (aggstate->tmpcontext). This is reset at least + * once per input tuple, so when the transvalue datatype is * pass-by-reference, we have to be careful to copy it into a longer-lived - * memory context, and free the prior value to avoid memory leakage. - * We store transvalues in the memory context aggstate->aggcontext, - * which is also used for the hashtable structures in AGG_HASHED mode. - * The node's regular econtext (aggstate->ss.ps.ps_ExprContext) - * is used to run finalize functions and compute the output tuple; - * this context can be reset once per output tuple. + * memory context, and free the prior value to avoid memory leakage. We + * store transvalues in another set of econtexts, aggstate->aggcontexts + * (one per grouping set, see below), which are also used for the hashtable + * structures in AGG_HASHED mode. These econtexts are rescanned, not just + * reset, at group boundaries so that aggregate transition functions can + * register shutdown callbacks via AggRegisterCallback. 
+ * + * The node's regular econtext (aggstate->ss.ps.ps_ExprContext) is used to + * run finalize functions and compute the output tuple; this context can be + * reset once per output tuple. * * The executor's AggState node is passed as the fmgr "context" value in * all transfunc and finalfunc calls. It is not recommended that the @@ -84,6 +88,36 @@ * need some fallback logic to use this, since there's no Aggref node * for a window function.) * + * Grouping sets: + * + * A list of grouping sets which is structurally equivalent to a ROLLUP + * clause (e.g. (a,b,c), (a,b), (a)) can be processed in a single pass over + * ordered data. We do this by keeping a separate set of transition values + * for each grouping set being concurrently processed; for each input tuple + * we update them all, and on group boundaries we reset those states + * (starting at the front of the list) whose grouping values have changed + * (the list of grouping sets is ordered from most specific to least + * specific). + * + * Where more complex grouping sets are used, we break them down into + * "phases", where each phase has a different sort order. During each + * phase but the last, the input tuples are additionally stored in a + * tuplesort which is keyed to the next phase's sort order; during each + * phase but the first, the input tuples are drawn from the previously + * sorted data. (The sorting of the data for the first phase is handled by + * the planner, as it might be satisfied by underlying nodes.) + * + * From the perspective of aggregate transition and final functions, the + * only issue regarding grouping sets is this: a single call site (flinfo) + * of an aggregate function may be used for updating several different + * transition values in turn. So the function must not cache in the flinfo + * anything which logically belongs as part of the transition value (most + * importantly, the memory context in which the transition value exists). 
+ * The support API functions (AggCheckCallContext, AggRegisterCallback) are + * sensitive to the grouping set for which the aggregate function is + * currently being called. + * + * TODO: AGG_HASHED doesn't support multiple grouping sets yet. * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -241,9 +275,11 @@ typedef struct AggStatePerAggData * then at completion of the input tuple group, we scan the sorted values, * eliminate duplicates if needed, and run the transition function on the * rest. + * + * We need a separate tuplesort for each grouping set. */ - Tuplesortstate *sortstate; /* sort object, if DISTINCT or ORDER BY */ + Tuplesortstate **sortstates; /* sort objects, if DISTINCT or ORDER BY */ /* * This field is a pre-initialized FunctionCallInfo struct used for @@ -286,6 +322,27 @@ typedef struct AggStatePerGroupData */ } AggStatePerGroupData; +/* + * AggStatePerPhaseData - per-grouping-set-phase state + * + * Grouping sets are divided into "phases", where a single phase can be + * processed in one pass over the input. If there is more than one phase, then + * at the end of input from the current phase, state is reset and another pass + * taken over the data which has been re-sorted in the mean time. + * + * Accordingly, each phase specifies a list of grouping sets and group clause + * information, plus each phase after the first also has a sort order. 
+ */ +typedef struct AggStatePerPhaseData +{ + int numsets; /* number of grouping sets (or 0) */ + int *gset_lengths; /* lengths of grouping sets */ + Bitmapset **grouped_cols; /* column groupings for rollup */ + FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ + Agg *aggnode; /* Agg node for phase data */ + Sort *sortnode; /* Sort node for input ordering for phase */ +} AggStatePerPhaseData; + /* * To implement hashed aggregation, we need a hashtable that stores a * representative tuple and an array of AggStatePerGroup structs for each @@ -302,9 +359,12 @@ typedef struct AggHashEntryData } AggHashEntryData; +static void initialize_phase(AggState *aggstate, int newphase); +static TupleTableSlot *fetch_input_tuple(AggState *aggstate); static void initialize_aggregates(AggState *aggstate, AggStatePerAgg peragg, - AggStatePerGroup pergroup); + AggStatePerGroup pergroup, + int numReset); static void advance_transition_function(AggState *aggstate, AggStatePerAgg peraggstate, AggStatePerGroup pergroupstate); @@ -319,6 +379,14 @@ static void finalize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate, AggStatePerGroup pergroupstate, Datum *resultVal, bool *resultIsNull); +static void prepare_projection_slot(AggState *aggstate, + TupleTableSlot *slot, + int currentSet); +static void finalize_aggregates(AggState *aggstate, + AggStatePerAgg peragg, + AggStatePerGroup pergroup, + int currentSet); +static TupleTableSlot *project_aggregates(AggState *aggstate); static Bitmapset *find_unaggregated_cols(AggState *aggstate); static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos); static void build_hash_table(AggState *aggstate); @@ -331,46 +399,135 @@ static Datum GetAggInitVal(Datum textInitVal, Oid transtype); /* - * Initialize all aggregates for a new group of input values. - * - * When called, CurrentMemoryContext should be the per-query context. + * Switch to phase "newphase", which must either be 0 (to reset) or + * current_phase + 1. 
Juggle the tuplesorts accordingly. */ static void -initialize_aggregates(AggState *aggstate, - AggStatePerAgg peragg, - AggStatePerGroup pergroup) +initialize_phase(AggState *aggstate, int newphase) { - int aggno; + Assert(newphase == 0 || newphase == aggstate->current_phase + 1); - for (aggno = 0; aggno < aggstate->numaggs; aggno++) + /* + * Whatever the previous state, we're now done with whatever input + * tuplesort was in use. + */ + if (aggstate->sort_in) { - AggStatePerAgg peraggstate = &peragg[aggno]; - AggStatePerGroup pergroupstate = &pergroup[aggno]; + tuplesort_end(aggstate->sort_in); + aggstate->sort_in = NULL; + } + if (newphase == 0) + { /* - * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate. + * Discard any existing output tuplesort. */ - if (peraggstate->numSortCols > 0) + if (aggstate->sort_out) { - /* - * In case of rescan, maybe there could be an uncompleted sort - * operation? Clean it up if so. - */ - if (peraggstate->sortstate) - tuplesort_end(peraggstate->sortstate); + tuplesort_end(aggstate->sort_out); + aggstate->sort_out = NULL; + } + } + else + { + /* + * The old output tuplesort becomes the new input one, and this is the + * right time to actually sort it. + */ + aggstate->sort_in = aggstate->sort_out; + aggstate->sort_out = NULL; + Assert(aggstate->sort_in); + tuplesort_performsort(aggstate->sort_in); + } - /* - * We use a plain Datum sorter when there's a single input column; - * otherwise sort the full tuple. (See comments for - * process_ordered_aggregate_single.) - */ - peraggstate->sortstate = - (peraggstate->numInputs == 1) ? + /* + * If this isn't the last phase, we need to sort appropriately for the next + * phase in sequence. 
+ */ + if (newphase < aggstate->numphases - 1) + { + Sort *sortnode = aggstate->phases[newphase+1].sortnode; + PlanState *outerNode = outerPlanState(aggstate); + TupleDesc tupDesc = ExecGetResultType(outerNode); + + aggstate->sort_out = tuplesort_begin_heap(tupDesc, + sortnode->numCols, + sortnode->sortColIdx, + sortnode->sortOperators, + sortnode->collations, + sortnode->nullsFirst, + work_mem, + false); + } + + aggstate->current_phase = newphase; + aggstate->phase = &aggstate->phases[newphase]; +} + +/* + * Fetch a tuple from either the outer plan (for phase 0) or from the sorter + * populated by the previous phase. Copy it to the sorter for the next phase + * if any. + */ +static TupleTableSlot * +fetch_input_tuple(AggState *aggstate) +{ + TupleTableSlot *slot; + + if (aggstate->sort_in) + { + if (!tuplesort_gettupleslot(aggstate->sort_in, true, aggstate->sort_slot)) + return NULL; + slot = aggstate->sort_slot; + } + else + slot = ExecProcNode(outerPlanState(aggstate)); + + if (!TupIsNull(slot) && aggstate->sort_out) + tuplesort_puttupleslot(aggstate->sort_out, slot); + + return slot; +} + +/* + * (Re)Initialize an individual aggregate. + * + * This function handles only one grouping set (already set in + * aggstate->current_set). + * + * When called, CurrentMemoryContext should be the per-query context. + */ +static void +initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate) +{ + /* + * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate. + */ + if (peraggstate->numSortCols > 0) + { + /* + * In case of rescan, maybe there could be an uncompleted sort + * operation? Clean it up if so. + */ + if (peraggstate->sortstates[aggstate->current_set]) + tuplesort_end(peraggstate->sortstates[aggstate->current_set]); + + + /* + * We use a plain Datum sorter when there's a single input column; + * otherwise sort the full tuple. (See comments for + * process_ordered_aggregate_single.) 
+ */ + if (peraggstate->numInputs == 1) + peraggstate->sortstates[aggstate->current_set] = tuplesort_begin_datum(peraggstate->evaldesc->attrs[0]->atttypid, peraggstate->sortOperators[0], peraggstate->sortCollations[0], peraggstate->sortNullsFirst[0], - work_mem, false) : + work_mem, false); + else + peraggstate->sortstates[aggstate->current_set] = tuplesort_begin_heap(peraggstate->evaldesc, peraggstate->numSortCols, peraggstate->sortColIdx, @@ -378,41 +535,83 @@ initialize_aggregates(AggState *aggstate, peraggstate->sortCollations, peraggstate->sortNullsFirst, work_mem, false); - } + } - /* - * (Re)set transValue to the initial value. - * - * Note that when the initial value is pass-by-ref, we must copy it - * (into the aggcontext) since we will pfree the transValue later. - */ - if (peraggstate->initValueIsNull) - pergroupstate->transValue = peraggstate->initValue; - else + /* + * (Re)set transValue to the initial value. + * + * Note that when the initial value is pass-by-ref, we must copy + * it (into the aggcontext) since we will pfree the transValue + * later. + */ + if (peraggstate->initValueIsNull) + pergroupstate->transValue = peraggstate->initValue; + else + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo( + aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + pergroupstate->transValue = datumCopy(peraggstate->initValue, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + MemoryContextSwitchTo(oldContext); + } + pergroupstate->transValueIsNull = peraggstate->initValueIsNull; + + /* + * If the initial value for the transition state doesn't exist in + * the pg_aggregate table then we will let the first non-NULL + * value returned from the outer procNode become the initial + * value. (This is useful for aggregates like max() and min().) + * The noTransValue flag signals that we still need to do this. 
+ */ + pergroupstate->noTransValue = peraggstate->initValueIsNull; +} + +/* + * Initialize all aggregates for a new group of input values. + * + * If there are multiple grouping sets, we initialize only the first numReset + * of them (the grouping sets are ordered so that the most specific one, which + * is reset most often, is first). As a convenience, if numReset is < 1, we + * reinitialize all sets. + * + * When called, CurrentMemoryContext should be the per-query context. + */ +static void +initialize_aggregates(AggState *aggstate, + AggStatePerAgg peragg, + AggStatePerGroup pergroup, + int numReset) +{ + int aggno; + int numGroupingSets = Max(aggstate->phase->numsets, 1); + int setno = 0; + + if (numReset < 1) + numReset = numGroupingSets; + + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &peragg[aggno]; + + for (setno = 0; setno < numReset; setno++) { - MemoryContext oldContext; + AggStatePerGroup pergroupstate; - oldContext = MemoryContextSwitchTo(aggstate->aggcontext); - pergroupstate->transValue = datumCopy(peraggstate->initValue, - peraggstate->transtypeByVal, - peraggstate->transtypeLen); - MemoryContextSwitchTo(oldContext); - } - pergroupstate->transValueIsNull = peraggstate->initValueIsNull; + pergroupstate = &pergroup[aggno + (setno * (aggstate->numaggs))]; - /* - * If the initial value for the transition state doesn't exist in the - * pg_aggregate table then we will let the first non-NULL value - * returned from the outer procNode become the initial value. (This is - * useful for aggregates like max() and min().) The noTransValue flag - * signals that we still need to do this. - */ - pergroupstate->noTransValue = peraggstate->initValueIsNull; + aggstate->current_set = setno; + + initialize_aggregate(aggstate, peraggstate, pergroupstate); + } } } /* - * Given new input value(s), advance the transition function of an aggregate. 
+ * Given new input value(s), advance the transition function of one aggregate + * within one grouping set only (already set in aggstate->current_set) * * The new values (and null flags) have been preloaded into argument positions * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again @@ -455,7 +654,8 @@ advance_transition_function(AggState *aggstate, * We must copy the datum into aggcontext if it is pass-by-ref. We * do not need to pfree the old transValue, since it's NULL. */ - oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + oldContext = MemoryContextSwitchTo( + aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(fcinfo->arg[1], peraggstate->transtypeByVal, peraggstate->transtypeLen); @@ -503,7 +703,7 @@ advance_transition_function(AggState *aggstate, { if (!fcinfo->isnull) { - MemoryContextSwitchTo(aggstate->aggcontext); + MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); newVal = datumCopy(newVal, peraggstate->transtypeByVal, peraggstate->transtypeLen); @@ -530,11 +730,13 @@ static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) { int aggno; + int setno = 0; + int numGroupingSets = Max(aggstate->phase->numsets, 1); + int numAggs = aggstate->numaggs; - for (aggno = 0; aggno < aggstate->numaggs; aggno++) + for (aggno = 0; aggno < numAggs; aggno++) { AggStatePerAgg peraggstate = &aggstate->peragg[aggno]; - AggStatePerGroup pergroupstate = &pergroup[aggno]; ExprState *filter = peraggstate->aggrefstate->aggfilter; int numTransInputs = peraggstate->numTransInputs; int i; @@ -578,13 +780,16 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) continue; } - /* OK, put the tuple into the tuplesort object */ - if (peraggstate->numInputs == 1) - tuplesort_putdatum(peraggstate->sortstate, - slot->tts_values[0], - slot->tts_isnull[0]); - else - tuplesort_puttupleslot(peraggstate->sortstate, slot); + for 
(setno = 0; setno < numGroupingSets; setno++) + { + /* OK, put the tuple into the tuplesort object */ + if (peraggstate->numInputs == 1) + tuplesort_putdatum(peraggstate->sortstates[setno], + slot->tts_values[0], + slot->tts_isnull[0]); + else + tuplesort_puttupleslot(peraggstate->sortstates[setno], slot); + } } else { @@ -600,7 +805,14 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) fcinfo->argnull[i + 1] = slot->tts_isnull[i]; } - advance_transition_function(aggstate, peraggstate, pergroupstate); + for (setno = 0; setno < numGroupingSets; setno++) + { + AggStatePerGroup pergroupstate = &pergroup[aggno + (setno * numAggs)]; + + aggstate->current_set = setno; + + advance_transition_function(aggstate, peraggstate, pergroupstate); + } } } } @@ -623,6 +835,9 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) * is around 300% faster. (The speedup for by-reference types is less * but still noticeable.) * + * This function handles only one grouping set (already set in + * aggstate->current_set). + * * When called, CurrentMemoryContext should be the per-query context. */ static void @@ -642,7 +857,7 @@ process_ordered_aggregate_single(AggState *aggstate, Assert(peraggstate->numDistinctCols < 2); - tuplesort_performsort(peraggstate->sortstate); + tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]); /* Load the column into argument 1 (arg 0 will be transition value) */ newVal = fcinfo->arg + 1; @@ -654,8 +869,8 @@ process_ordered_aggregate_single(AggState *aggstate, * pfree them when they are no longer needed. 
*/ - while (tuplesort_getdatum(peraggstate->sortstate, true, - newVal, isNull)) + while (tuplesort_getdatum(peraggstate->sortstates[aggstate->current_set], + true, newVal, isNull)) { /* * Clear and select the working context for evaluation of the equality @@ -698,8 +913,8 @@ process_ordered_aggregate_single(AggState *aggstate, if (!oldIsNull && !peraggstate->inputtypeByVal) pfree(DatumGetPointer(oldVal)); - tuplesort_end(peraggstate->sortstate); - peraggstate->sortstate = NULL; + tuplesort_end(peraggstate->sortstates[aggstate->current_set]); + peraggstate->sortstates[aggstate->current_set] = NULL; } /* @@ -709,6 +924,9 @@ process_ordered_aggregate_single(AggState *aggstate, * sort, read out the values in sorted order, and run the transition * function on each value (applying DISTINCT if appropriate). * + * This function handles only one grouping set (already set in + * aggstate->current_set). + * * When called, CurrentMemoryContext should be the per-query context. */ static void @@ -725,13 +943,14 @@ process_ordered_aggregate_multi(AggState *aggstate, bool haveOldValue = false; int i; - tuplesort_performsort(peraggstate->sortstate); + tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]); ExecClearTuple(slot1); if (slot2) ExecClearTuple(slot2); - while (tuplesort_gettupleslot(peraggstate->sortstate, true, slot1)) + while (tuplesort_gettupleslot(peraggstate->sortstates[aggstate->current_set], + true, slot1)) { /* * Extract the first numTransInputs columns as datums to pass to the @@ -779,13 +998,16 @@ process_ordered_aggregate_multi(AggState *aggstate, if (slot2) ExecClearTuple(slot2); - tuplesort_end(peraggstate->sortstate); - peraggstate->sortstate = NULL; + tuplesort_end(peraggstate->sortstates[aggstate->current_set]); + peraggstate->sortstates[aggstate->current_set] = NULL; } /* * Compute the final value of one aggregate. * + * This function handles only one grouping set (already set in + * aggstate->current_set). 
+ * * The finalfunction will be run, and the result delivered, in the * output-tuple context; caller's CurrentMemoryContext does not matter. */ @@ -832,7 +1054,7 @@ finalize_aggregate(AggState *aggstate, /* set up aggstate->curperagg for AggGetAggref() */ aggstate->curperagg = peraggstate; - InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), + InitFunctionCallInfoData(fcinfo, &peraggstate->finalfn, numFinalArgs, peraggstate->aggCollation, (void *) aggstate, NULL); @@ -882,6 +1104,154 @@ finalize_aggregate(AggState *aggstate, MemoryContextSwitchTo(oldContext); } + +/* + * Prepare to finalize and project based on the specified representative tuple + * slot and grouping set. + * + * In the specified tuple slot, force to null all attributes that should be + * read as null in the context of the current grouping set. Also stash the + * current group bitmap where GroupingExpr can get at it. + * + * This relies on three conditions: + * + * 1) Nothing is ever going to try and extract the whole tuple from this slot, + * only reference it in evaluations, which will only access individual + * attributes. + * + * 2) No system columns are going to need to be nulled. (If a system column is + * referenced in a group clause, it is actually projected in the outer plan + * tlist.) + * + * 3) Within a given phase, we never need to recover the value of an attribute + * once it has been set to null. + * + * Poking into the slot this way is a bit ugly, but the consensus is that the + * alternative was worse. + */ +static void +prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet) +{ + if (aggstate->phase->grouped_cols) + { + Bitmapset *grouped_cols = aggstate->phase->grouped_cols[currentSet]; + + aggstate->grouped_cols = grouped_cols; + + if (slot->tts_isempty) + { + /* + * Force all values to be NULL if working on an empty input tuple + * (i.e. an empty grouping set for which no input rows were + * supplied). 
+ */ + ExecStoreAllNullTuple(slot); + } + else if (aggstate->all_grouped_cols) + { + ListCell *lc; + + /* all_grouped_cols is arranged in desc order */ + slot_getsomeattrs(slot, linitial_int(aggstate->all_grouped_cols)); + + foreach(lc, aggstate->all_grouped_cols) + { + int attnum = lfirst_int(lc); + + if (!bms_is_member(attnum, grouped_cols)) + slot->tts_isnull[attnum - 1] = true; + } + } + } +} + +/* + * Compute the final value of all aggregates for one group. + * + * This function handles only one grouping set at a time. + * + * Results are stored in the output econtext aggvalues/aggnulls. + */ +static void +finalize_aggregates(AggState *aggstate, + AggStatePerAgg peragg, + AggStatePerGroup pergroup, + int currentSet) +{ + ExprContext *econtext = aggstate->ss.ps.ps_ExprContext; + Datum *aggvalues = econtext->ecxt_aggvalues; + bool *aggnulls = econtext->ecxt_aggnulls; + int aggno; + + Assert(currentSet == 0 || + ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + + aggstate->current_set = currentSet; + + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &peragg[aggno]; + AggStatePerGroup pergroupstate; + + pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))]; + + if (peraggstate->numSortCols > 0) + { + Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + + if (peraggstate->numInputs == 1) + process_ordered_aggregate_single(aggstate, + peraggstate, + pergroupstate); + else + process_ordered_aggregate_multi(aggstate, + peraggstate, + pergroupstate); + } + + finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); + } +} + +/* + * Project the result of a group (whose aggs have already been calculated by + * finalize_aggregates). Returns the result slot, or NULL if no row is + * projected (suppressed by qual or by an empty SRF). 
+ */ +static TupleTableSlot * +project_aggregates(AggState *aggstate) +{ + ExprContext *econtext = aggstate->ss.ps.ps_ExprContext; + + /* + * Check the qual (HAVING clause); if the group does not match, ignore + * it. + */ + if (ExecQual(aggstate->ss.ps.qual, econtext, false)) + { + /* + * Form and return or store a projection tuple using the aggregate + * results and the representative input tuple. + */ + ExprDoneCond isDone; + TupleTableSlot *result; + + result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + aggstate->ss.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered1(aggstate, 1); + + return NULL; +} + /* * find_unaggregated_cols * Construct a bitmapset of the column numbers of un-aggregated Vars @@ -916,8 +1286,11 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) *colnos = bms_add_member(*colnos, var->varattno); return false; } - if (IsA(node, Aggref)) /* do not descend into aggregate exprs */ + if (IsA(node, Aggref) || IsA(node, GroupingFunc)) + { + /* do not descend into aggregate exprs */ return false; + } return expression_tree_walker(node, find_unaggregated_cols_walker, (void *) colnos); } @@ -942,11 +1315,11 @@ build_hash_table(AggState *aggstate) aggstate->hashtable = BuildTupleHashTable(node->numCols, node->grpColIdx, - aggstate->eqfunctions, + aggstate->phase->eqfunctions, aggstate->hashfunctions, node->numGroups, entrysize, - aggstate->aggcontext, + aggstate->aggcontexts[0]->ecxt_per_tuple_memory, tmpmem); } @@ -1057,7 +1430,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) if (isnew) { /* initialize aggregates for new tuple group */ - initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup); + initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup, 0); } return entry; @@ -1079,6 +1452,8 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) TupleTableSlot * ExecAgg(AggState 
*node) { + TupleTableSlot *result; + /* * Check to see if we're still projecting out tuples from a previous agg * tuple (because there is a function-returning-set in the projection @@ -1086,7 +1461,6 @@ ExecAgg(AggState *node) */ if (node->ss.ps.ps_TupFromTlist) { - TupleTableSlot *result; ExprDoneCond isDone; result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); @@ -1097,22 +1471,30 @@ ExecAgg(AggState *node) } /* - * Exit if nothing left to do. (We must do the ps_TupFromTlist check - * first, because in some cases agg_done gets set before we emit the final - * aggregate tuple, and we have to finish running SRFs for it.) + * (We must do the ps_TupFromTlist check first, because in some cases + * agg_done gets set before we emit the final aggregate tuple, and we have + * to finish running SRFs for it.) */ - if (node->agg_done) - return NULL; - - /* Dispatch based on strategy */ - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (!node->agg_done) { - if (!node->table_filled) - agg_fill_hash_table(node); - return agg_retrieve_hash_table(node); + /* Dispatch based on strategy */ + switch (node->phase->aggnode->aggstrategy) + { + case AGG_HASHED: + if (!node->table_filled) + agg_fill_hash_table(node); + result = agg_retrieve_hash_table(node); + break; + default: + result = agg_retrieve_direct(node); + break; + } + + if (!TupIsNull(result)) + return result; } - else - return agg_retrieve_direct(node); + + return NULL; } /* @@ -1121,28 +1503,30 @@ ExecAgg(AggState *node) static TupleTableSlot * agg_retrieve_direct(AggState *aggstate) { - Agg *node = (Agg *) aggstate->ss.ps.plan; - PlanState *outerPlan; + Agg *node = aggstate->phase->aggnode; ExprContext *econtext; ExprContext *tmpcontext; - Datum *aggvalues; - bool *aggnulls; AggStatePerAgg peragg; AggStatePerGroup pergroup; TupleTableSlot *outerslot; TupleTableSlot *firstSlot; - int aggno; + TupleTableSlot *result; + bool hasGroupingSets = aggstate->phase->numsets > 0; + int numGroupingSets = 
Max(aggstate->phase->numsets, 1); + int currentSet; + int nextSetSize; + int numReset; + int i; /* * get state info from node + * + * econtext is the per-output-tuple expression context + * tmpcontext is the per-input-tuple expression context */ - outerPlan = outerPlanState(aggstate); - /* econtext is the per-output-tuple expression context */ econtext = aggstate->ss.ps.ps_ExprContext; - aggvalues = econtext->ecxt_aggvalues; - aggnulls = econtext->ecxt_aggnulls; - /* tmpcontext is the per-input-tuple expression context */ tmpcontext = aggstate->tmpcontext; + peragg = aggstate->peragg; pergroup = aggstate->pergroup; firstSlot = aggstate->ss.ss_ScanTupleSlot; @@ -1150,172 +1534,281 @@ agg_retrieve_direct(AggState *aggstate) /* * We loop retrieving groups until we find one matching * aggstate->ss.ps.qual + * + * For grouping sets, we have the invariant that aggstate->projected_set + * is either -1 (initial call) or the index (starting from 0) in + * gset_lengths for the group we just completed (either by projecting a + * row or by discarding it in the qual). */ while (!aggstate->agg_done) { /* - * If we don't already have the first tuple of the new group, fetch it - * from the outer plan. + * Clear the per-output-tuple context for each group, as well as + * aggcontext (which contains any pass-by-ref transvalues of the old + * group). Some aggregate functions store working state in child + * contexts; those now get reset automatically without us needing to + * do anything special. + * + * We use ReScanExprContext not just ResetExprContext because we want + * any registered shutdown callbacks to be called. That allows + * aggregate functions to ensure they've cleaned up any non-memory + * resources. + */ + ReScanExprContext(econtext); + + /* + * Determine how many grouping sets need to be reset at this boundary. 
*/ - if (aggstate->grp_firstTuple == NULL) + if (aggstate->projected_set >= 0 && + aggstate->projected_set < numGroupingSets) + numReset = aggstate->projected_set + 1; + else + numReset = numGroupingSets; + + /* + * numReset can change on a phase boundary, but that's OK; we want to + * reset the contexts used in _this_ phase, and later, after possibly + * changing phase, initialize the right number of aggregates for the + * _new_ phase. + */ + + for (i = 0; i < numReset; i++) + { + ReScanExprContext(aggstate->aggcontexts[i]); + } + + /* + * Check if input is complete and there are no more groups to project + * in this phase; move to next phase or mark as done. + */ + if (aggstate->input_done == true && + aggstate->projected_set >= (numGroupingSets - 1)) { - outerslot = ExecProcNode(outerPlan); - if (!TupIsNull(outerslot)) + if (aggstate->current_phase < aggstate->numphases - 1) { - /* - * Make a copy of the first input tuple; we will use this for - * comparisons (in group mode) and for projection. - */ - aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot); + initialize_phase(aggstate, aggstate->current_phase + 1); + aggstate->input_done = false; + aggstate->projected_set = -1; + numGroupingSets = Max(aggstate->phase->numsets, 1); + node = aggstate->phase->aggnode; + numReset = numGroupingSets; } else { - /* outer plan produced no tuples at all */ aggstate->agg_done = true; - /* If we are grouping, we should produce no tuples too */ - if (node->aggstrategy != AGG_PLAIN) - return NULL; + break; } } /* - * Clear the per-output-tuple context for each group, as well as - * aggcontext (which contains any pass-by-ref transvalues of the old - * group). We also clear any child contexts of the aggcontext; some - * aggregate functions store working state in such contexts. - * - * We use ReScanExprContext not just ResetExprContext because we want - * any registered shutdown callbacks to be called. 
That allows - * aggregate functions to ensure they've cleaned up any non-memory - * resources. + * Get the number of columns in the next grouping set after the last + * projected one (if any). This is the number of columns to compare to + * see if we reached the boundary of that set too. */ - ReScanExprContext(econtext); - - MemoryContextResetAndDeleteChildren(aggstate->aggcontext); + if (aggstate->projected_set >= 0 && + aggstate->projected_set < (numGroupingSets - 1)) + nextSetSize = aggstate->phase->gset_lengths[aggstate->projected_set + 1]; + else + nextSetSize = 0; - /* - * Initialize working state for a new input tuple group + /*- + * If a subgroup for the current grouping set is present, project it. + * + * We have a new group if: + * - we're out of input but haven't projected all grouping sets + * (checked above) + * OR + * - we already projected a row that wasn't from the last grouping + * set + * AND + * - the next grouping set has at least one grouping column (since + * empty grouping sets project only once input is exhausted) + * AND + * - the previous and pending rows differ on the grouping columns + * of the next grouping set */ - initialize_aggregates(aggstate, peragg, pergroup); + if (aggstate->input_done || + (node->aggstrategy == AGG_SORTED && + aggstate->projected_set != -1 && + aggstate->projected_set < (numGroupingSets - 1) && + nextSetSize > 0 && + !execTuplesMatch(econtext->ecxt_outertuple, + tmpcontext->ecxt_outertuple, + nextSetSize, + node->grpColIdx, + aggstate->phase->eqfunctions, + tmpcontext->ecxt_per_tuple_memory))) + { + aggstate->projected_set += 1; - if (aggstate->grp_firstTuple != NULL) + Assert(aggstate->projected_set < numGroupingSets); + Assert(nextSetSize > 0 || aggstate->input_done); + } + else { /* - * Store the copied first input tuple in the tuple table slot - * reserved for it. The tuple will be deleted when it is cleared - * from the slot. 
+ * We no longer care what group we just projected, the next + * projection will always be the first (or only) grouping set + * (unless the input proves to be empty). */ - ExecStoreTuple(aggstate->grp_firstTuple, - firstSlot, - InvalidBuffer, - true); - aggstate->grp_firstTuple = NULL; /* don't keep two pointers */ - - /* set up for first advance_aggregates call */ - tmpcontext->ecxt_outertuple = firstSlot; + aggstate->projected_set = 0; /* - * Process each outer-plan tuple, and then fetch the next one, - * until we exhaust the outer plan or cross a group boundary. + * If we don't already have the first tuple of the new group, + * fetch it from the outer plan. */ - for (;;) + if (aggstate->grp_firstTuple == NULL) { - advance_aggregates(aggstate, pergroup); - - /* Reset per-input-tuple context after each tuple */ - ResetExprContext(tmpcontext); - - outerslot = ExecProcNode(outerPlan); - if (TupIsNull(outerslot)) + outerslot = fetch_input_tuple(aggstate); + if (!TupIsNull(outerslot)) { - /* no more outer-plan tuples available */ - aggstate->agg_done = true; - break; + /* + * Make a copy of the first input tuple; we will use this + * for comparisons (in group mode) and for projection. + */ + aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot); } - /* set up for next advance_aggregates call */ - tmpcontext->ecxt_outertuple = outerslot; - - /* - * If we are grouping, check whether we've crossed a group - * boundary. - */ - if (node->aggstrategy == AGG_SORTED) + else { - if (!execTuplesMatch(firstSlot, - outerslot, - node->numCols, node->grpColIdx, - aggstate->eqfunctions, - tmpcontext->ecxt_per_tuple_memory)) + /* outer plan produced no tuples at all */ + if (hasGroupingSets) { /* - * Save the first input tuple of the next group. + * If there was no input at all, we need to project + * rows only if there are grouping sets of size 0. 
+ * Note that this implies that there can't be any + * references to ungrouped Vars, which would otherwise + * cause issues with the empty output slot. + * + * XXX: This is no longer true, we currently deal with + * this in finalize_aggregates(). */ - aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot); - break; + aggstate->input_done = true; + + while (aggstate->phase->gset_lengths[aggstate->projected_set] > 0) + { + aggstate->projected_set += 1; + if (aggstate->projected_set >= numGroupingSets) + { + /* + * We can't set agg_done here because we might + * have more phases to do, even though the + * input is empty. So we need to restart the + * whole outer loop. + */ + break; + } + } + + if (aggstate->projected_set >= numGroupingSets) + continue; + } + else + { + aggstate->agg_done = true; + /* If we are grouping, we should produce no tuples too */ + if (node->aggstrategy != AGG_PLAIN) + return NULL; } } } - } - /* - * Use the representative input tuple for any references to - * non-aggregated input columns in aggregate direct args, the node - * qual, and the tlist. (If we are not grouping, and there are no - * input rows at all, we will come here with an empty firstSlot ... - * but if not grouping, there can't be any references to - * non-aggregated input columns, so no problem.) - */ - econtext->ecxt_outertuple = firstSlot; - - /* - * Done scanning input tuple group. Finalize each aggregate - * calculation, and stash results in the per-output-tuple context. - */ - for (aggno = 0; aggno < aggstate->numaggs; aggno++) - { - AggStatePerAgg peraggstate = &peragg[aggno]; - AggStatePerGroup pergroupstate = &pergroup[aggno]; + /* + * Initialize working state for a new input tuple group. 
+ */ + initialize_aggregates(aggstate, peragg, pergroup, numReset); - if (peraggstate->numSortCols > 0) + if (aggstate->grp_firstTuple != NULL) { - if (peraggstate->numInputs == 1) - process_ordered_aggregate_single(aggstate, - peraggstate, - pergroupstate); - else - process_ordered_aggregate_multi(aggstate, - peraggstate, - pergroupstate); - } + /* + * Store the copied first input tuple in the tuple table slot + * reserved for it. The tuple will be deleted when it is + * cleared from the slot. + */ + ExecStoreTuple(aggstate->grp_firstTuple, + firstSlot, + InvalidBuffer, + true); + aggstate->grp_firstTuple = NULL; /* don't keep two pointers */ - finalize_aggregate(aggstate, peraggstate, pergroupstate, - &aggvalues[aggno], &aggnulls[aggno]); - } + /* set up for first advance_aggregates call */ + tmpcontext->ecxt_outertuple = firstSlot; - /* - * Check the qual (HAVING clause); if the group does not match, ignore - * it and loop back to try to process another group. - */ - if (ExecQual(aggstate->ss.ps.qual, econtext, false)) - { - /* - * Form and return a projection tuple using the aggregate results - * and the representative input tuple. - */ - TupleTableSlot *result; - ExprDoneCond isDone; + /* + * Process each outer-plan tuple, and then fetch the next one, + * until we exhaust the outer plan or cross a group boundary. 
+ */ + for (;;) + { + advance_aggregates(aggstate, pergroup); - result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone); + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); - if (isDone != ExprEndResult) - { - aggstate->ss.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; + outerslot = fetch_input_tuple(aggstate); + if (TupIsNull(outerslot)) + { + /* no more outer-plan tuples available */ + if (hasGroupingSets) + { + aggstate->input_done = true; + break; + } + else + { + aggstate->agg_done = true; + break; + } + } + /* set up for next advance_aggregates call */ + tmpcontext->ecxt_outertuple = outerslot; + + /* + * If we are grouping, check whether we've crossed a group + * boundary. + */ + if (node->aggstrategy == AGG_SORTED) + { + if (!execTuplesMatch(firstSlot, + outerslot, + node->numCols, + node->grpColIdx, + aggstate->phase->eqfunctions, + tmpcontext->ecxt_per_tuple_memory)) + { + aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot); + break; + } + } + } } + + /* + * Use the representative input tuple for any references to + * non-aggregated input columns in aggregate direct args, the node + * qual, and the tlist. (If we are not grouping, and there are no + * input rows at all, we will come here with an empty firstSlot ... + * but if not grouping, there can't be any references to + * non-aggregated input columns, so no problem.) + */ + econtext->ecxt_outertuple = firstSlot; } - else - InstrCountFiltered1(aggstate, 1); + + Assert(aggstate->projected_set >= 0); + + currentSet = aggstate->projected_set; + + prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet); + + finalize_aggregates(aggstate, peragg, pergroup, currentSet); + + /* + * If there's no row to project right now, we must continue rather than + * returning a null since there might be more groups. 
+ */ + result = project_aggregates(aggstate); + if (result) + return result; } /* No more groups */ @@ -1328,16 +1821,15 @@ agg_retrieve_direct(AggState *aggstate) static void agg_fill_hash_table(AggState *aggstate) { - PlanState *outerPlan; ExprContext *tmpcontext; AggHashEntry entry; TupleTableSlot *outerslot; /* * get state info from node + * + * tmpcontext is the per-input-tuple expression context */ - outerPlan = outerPlanState(aggstate); - /* tmpcontext is the per-input-tuple expression context */ tmpcontext = aggstate->tmpcontext; /* @@ -1346,7 +1838,7 @@ agg_fill_hash_table(AggState *aggstate) */ for (;;) { - outerslot = ExecProcNode(outerPlan); + outerslot = fetch_input_tuple(aggstate); if (TupIsNull(outerslot)) break; /* set up for advance_aggregates call */ @@ -1374,21 +1866,17 @@ static TupleTableSlot * agg_retrieve_hash_table(AggState *aggstate) { ExprContext *econtext; - Datum *aggvalues; - bool *aggnulls; AggStatePerAgg peragg; AggStatePerGroup pergroup; AggHashEntry entry; TupleTableSlot *firstSlot; - int aggno; + TupleTableSlot *result; /* * get state info from node */ /* econtext is the per-output-tuple expression context */ econtext = aggstate->ss.ps.ps_ExprContext; - aggvalues = econtext->ecxt_aggvalues; - aggnulls = econtext->ecxt_aggnulls; peragg = aggstate->peragg; firstSlot = aggstate->ss.ss_ScanTupleSlot; @@ -1428,19 +1916,7 @@ agg_retrieve_hash_table(AggState *aggstate) pergroup = entry->pergroup; - /* - * Finalize each aggregate calculation, and stash results in the - * per-output-tuple context. 
- */ - for (aggno = 0; aggno < aggstate->numaggs; aggno++) - { - AggStatePerAgg peraggstate = &peragg[aggno]; - AggStatePerGroup pergroupstate = &pergroup[aggno]; - - Assert(peraggstate->numSortCols == 0); - finalize_aggregate(aggstate, peraggstate, pergroupstate, - &aggvalues[aggno], &aggnulls[aggno]); - } + finalize_aggregates(aggstate, peragg, pergroup, 0); /* * Use the representative input tuple for any references to @@ -1448,30 +1924,9 @@ agg_retrieve_hash_table(AggState *aggstate) */ econtext->ecxt_outertuple = firstSlot; - /* - * Check the qual (HAVING clause); if the group does not match, ignore - * it and loop back to try to process another group. - */ - if (ExecQual(aggstate->ss.ps.qual, econtext, false)) - { - /* - * Form and return a projection tuple using the aggregate results - * and the representative input tuple. - */ - TupleTableSlot *result; - ExprDoneCond isDone; - - result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone); - - if (isDone != ExprEndResult) - { - aggstate->ss.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; - } - } - else - InstrCountFiltered1(aggstate, 1); + result = project_aggregates(aggstate); + if (result) + return result; } /* No more groups */ @@ -1494,7 +1949,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ExprContext *econtext; int numaggs, aggno; + int phase; ListCell *l; + Bitmapset *all_grouped_cols = NULL; + int numGroupingSets = 1; + int numPhases; + int currentsortno = 0; + int i = 0; + int j = 0; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -1508,38 +1970,68 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->aggs = NIL; aggstate->numaggs = 0; - aggstate->eqfunctions = NULL; + aggstate->maxsets = 0; aggstate->hashfunctions = NULL; + aggstate->projected_set = -1; + aggstate->current_set = 0; aggstate->peragg = NULL; aggstate->curperagg = NULL; aggstate->agg_done = false; + aggstate->input_done = false; aggstate->pergroup 
= NULL; aggstate->grp_firstTuple = NULL; aggstate->hashtable = NULL; + aggstate->sort_in = NULL; + aggstate->sort_out = NULL; /* - * Create expression contexts. We need two, one for per-input-tuple - * processing and one for per-output-tuple processing. We cheat a little - * by using ExecAssignExprContext() to build both. + * Calculate the maximum number of grouping sets in any phase; this + * determines the size of some allocations. */ - ExecAssignExprContext(estate, &aggstate->ss.ps); - aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext; - ExecAssignExprContext(estate, &aggstate->ss.ps); + if (node->groupingSets) + { + Assert(node->aggstrategy != AGG_HASHED); + + numGroupingSets = list_length(node->groupingSets); + + foreach(l, node->chain) + { + Agg *agg = lfirst(l); + + numGroupingSets = Max(numGroupingSets, + list_length(agg->groupingSets)); + } + } + + aggstate->maxsets = numGroupingSets; + aggstate->numphases = numPhases = 1 + list_length(node->chain); + + aggstate->aggcontexts = (ExprContext **) + palloc0(sizeof(ExprContext *) * numGroupingSets); /* - * We also need a long-lived memory context for holding hashtable data - * structures and transition values. NOTE: the details of what is stored - * in aggcontext and what is stored in the regular per-query memory - * context are driven by a simple decision: we want to reset the - * aggcontext at group boundaries (if not hashing) and in ExecReScanAgg to - * recover no-longer-wanted space. + * Create expression contexts. We need three or more, one for + * per-input-tuple processing, one for per-output-tuple processing, and + * one for each grouping set. The per-tuple memory context of the + * per-grouping-set ExprContexts (aggcontexts) replaces the standalone + * memory context formerly used to hold transition values. We cheat a + * little by using ExecAssignExprContext() to build all of them. 
+ * + * NOTE: the details of what is stored in aggcontexts and what is stored + * in the regular per-query memory context are driven by a simple + * decision: we want to reset the aggcontext at group boundaries (if not + * hashing) and in ExecReScanAgg to recover no-longer-wanted space. */ - aggstate->aggcontext = - AllocSetContextCreate(CurrentMemoryContext, - "AggContext", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ExecAssignExprContext(estate, &aggstate->ss.ps); + aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext; + + for (i = 0; i < numGroupingSets; ++i) + { + ExecAssignExprContext(estate, &aggstate->ss.ps); + aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext; + } + + ExecAssignExprContext(estate, &aggstate->ss.ps); /* * tuple table initialization @@ -1547,6 +2039,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ExecInitScanTupleSlot(estate, &aggstate->ss); ExecInitResultTupleSlot(estate, &aggstate->ss.ps); aggstate->hashslot = ExecInitExtraTupleSlot(estate); + aggstate->sort_slot = ExecInitExtraTupleSlot(estate); /* * initialize child expressions @@ -1565,7 +2058,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) (PlanState *) aggstate); /* - * initialize child nodes + * Initialize child nodes. * * If we are doing a hashed aggregation then the child plan does not need * to handle REWIND efficiently; see ExecReScanAgg. @@ -1579,6 +2072,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * initialize source tuple type. */ ExecAssignScanTypeFromOuterPlan(&aggstate->ss); + if (node->chain) + ExecSetSlotDescriptor(aggstate->sort_slot, + aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor); /* * Initialize result tuple type and projection info. @@ -1606,23 +2102,104 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) } /* - * If we are grouping, precompute fmgr lookup data for inner loop. We need - * both equality and hashing functions to do it by hashing, but only - * equality if not hashing. 
+ * For each phase, prepare grouping set data and fmgr lookup data for + * compare functions. Accumulate all_grouped_cols in passing. */ - if (node->numCols > 0) + + aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData)); + + for (phase = 0; phase < numPhases; ++phase) { - if (node->aggstrategy == AGG_HASHED) - execTuplesHashPrepare(node->numCols, - node->grpOperators, - &aggstate->eqfunctions, - &aggstate->hashfunctions); + AggStatePerPhase phasedata = &aggstate->phases[phase]; + Agg *aggnode; + Sort *sortnode; + int num_sets; + + if (phase > 0) + { + aggnode = list_nth(node->chain, phase-1); + sortnode = (Sort *) aggnode->plan.lefttree; + Assert(IsA(sortnode, Sort)); + } + else + { + aggnode = node; + sortnode = NULL; + } + + phasedata->numsets = num_sets = list_length(aggnode->groupingSets); + + if (num_sets) + { + phasedata->gset_lengths = palloc(num_sets * sizeof(int)); + phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *)); + + i = 0; + foreach(l, aggnode->groupingSets) + { + int current_length = list_length(lfirst(l)); + Bitmapset *cols = NULL; + + /* planner forces this to be correct */ + for (j = 0; j < current_length; ++j) + cols = bms_add_member(cols, aggnode->grpColIdx[j]); + + phasedata->grouped_cols[i] = cols; + phasedata->gset_lengths[i] = current_length; + ++i; + } + + all_grouped_cols = bms_add_members(all_grouped_cols, + phasedata->grouped_cols[0]); + } else - aggstate->eqfunctions = - execTuplesMatchPrepare(node->numCols, - node->grpOperators); + { + Assert(phase == 0); + + phasedata->gset_lengths = NULL; + phasedata->grouped_cols = NULL; + } + + /* + * If we are grouping, precompute fmgr lookup data for inner loop. 
+ */ + if (aggnode->aggstrategy == AGG_SORTED) + { + Assert(aggnode->numCols > 0); + + phasedata->eqfunctions = + execTuplesMatchPrepare(aggnode->numCols, + aggnode->grpOperators); + } + + phasedata->aggnode = aggnode; + phasedata->sortnode = sortnode; } + /* + * Convert all_grouped_cols to a descending-order list. + */ + i = -1; + while ((i = bms_next_member(all_grouped_cols, i)) >= 0) + aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols); + + /* + * Hashing can only appear in the initial phase. + */ + + if (node->aggstrategy == AGG_HASHED) + execTuplesHashPrepare(node->numCols, + node->grpOperators, + &aggstate->phases[0].eqfunctions, + &aggstate->hashfunctions); + + /* + * Initialize current phase-dependent values to initial phase + */ + + aggstate->current_phase = 0; + initialize_phase(aggstate, 0); + /* * Set up aggregate-result storage in the output expr context, and also * allocate my private per-agg working storage @@ -1645,7 +2222,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) { AggStatePerGroup pergroup; - pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs); + pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) + * numaggs + * numGroupingSets); + aggstate->pergroup = pergroup; } @@ -1708,7 +2288,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* Begin filling in the peraggstate data */ peraggstate->aggrefstate = aggrefstate; peraggstate->aggref = aggref; - peraggstate->sortstate = NULL; + peraggstate->sortstates =(Tuplesortstate**) + palloc0(sizeof(Tuplesortstate*) * numGroupingSets); + + for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++) + peraggstate->sortstates[currentsortno] = NULL; /* Fetch the pg_aggregate row */ aggTuple = SearchSysCache1(AGGFNOID, @@ -2016,31 +2600,41 @@ ExecEndAgg(AggState *node) { PlanState *outerPlan; int aggno; + int numGroupingSets = Max(node->maxsets, 1); + int setno; /* Make sure we have closed any open tuplesorts */ + + if 
(node->sort_in) + tuplesort_end(node->sort_in); + if (node->sort_out) + tuplesort_end(node->sort_out); + for (aggno = 0; aggno < node->numaggs; aggno++) { AggStatePerAgg peraggstate = &node->peragg[aggno]; - if (peraggstate->sortstate) - tuplesort_end(peraggstate->sortstate); + for (setno = 0; setno < numGroupingSets; setno++) + { + if (peraggstate->sortstates[setno]) + tuplesort_end(peraggstate->sortstates[setno]); + } } /* And ensure any agg shutdown callbacks have been called */ - ReScanExprContext(node->ss.ps.ps_ExprContext); + for (setno = 0; setno < numGroupingSets; setno++) + ReScanExprContext(node->aggcontexts[setno]); /* - * Free both the expr contexts. + * We don't actually free any ExprContexts here (see comment in + * ExecFreeExprContext), just unlinking the output one from the plan node + * suffices. */ ExecFreeExprContext(&node->ss.ps); - node->ss.ps.ps_ExprContext = node->tmpcontext; - ExecFreeExprContext(&node->ss.ps); /* clean up tuple table */ ExecClearTuple(node->ss.ss_ScanTupleSlot); - MemoryContextDelete(node->aggcontext); - outerPlan = outerPlanState(node); ExecEndNode(outerPlan); } @@ -2050,13 +2644,16 @@ ExecReScanAgg(AggState *node) { ExprContext *econtext = node->ss.ps.ps_ExprContext; PlanState *outerPlan = outerPlanState(node); + Agg *aggnode = (Agg *) node->ss.ps.plan; int aggno; + int numGroupingSets = Max(node->maxsets, 1); + int setno; node->agg_done = false; node->ss.ps.ps_TupFromTlist = false; - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* * In the hashed case, if we haven't yet built the hash table then we @@ -2082,14 +2679,34 @@ ExecReScanAgg(AggState *node) /* Make sure we have closed any open tuplesorts */ for (aggno = 0; aggno < node->numaggs; aggno++) { - AggStatePerAgg peraggstate = &node->peragg[aggno]; + for (setno = 0; setno < numGroupingSets; setno++) + { + AggStatePerAgg peraggstate = &node->peragg[aggno]; - if (peraggstate->sortstate) - 
tuplesort_end(peraggstate->sortstate); - peraggstate->sortstate = NULL; + if (peraggstate->sortstates[setno]) + { + tuplesort_end(peraggstate->sortstates[setno]); + peraggstate->sortstates[setno] = NULL; + } + } } - /* We don't need to ReScanExprContext here; ExecReScan already did it */ + /* + * We don't need to ReScanExprContext the output tuple context here; + * ExecReScan already did it. But we do need to reset our per-grouping-set + * contexts, which may have transvalues stored in them. (We use rescan + * rather than just reset because transfns may have registered callbacks + * that need to be run now.) + * + * Note that with AGG_HASHED, the hash table is allocated in a sub-context + * of the aggcontext. This used to be an issue, but now, resetting a + * context automatically deletes sub-contexts too. + */ + + for (setno = 0; setno < numGroupingSets; setno++) + { + ReScanExprContext(node->aggcontexts[setno]); + } /* Release first tuple of group, if we have made a copy */ if (node->grp_firstTuple != NULL) @@ -2097,21 +2714,13 @@ ExecReScanAgg(AggState *node) heap_freetuple(node->grp_firstTuple); node->grp_firstTuple = NULL; } + ExecClearTuple(node->ss.ss_ScanTupleSlot); /* Forget current agg values */ MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs); MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs); - /* - * Release all temp storage. Note that with AGG_HASHED, the hash table is - * allocated in a sub-context of the aggcontext. We're going to rebuild - * the hash table from scratch, so we need to use - * MemoryContextResetAndDeleteChildren() to avoid leaking the old hash - * table's memory context header. 
- */ - MemoryContextResetAndDeleteChildren(node->aggcontext); - - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* Rebuild an empty hash table */ build_hash_table(node); @@ -2123,13 +2732,15 @@ ExecReScanAgg(AggState *node) * Reset the per-group state (in particular, mark transvalues null) */ MemSet(node->pergroup, 0, - sizeof(AggStatePerGroupData) * node->numaggs); + sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets); + + /* reset to phase 0 */ + initialize_phase(node, 0); + + node->input_done = false; + node->projected_set = -1; } - /* - * if chgParam of subnode is not null then plan will be re-scanned by - * first ExecProcNode. - */ if (outerPlan->chgParam == NULL) ExecReScan(outerPlan); } @@ -2151,8 +2762,11 @@ ExecReScanAgg(AggState *node) * values could conceivably appear in future.) * * If aggcontext isn't NULL, the function also stores at *aggcontext the - * identity of the memory context that aggregate transition values are - * being stored in. + * identity of the memory context that aggregate transition values are being + * stored in. Note that the same aggregate call site (flinfo) may be called + * interleaved on different transition values in different contexts, so it's + * not kosher to cache aggcontext under fn_extra. It is, however, kosher to + * cache it in the transvalue itself (for internal-type transvalues). 
*/ int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext) @@ -2160,7 +2774,11 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext) if (fcinfo->context && IsA(fcinfo->context, AggState)) { if (aggcontext) - *aggcontext = ((AggState *) fcinfo->context)->aggcontext; + { + AggState *aggstate = ((AggState *) fcinfo->context); + ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set]; + *aggcontext = cxt->ecxt_per_tuple_memory; + } return AGG_CONTEXT_AGGREGATE; } if (fcinfo->context && IsA(fcinfo->context, WindowAggState)) @@ -2244,8 +2862,9 @@ AggRegisterCallback(FunctionCallInfo fcinfo, if (fcinfo->context && IsA(fcinfo->context, AggState)) { AggState *aggstate = (AggState *) fcinfo->context; + ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set]; - RegisterExprContextCallback(aggstate->ss.ps.ps_ExprContext, func, arg); + RegisterExprContextCallback(cxt, func, arg); return; } diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile index fe4781a8e88bd4790ebe4733ab38d444b1b60460..2d2ba84fe9d1202a5f0c51307ccaca8529b57245 100644 --- a/src/backend/lib/Makefile +++ b/src/backend/lib/Makefile @@ -12,6 +12,7 @@ subdir = src/backend/lib top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = ilist.o binaryheap.o hyperloglog.o pairingheap.o rbtree.o stringinfo.o +OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \ + rbtree.o stringinfo.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/lib/bipartite_match.c b/src/backend/lib/bipartite_match.c new file mode 100644 index 0000000000000000000000000000000000000000..57d6d548cff58599f39172facea8364b3cd0293f --- /dev/null +++ b/src/backend/lib/bipartite_match.c @@ -0,0 +1,161 @@ +/*------------------------------------------------------------------------- + * + * bipartite_match.c + * Hopcroft-Karp maximum cardinality algorithm for bipartite graphs + * + * This implementation is based on pseudocode found at: + * + * http://en.wikipedia.org/w/index.php?title=Hopcroft%E2%80%93Karp_algorithm&oldid=593898016 + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/lib/bipartite_match.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> +#include <limits.h> + +#include "lib/bipartite_match.h" +#include "miscadmin.h" +#include "utils/palloc.h" + +static bool hk_breadth_search(BipartiteMatchState *state); +static bool hk_depth_search(BipartiteMatchState *state, int u, int depth); + +/* + * Given the size of U and V, where each is indexed 1..size, and an adjacency + * list, perform the matching and return the resulting state. 
+ */ +BipartiteMatchState * +BipartiteMatch(int u_size, int v_size, short **adjacency) +{ + BipartiteMatchState *state = palloc(sizeof(BipartiteMatchState)); + + Assert(u_size < SHRT_MAX); + Assert(v_size < SHRT_MAX); + + state->u_size = u_size; + state->v_size = v_size; + state->matching = 0; + state->adjacency = adjacency; + state->pair_uv = palloc0((u_size + 1) * sizeof(short)); + state->pair_vu = palloc0((v_size + 1) * sizeof(short)); + state->distance = palloc((u_size + 1) * sizeof(float)); + state->queue = palloc((u_size + 2) * sizeof(short)); + + while (hk_breadth_search(state)) + { + int u; + + for (u = 1; u <= u_size; ++u) + if (state->pair_uv[u] == 0) + if (hk_depth_search(state, u, 1)) + state->matching++; + + CHECK_FOR_INTERRUPTS(); /* just in case */ + } + + return state; +} + +/* + * Free a state returned by BipartiteMatch, except for the original adjacency + * list, which is owned by the caller. This only frees memory, so it's optional. + */ +void +BipartiteMatchFree(BipartiteMatchState *state) +{ + /* adjacency matrix is treated as owned by the caller */ + pfree(state->pair_uv); + pfree(state->pair_vu); + pfree(state->distance); + pfree(state->queue); + pfree(state); +} + +static bool +hk_breadth_search(BipartiteMatchState *state) +{ + int usize = state->u_size; + short *queue = state->queue; + float *distance = state->distance; + int qhead = 0; /* we never enqueue any node more than once */ + int qtail = 0; /* so don't have to worry about wrapping */ + int u; + + distance[0] = INFINITY; + + for (u = 1; u <= usize; ++u) + { + if (state->pair_uv[u] == 0) + { + distance[u] = 0; + queue[qhead++] = u; + } + else + distance[u] = INFINITY; + } + + while (qtail < qhead) + { + u = queue[qtail++]; + + if (distance[u] < distance[0]) + { + short *u_adj = state->adjacency[u]; + int i = u_adj ? 
u_adj[0] : 0; + + for (; i > 0; --i) + { + int u_next = state->pair_vu[u_adj[i]]; + + if (isinf(distance[u_next])) + { + distance[u_next] = 1 + distance[u]; + queue[qhead++] = u_next; + Assert(qhead <= usize+2); + } + } + } + } + + return !isinf(distance[0]); +} + +static bool +hk_depth_search(BipartiteMatchState *state, int u, int depth) +{ + float *distance = state->distance; + short *pair_uv = state->pair_uv; + short *pair_vu = state->pair_vu; + short *u_adj = state->adjacency[u]; + int i = u_adj ? u_adj[0] : 0; + + if (u == 0) + return true; + + if ((depth % 8) == 0) + check_stack_depth(); + + for (; i > 0; --i) + { + int v = u_adj[i]; + + if (distance[pair_vu[v]] == distance[u] + 1) + { + if (hk_depth_search(state, pair_vu[v], depth+1)) + { + pair_vu[v] = u; + pair_uv[u] = v; + return true; + } + } + } + + distance[u] = INFINITY; + return false; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index bdc7e61935c0bbabe9ee13a81d3ec6571e2f98eb..fa7d2865c1ec42438b74cfe4bf4125ffd224f441 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -839,6 +839,8 @@ _copyAgg(const Agg *from) COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); + COPY_NODE_FIELD(groupingSets); + COPY_NODE_FIELD(chain); return newnode; } @@ -1208,6 +1210,23 @@ _copyAggref(const Aggref *from) return newnode; } +/* + * _copyGroupingFunc + */ +static GroupingFunc * +_copyGroupingFunc(const GroupingFunc *from) +{ + GroupingFunc *newnode = makeNode(GroupingFunc); + + COPY_NODE_FIELD(args); + COPY_NODE_FIELD(refs); + COPY_NODE_FIELD(cols); + COPY_SCALAR_FIELD(agglevelsup); + COPY_LOCATION_FIELD(location); + + return newnode; +} + /* * _copyWindowFunc */ @@ -2152,6 +2171,18 @@ _copySortGroupClause(const SortGroupClause *from) return newnode; } +static GroupingSet * +_copyGroupingSet(const GroupingSet *from) +{ + GroupingSet *newnode = makeNode(GroupingSet); + + COPY_SCALAR_FIELD(kind); + 
COPY_NODE_FIELD(content); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static WindowClause * _copyWindowClause(const WindowClause *from) { @@ -2676,6 +2707,7 @@ _copyQuery(const Query *from) COPY_NODE_FIELD(onConflict); COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(groupClause); + COPY_NODE_FIELD(groupingSets); COPY_NODE_FIELD(havingQual); COPY_NODE_FIELD(windowClause); COPY_NODE_FIELD(distinctClause); @@ -4309,6 +4341,9 @@ copyObject(const void *from) case T_Aggref: retval = _copyAggref(from); break; + case T_GroupingFunc: + retval = _copyGroupingFunc(from); + break; case T_WindowFunc: retval = _copyWindowFunc(from); break; @@ -4878,6 +4913,9 @@ copyObject(const void *from) case T_SortGroupClause: retval = _copySortGroupClause(from); break; + case T_GroupingSet: + retval = _copyGroupingSet(from); + break; case T_WindowClause: retval = _copyWindowClause(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index d483221fb7a2eff4064f571a42e5cad07a75dce3..d7928a99176609ae83c5da20f695492576721055 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -207,6 +207,21 @@ _equalAggref(const Aggref *a, const Aggref *b) return true; } +static bool +_equalGroupingFunc(const GroupingFunc *a, const GroupingFunc *b) +{ + COMPARE_NODE_FIELD(args); + + /* + * We must not compare the refs or cols field + */ + + COMPARE_SCALAR_FIELD(agglevelsup); + COMPARE_LOCATION_FIELD(location); + + return true; +} + static bool _equalWindowFunc(const WindowFunc *a, const WindowFunc *b) { @@ -896,6 +911,7 @@ _equalQuery(const Query *a, const Query *b) COMPARE_NODE_FIELD(onConflict); COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(groupClause); + COMPARE_NODE_FIELD(groupingSets); COMPARE_NODE_FIELD(havingQual); COMPARE_NODE_FIELD(windowClause); COMPARE_NODE_FIELD(distinctClause); @@ -2426,6 +2442,16 @@ _equalSortGroupClause(const SortGroupClause *a, const SortGroupClause *b) return true; } +static bool 
+_equalGroupingSet(const GroupingSet *a, const GroupingSet *b) +{ + COMPARE_SCALAR_FIELD(kind); + COMPARE_NODE_FIELD(content); + COMPARE_LOCATION_FIELD(location); + + return true; +} + static bool _equalWindowClause(const WindowClause *a, const WindowClause *b) { @@ -2693,6 +2719,9 @@ equal(const void *a, const void *b) case T_Aggref: retval = _equalAggref(a, b); break; + case T_GroupingFunc: + retval = _equalGroupingFunc(a, b); + break; case T_WindowFunc: retval = _equalWindowFunc(a, b); break; @@ -3249,6 +3278,9 @@ equal(const void *a, const void *b) case T_SortGroupClause: retval = _equalSortGroupClause(a, b); break; + case T_GroupingSet: + retval = _equalGroupingSet(a, b); + break; case T_WindowClause: retval = _equalWindowClause(a, b); break; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index 94cab476eb01f698924b995eceec34eb62c87c83..a6737514ef176cc1cecce235c4d177fde6249fbf 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -822,6 +822,32 @@ list_intersection(const List *list1, const List *list2) return result; } +/* + * As list_intersection but operates on lists of integers. + */ +List * +list_intersection_int(const List *list1, const List *list2) +{ + List *result; + const ListCell *cell; + + if (list1 == NIL || list2 == NIL) + return NIL; + + Assert(IsIntegerList(list1)); + Assert(IsIntegerList(list2)); + + result = NIL; + foreach(cell, list1) + { + if (list_member_int(list2, lfirst_int(cell))) + result = lappend_int(result, lfirst_int(cell)); + } + + check_list_invariants(result); + return result; +} + /* * Return a list that contains all the cells in list1 that are not in * list2. 
The returned list is freshly allocated via palloc(), but the diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 6fdf44d57367e4b60bb7cb966bee72a6d89310f3..a9b58eb31fc7085533e7240343893660ba395b77 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -554,3 +554,18 @@ makeFuncCall(List *name, List *args, int location) n->location = location; return n; } + +/* + * makeGroupingSet + * + */ +GroupingSet * +makeGroupingSet(GroupingSetKind kind, List *content, int location) +{ + GroupingSet *n = makeNode(GroupingSet); + + n->kind = kind; + n->content = content; + n->location = location; + return n; +} diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 42d62d32d93ffb709b210a54b876647086e305ea..41763931339adc68f39b8a511e1b498911bef1ba 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -54,6 +54,9 @@ exprType(const Node *expr) case T_Aggref: type = ((const Aggref *) expr)->aggtype; break; + case T_GroupingFunc: + type = INT4OID; + break; case T_WindowFunc: type = ((const WindowFunc *) expr)->wintype; break; @@ -750,6 +753,9 @@ exprCollation(const Node *expr) case T_Aggref: coll = ((const Aggref *) expr)->aggcollid; break; + case T_GroupingFunc: + coll = InvalidOid; + break; case T_WindowFunc: coll = ((const WindowFunc *) expr)->wincollid; break; @@ -986,6 +992,9 @@ exprSetCollation(Node *expr, Oid collation) case T_Aggref: ((Aggref *) expr)->aggcollid = collation; break; + case T_GroupingFunc: + Assert(!OidIsValid(collation)); + break; case T_WindowFunc: ((WindowFunc *) expr)->wincollid = collation; break; @@ -1202,6 +1211,9 @@ exprLocation(const Node *expr) /* function name should always be the first thing */ loc = ((const Aggref *) expr)->location; break; + case T_GroupingFunc: + loc = ((const GroupingFunc *) expr)->location; + break; case T_WindowFunc: /* function name should always be the first thing */ loc = ((const WindowFunc *) expr)->location; @@ 
-1491,6 +1503,9 @@ exprLocation(const Node *expr) /* XMLSERIALIZE keyword should always be the first thing */ loc = ((const XmlSerialize *) expr)->location; break; + case T_GroupingSet: + loc = ((const GroupingSet *) expr)->location; + break; case T_WithClause: loc = ((const WithClause *) expr)->location; break; @@ -1685,6 +1700,15 @@ expression_tree_walker(Node *node, return true; } break; + case T_GroupingFunc: + { + GroupingFunc *grouping = (GroupingFunc *) node; + + if (expression_tree_walker((Node *) grouping->args, + walker, context)) + return true; + } + break; case T_WindowFunc: { WindowFunc *expr = (WindowFunc *) node; @@ -2243,6 +2267,29 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_GroupingFunc: + { + GroupingFunc *grouping = (GroupingFunc *) node; + GroupingFunc *newnode; + + FLATCOPY(newnode, grouping, GroupingFunc); + MUTATE(newnode->args, grouping->args, List *); + + /* + * We assume here that mutating the arguments does not change + * the semantics, i.e. that the arguments are not mutated in a + * way that makes them semantically different from their + * previously matching expressions in the GROUP BY clause. + * + * If a mutator somehow wanted to do this, it would have to + * handle the refs and cols lists itself as appropriate. 
+ */ + newnode->refs = list_copy(grouping->refs); + newnode->cols = list_copy(grouping->cols); + + return (Node *) newnode; + } + break; case T_WindowFunc: { WindowFunc *wfunc = (WindowFunc *) node; @@ -2962,6 +3009,8 @@ raw_expression_tree_walker(Node *node, break; case T_RangeVar: return walker(((RangeVar *) node)->alias, context); + case T_GroupingFunc: + return walker(((GroupingFunc *) node)->args, context); case T_SubLink: { SubLink *sublink = (SubLink *) node; @@ -3287,6 +3336,8 @@ raw_expression_tree_walker(Node *node, /* for now, constraints are ignored */ } break; + case T_GroupingSet: + return walker(((GroupingSet *) node)->content, context); case T_LockingClause: return walker(((LockingClause *) node)->lockedRels, context); case T_XmlSerialize: diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 7918553da0a84dbc4c766b830ba3aedee853c7fa..66fee3ef61d228899766b23a2727d0781628a224 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -679,6 +679,9 @@ _outAgg(StringInfo str, const Agg *node) appendStringInfo(str, " %u", node->grpOperators[i]); WRITE_LONG_FIELD(numGroups); + + WRITE_NODE_FIELD(groupingSets); + WRITE_NODE_FIELD(chain); } static void @@ -1003,6 +1006,18 @@ _outAggref(StringInfo str, const Aggref *node) WRITE_LOCATION_FIELD(location); } +static void +_outGroupingFunc(StringInfo str, const GroupingFunc *node) +{ + WRITE_NODE_TYPE("GROUPINGFUNC"); + + WRITE_NODE_FIELD(args); + WRITE_NODE_FIELD(refs); + WRITE_NODE_FIELD(cols); + WRITE_INT_FIELD(agglevelsup); + WRITE_LOCATION_FIELD(location); +} + static void _outWindowFunc(StringInfo str, const WindowFunc *node) { @@ -2364,6 +2379,7 @@ _outQuery(StringInfo str, const Query *node) WRITE_NODE_FIELD(onConflict); WRITE_NODE_FIELD(returningList); WRITE_NODE_FIELD(groupClause); + WRITE_NODE_FIELD(groupingSets); WRITE_NODE_FIELD(havingQual); WRITE_NODE_FIELD(windowClause); WRITE_NODE_FIELD(distinctClause); @@ -2398,6 +2414,16 @@ 
_outSortGroupClause(StringInfo str, const SortGroupClause *node) WRITE_BOOL_FIELD(hashable); } +static void +_outGroupingSet(StringInfo str, const GroupingSet *node) +{ + WRITE_NODE_TYPE("GROUPINGSET"); + + WRITE_ENUM_FIELD(kind, GroupingSetKind); + WRITE_NODE_FIELD(content); + WRITE_LOCATION_FIELD(location); +} + static void _outWindowClause(StringInfo str, const WindowClause *node) { @@ -3087,6 +3113,9 @@ _outNode(StringInfo str, const void *obj) case T_Aggref: _outAggref(str, obj); break; + case T_GroupingFunc: + _outGroupingFunc(str, obj); + break; case T_WindowFunc: _outWindowFunc(str, obj); break; @@ -3349,6 +3378,9 @@ _outNode(StringInfo str, const void *obj) case T_SortGroupClause: _outSortGroupClause(str, obj); break; + case T_GroupingSet: + _outGroupingSet(str, obj); + break; case T_WindowClause: _outWindowClause(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c8fb894a75abde909da414d27b732bd874f5a331..6fd9d46ee7963f72d9b49b3f9a857aa013e7a8db 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -217,6 +217,7 @@ _readQuery(void) READ_NODE_FIELD(onConflict); READ_NODE_FIELD(returningList); READ_NODE_FIELD(groupClause); + READ_NODE_FIELD(groupingSets); READ_NODE_FIELD(havingQual); READ_NODE_FIELD(windowClause); READ_NODE_FIELD(distinctClause); @@ -292,6 +293,21 @@ _readSortGroupClause(void) READ_DONE(); } +/* + * _readGroupingSet + */ +static GroupingSet * +_readGroupingSet(void) +{ + READ_LOCALS(GroupingSet); + + READ_ENUM_FIELD(kind, GroupingSetKind); + READ_NODE_FIELD(content); + READ_LOCATION_FIELD(location); + + READ_DONE(); +} + /* * _readWindowClause */ @@ -551,6 +567,23 @@ _readAggref(void) READ_DONE(); } +/* + * _readGroupingFunc + */ +static GroupingFunc * +_readGroupingFunc(void) +{ + READ_LOCALS(GroupingFunc); + + READ_NODE_FIELD(args); + READ_NODE_FIELD(refs); + READ_NODE_FIELD(cols); + READ_INT_FIELD(agglevelsup); + READ_LOCATION_FIELD(location); + + READ_DONE(); +} 
+ /* * _readWindowFunc */ @@ -1386,6 +1419,8 @@ parseNodeString(void) return_value = _readWithCheckOption(); else if (MATCH("SORTGROUPCLAUSE", 15)) return_value = _readSortGroupClause(); + else if (MATCH("GROUPINGSET", 11)) + return_value = _readGroupingSet(); else if (MATCH("WINDOWCLAUSE", 12)) return_value = _readWindowClause(); else if (MATCH("ROWMARKCLAUSE", 13)) @@ -1412,6 +1447,8 @@ parseNodeString(void) return_value = _readParam(); else if (MATCH("AGGREF", 6)) return_value = _readAggref(); + else if (MATCH("GROUPINGFUNC", 12)) + return_value = _readGroupingFunc(); else if (MATCH("WINDOWFUNC", 10)) return_value = _readWindowFunc(); else if (MATCH("ARRAYREF", 8)) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 4cd1bf65e7482edebe317261aa86e5362e361829..1fd8763c966719c98d40f8810c0cbfb9808430b6 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1290,6 +1290,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, */ if (parse->hasAggs || parse->groupClause || + parse->groupingSets || parse->havingQual || parse->distinctClause || parse->sortClause || @@ -2150,7 +2151,7 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) * subquery uses grouping or aggregation, put it in HAVING (since the * qual really refers to the group-result rows). 
*/ - if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) + if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) subquery->havingQual = make_and_qual(subquery->havingQual, qual); else subquery->jointree->quals = diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 26e6e1b6512e528a238b1570a298f1a0dae0b0fc..976ca029c551d271db33a482ffad8de6d5484c89 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1954,7 +1954,8 @@ adjust_rowcount_for_semijoins(PlannerInfo *root, nraw = approximate_joinrel_size(root, sjinfo->syn_righthand); nunique = estimate_num_groups(root, sjinfo->semi_rhs_exprs, - nraw); + nraw, + NULL); if (rowcount > nunique) rowcount = nunique; } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 11d39336b1a66172545c5f99d0203c6df742fd43..a6c17534f0aa38e09e655c1e3c5ceb5012610082 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -581,6 +581,7 @@ query_supports_distinctness(Query *query) { if (query->distinctClause != NIL || query->groupClause != NIL || + query->groupingSets != NIL || query->hasAggs || query->havingQual || query->setOperations) @@ -649,10 +650,10 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) } /* - * Similarly, GROUP BY guarantees uniqueness if all the grouped columns - * appear in colnos and operator semantics match. + * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all + * the grouped columns appear in colnos and operator semantics match. */ - if (query->groupClause) + if (query->groupClause && !query->groupingSets) { foreach(l, query->groupClause) { @@ -668,6 +669,27 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) if (l == NULL) /* had matches for all? 
*/ return true; } + else if (query->groupingSets) + { + /* + * If we have grouping sets with expressions, we probably + * don't have uniqueness and analysis would be hard. Punt. + */ + if (query->groupClause) + return false; + + /* + * If we have no groupClause (therefore no grouping expressions), + * we might have one or many empty grouping sets. If there's just + * one, then we're returning only one row and are certainly unique. + * But otherwise, we know we're certainly not unique. + */ + if (list_length(query->groupingSets) == 1 && + ((GroupingSet *)linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY) + return true; + else + return false; + } else { /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0775a676d05c1cf5832a6aaca44fdc7c7212c8fa..6fde832227791f41dc4ec7035a5bd5d26be3057c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1042,6 +1042,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) numGroupCols, groupColIdx, groupOperators, + NIL, numGroups, subplan); } @@ -4492,6 +4493,7 @@ Agg * make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + List *groupingSets, long numGroups, Plan *lefttree) { @@ -4521,10 +4523,12 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, * group otherwise. */ if (aggstrategy == AGG_PLAIN) - plan->plan_rows = 1; + plan->plan_rows = groupingSets ? list_length(groupingSets) : 1; else plan->plan_rows = numGroups; + node->groupingSets = groupingSets; + /* * We also need to account for the cost of evaluation of the qual (ie, the * HAVING clause) and the tlist. 
Note that cost_qual_eval doesn't charge @@ -4545,6 +4549,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, plan->qual = qual; plan->targetlist = tlist; + plan->lefttree = lefttree; plan->righttree = NULL; diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index af772a2c99333a8a9bf4ecf079122c500f0ef36c..f0e9c05a4520355a0592e954edbab68b96ec53fa 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -96,7 +96,7 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) * performs assorted processing related to these features between calling * preprocess_minmax_aggregates and optimize_minmax_aggregates.) */ - if (parse->groupClause || parse->hasWindowFuncs) + if (parse->groupClause || list_length(parse->groupingSets) > 1 || parse->hasWindowFuncs) return; /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9ba10516bb2221dada084575ff0781fa00dd75dc..d3f5a1401702a1991ef619646e16d99661269253 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -16,13 +16,16 @@ #include "postgres.h" #include <limits.h> +#include <math.h> #include "access/htup_details.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "foreign/fdwapi.h" #include "miscadmin.h" +#include "lib/bipartite_match.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif @@ -38,6 +41,7 @@ #include "optimizer/tlist.h" #include "parser/analyze.h" #include "parser/parsetree.h" +#include "parser/parse_agg.h" #include "rewrite/rewriteManip.h" #include "utils/rel.h" #include "utils/selfuncs.h" @@ -67,6 +71,7 @@ typedef struct { List *tlist; /* preprocessed query targetlist */ List *activeWindows; /* active windows, if any */ + List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; /* Local functions */ @@ -79,7 +84,9 @@ static double 
preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); static bool limit_needed(Query *parse); -static void preprocess_groupclause(PlannerInfo *root); +static List *preprocess_groupclause(PlannerInfo *root, List *force); +static List *extract_rollup_sets(List *groupingSets); +static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, @@ -115,7 +122,16 @@ static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, int *ordNumCols, AttrNumber **ordColIdx, Oid **ordOperators); - +static Plan *build_grouping_chain(PlannerInfo *root, + Query *parse, + List *tlist, + bool need_sort_for_grouping, + List *rollup_groupclauses, + List *rollup_lists, + AttrNumber *groupColIdx, + AggClauseCosts *agg_costs, + long numGroups, + Plan *result_plan); /***************************************************************************** * @@ -321,6 +337,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->append_rel_list = NIL; root->rowMarks = NIL; root->hasInheritedTarget = false; + root->grouping_map = NULL; root->hasRecursion = hasRecursion; if (hasRecursion) @@ -559,7 +576,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse, if (contain_agg_clause(havingclause) || contain_volatile_functions(havingclause) || - contain_subplans(havingclause)) + contain_subplans(havingclause) || + parse->groupingSets) { /* keep it in HAVING */ newHaving = lappend(newHaving, havingclause); @@ -1248,11 +1266,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *sub_tlist; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; - standard_qp_extra qp_extra; - RelOptInfo *final_rel; - Path *cheapest_path; - Path *sorted_path; - Path *best_path; long numGroups = 0; AggClauseCosts agg_costs; int numGroupCols; @@ -1262,15 +1275,89 @@ 
grouping_planner(PlannerInfo *root, double tuple_fraction) WindowFuncLists *wflists = NULL; List *activeWindows = NIL; OnConflictExpr *onconfl; + int maxref = 0; + int *tleref_to_colnum_map; + List *rollup_lists = NIL; + List *rollup_groupclauses = NIL; + standard_qp_extra qp_extra; + RelOptInfo *final_rel; + Path *cheapest_path; + Path *sorted_path; + Path *best_path; MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); - /* Preprocess GROUP BY clause, if any */ + /* Preprocess Grouping set, if any */ + if (parse->groupingSets) + parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); + if (parse->groupClause) - preprocess_groupclause(root); + { + ListCell *lc; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst(lc); + if (gc->tleSortGroupRef > maxref) + maxref = gc->tleSortGroupRef; + } + } + + tleref_to_colnum_map = palloc((maxref + 1) * sizeof(int)); + + if (parse->groupingSets) + { + ListCell *lc; + ListCell *lc2; + ListCell *lc_set; + List *sets = extract_rollup_sets(parse->groupingSets); + + foreach(lc_set, sets) + { + List *current_sets = reorder_grouping_sets(lfirst(lc_set), + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + List *groupclause = preprocess_groupclause(root, linitial(current_sets)); + int ref = 0; + + /* + * Now that we've pinned down an order for the groupClause for + * this list of grouping sets, we need to remap the entries in + * the grouping sets from sortgrouprefs to plain indices + * (0-based) into the groupClause for this collection of + * grouping sets. 
+ */ + + foreach(lc, groupclause) + { + SortGroupClause *gc = lfirst(lc); + tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; + } + + foreach(lc, current_sets) + { + foreach(lc2, (List *) lfirst(lc)) + { + lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)]; + } + } + + rollup_lists = lcons(current_sets, rollup_lists); + rollup_groupclauses = lcons(groupclause, rollup_groupclauses); + } + } + else + { + /* Preprocess GROUP BY clause, if any */ + if (parse->groupClause) + parse->groupClause = preprocess_groupclause(root, NIL); + rollup_groupclauses = list_make1(parse->groupClause); + } + numGroupCols = list_length(parse->groupClause); /* Preprocess targetlist */ @@ -1350,6 +1437,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * grouping/aggregation operations. */ if (parse->groupClause || + parse->groupingSets || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || @@ -1361,6 +1449,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; + qp_extra.groupClause = llast(rollup_groupclauses); /* * Generate the best unsorted and presorted paths for this Query (but @@ -1393,9 +1482,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { List *groupExprs; - groupExprs = get_sortgrouplist_exprs(parse->groupClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, groupExprs, path_rows); + if (parse->groupingSets) + { + ListCell *lc, + *lc2; + + dNumGroups = 0; + + forboth(lc, rollup_groupclauses, lc2, rollup_lists) + { + ListCell *lc3; + + groupExprs = get_sortgrouplist_exprs(lfirst(lc), + parse->targetList); + + foreach(lc3, lfirst(lc2)) + { + List *gset = lfirst(lc3); + + dNumGroups += estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + } + } + } + else + { + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + dNumGroups = estimate_num_groups(root, 
groupExprs, path_rows, + NULL); + } /* * In GROUP BY mode, an absolute LIMIT is relative to the number @@ -1406,6 +1525,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) if (tuple_fraction >= 1.0) tuple_fraction /= dNumGroups; + /* + * If there's more than one grouping set, we'll have to sort the + * entire input. + */ + if (list_length(rollup_lists) > 1) + tuple_fraction = 0.0; + /* * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all @@ -1421,14 +1547,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) root->group_pathkeys)) tuple_fraction = 0.0; } - else if (parse->hasAggs || root->hasHavingQual) + else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets) { /* * Ungrouped aggregate will certainly want to read all the tuples, - * and it will deliver a single result row (so leave dNumGroups - * set to 1). + * and it will deliver a single result row per grouping set (or 1 + * if no grouping sets were explicitly given, in which case leave + * dNumGroups as-is) */ tuple_fraction = 0.0; + if (parse->groupingSets) + dNumGroups = list_length(parse->groupingSets); } else if (parse->distinctClause) { @@ -1443,7 +1572,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, parse->targetList); - dNumGroups = estimate_num_groups(root, distinctExprs, path_rows); + dNumGroups = estimate_num_groups(root, distinctExprs, path_rows, NULL); /* * Adjust tuple_fraction the same way as for GROUP BY, too. @@ -1526,13 +1655,24 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { /* * If grouping, decide whether to use sorted or hashed grouping. + * If grouping sets are present, we can currently do only sorted + * grouping. 
*/ - use_hashed_grouping = - choose_hashed_grouping(root, - tuple_fraction, limit_tuples, - path_rows, path_width, - cheapest_path, sorted_path, - dNumGroups, &agg_costs); + + if (parse->groupingSets) + { + use_hashed_grouping = false; + } + else + { + use_hashed_grouping = + choose_hashed_grouping(root, + tuple_fraction, limit_tuples, + path_rows, path_width, + cheapest_path, sorted_path, + dNumGroups, &agg_costs); + } + /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } @@ -1598,7 +1738,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; @@ -1657,6 +1797,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) groupColIdx); } + /* + * groupColIdx is now cast in stone, so record a mapping from + * tleSortGroupRef to column index. setrefs.c needs this to + * finalize GROUPING() operations. + */ + + if (parse->groupingSets) + { + AttrNumber *grouping_map = palloc0(sizeof(AttrNumber) * (maxref + 1)); + ListCell *lc; + int i = 0; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst(lc); + grouping_map[gc->tleSortGroupRef] = groupColIdx[i++]; + } + + root->grouping_map = grouping_map; + } + /* * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. 
@@ -1673,52 +1834,43 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) &agg_costs, numGroupCols, groupColIdx, - extract_grouping_ops(parse->groupClause), + extract_grouping_ops(parse->groupClause), + NIL, numGroups, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } - else if (parse->hasAggs) + else if (parse->hasAggs || (parse->groupingSets && parse->groupClause)) { - /* Plain aggregate plan --- sort if needed */ - AggStrategy aggstrategy; - - if (parse->groupClause) - { - if (need_sort_for_grouping) - { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; - } - aggstrategy = AGG_SORTED; - - /* - * The AGG node will not change the sort ordering of its - * groups, so current_pathkeys describes the result too. - */ - } + /* + * Output is in sorted order by group_pathkeys if, and only if, + * there is a single rollup operation on a non-empty list of + * grouping expressions. 
+ */ + if (list_length(rollup_groupclauses) == 1 + && list_length(linitial(rollup_groupclauses)) > 0) + current_pathkeys = root->group_pathkeys; else - { - aggstrategy = AGG_PLAIN; - /* Result will be only one row anyway; no sort order */ current_pathkeys = NIL; - } - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - aggstrategy, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - numGroups, - result_plan); + result_plan = build_grouping_chain(root, + parse, + tlist, + need_sort_for_grouping, + rollup_groupclauses, + rollup_lists, + groupColIdx, + &agg_costs, + numGroups, + result_plan); + + /* + * these are destroyed by build_grouping_chain, so make sure we + * don't try and touch them again + */ + rollup_groupclauses = NIL; + rollup_lists = NIL; } else if (parse->groupClause) { @@ -1749,24 +1901,45 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) result_plan); /* The Group node won't change sort ordering */ } - else if (root->hasHavingQual) + else if (root->hasHavingQual || parse->groupingSets) { + int nrows = list_length(parse->groupingSets); + /* - * No aggregates, and no GROUP BY, but we have a HAVING qual. + * No aggregates, and no GROUP BY, but we have a HAVING qual or + * grouping sets (which by elimination of cases above must + * consist solely of empty grouping sets, since otherwise + * groupClause will be non-empty). + * * This is a degenerate case in which we are supposed to emit - * either 0 or 1 row depending on whether HAVING succeeds. - * Furthermore, there cannot be any variables in either HAVING - * or the targetlist, so we actually do not need the FROM - * table at all! We can just throw away the plan-so-far and - * generate a Result node. This is a sufficiently unusual - * corner case that it's not worth contorting the structure of - * this routine to avoid having to generate the plan in the - * first place. 
+ * either 0 or 1 row for each grouping set depending on whether + * HAVING succeeds. Furthermore, there cannot be any variables + * in either HAVING or the targetlist, so we actually do not + * need the FROM table at all! We can just throw away the + * plan-so-far and generate a Result node. This is a + * sufficiently unusual corner case that it's not worth + * contorting the structure of this routine to avoid having to + * generate the plan in the first place. */ result_plan = (Plan *) make_result(root, tlist, parse->havingQual, NULL); + + /* + * Doesn't seem worthwhile writing code to cons up a + * generate_series or a values scan to emit multiple rows. + * Instead just clone the result in an Append. + */ + if (nrows > 1) + { + List *plans = list_make1(result_plan); + + while (--nrows > 0) + plans = lappend(plans, copyObject(result_plan)); + + result_plan = (Plan *) make_append(plans, tlist); + } } } /* end of non-minmax-aggregate case */ @@ -1932,7 +2105,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * result was already mostly unique). If not, use the number of * distinct-groups calculated previously. */ - if (parse->groupClause || root->hasHavingQual || parse->hasAggs) + if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs) dNumDistinctRows = result_plan->plan_rows; else dNumDistinctRows = dNumGroups; @@ -1973,6 +2146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) extract_grouping_cols(parse->distinctClause, result_plan->targetlist), extract_grouping_ops(parse->distinctClause), + NIL, numDistinctRows, result_plan); /* Hashed aggregation produces randomly-ordered results */ @@ -2082,6 +2256,198 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) return result_plan; } + +/* + * Given a groupclause for a collection of grouping sets, produce the + * corresponding groupColIdx. + * + * root->grouping_map maps the tleSortGroupRef to the actual column position in + * the input tuple. 
So we get the ref from the entries in the groupclause and + * look them up there. + */ +static AttrNumber * +remap_groupColIdx(PlannerInfo *root, List *groupClause) +{ + AttrNumber *grouping_map = root->grouping_map; + AttrNumber *new_grpColIdx; + ListCell *lc; + int i; + + Assert(grouping_map); + + new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause)); + + i = 0; + foreach(lc, groupClause) + { + SortGroupClause *clause = lfirst(lc); + new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef]; + } + + return new_grpColIdx; +} + +/* + * Build Agg and Sort nodes to implement sorted grouping with one or more + * grouping sets. (A plain GROUP BY or just the presence of aggregates counts + * for this purpose as a single grouping set; the calling code is responsible + * for providing a non-empty rollup_groupclauses list for such cases, though + * rollup_lists may be null.) + * + * The last entry in rollup_groupclauses (which is the one the input is sorted + * on, if at all) is the one used for the returned Agg node. Any additional + * rollups are attached, with corresponding sort info, to subsidiary Agg and + * Sort nodes attached to the side of the real Agg node; these nodes don't + * participate in the plan directly, but they are both a convenient way to + * represent the required data and a convenient way to account for the costs + * of execution. + * + * rollup_groupclauses and rollup_lists are destroyed by this function. 
+ */ +static Plan * +build_grouping_chain(PlannerInfo *root, + Query *parse, + List *tlist, + bool need_sort_for_grouping, + List *rollup_groupclauses, + List *rollup_lists, + AttrNumber *groupColIdx, + AggClauseCosts *agg_costs, + long numGroups, + Plan *result_plan) +{ + AttrNumber *top_grpColIdx = groupColIdx; + List *chain = NIL; + + /* + * Prepare the grpColIdx for the real Agg node first, because we may need + * it for sorting + */ + if (list_length(rollup_groupclauses) > 1) + { + Assert(rollup_lists && llast(rollup_lists)); + + top_grpColIdx = + remap_groupColIdx(root, llast(rollup_groupclauses)); + } + + /* + * If we need a Sort operation on the input, generate that. + */ + if (need_sort_for_grouping) + { + result_plan = (Plan *) + make_sort_from_groupcols(root, + llast(rollup_groupclauses), + top_grpColIdx, + result_plan); + } + + /* + * Generate the side nodes that describe the other sort and group + * operations besides the top one. + */ + while (list_length(rollup_groupclauses) > 1) + { + List *groupClause = linitial(rollup_groupclauses); + List *gsets = linitial(rollup_lists); + AttrNumber *new_grpColIdx; + Plan *sort_plan; + Plan *agg_plan; + + Assert(groupClause); + Assert(gsets); + + new_grpColIdx = remap_groupColIdx(root, groupClause); + + sort_plan = (Plan *) + make_sort_from_groupcols(root, + groupClause, + new_grpColIdx, + result_plan); + + /* + * sort_plan includes the cost of result_plan over again, which is not + * what we want (since it's not actually running that plan). So correct + * the cost figures. 
+ */ + + sort_plan->startup_cost -= result_plan->total_cost; + sort_plan->total_cost -= result_plan->total_cost; + + agg_plan = (Plan *) make_agg(root, + tlist, + (List *) parse->havingQual, + AGG_SORTED, + agg_costs, + list_length(linitial(gsets)), + new_grpColIdx, + extract_grouping_ops(groupClause), + gsets, + numGroups, + sort_plan); + + sort_plan->lefttree = NULL; + + chain = lappend(chain, agg_plan); + + if (rollup_lists) + rollup_lists = list_delete_first(rollup_lists); + + rollup_groupclauses = list_delete_first(rollup_groupclauses); + } + + /* + * Now make the final Agg node + */ + { + List *groupClause = linitial(rollup_groupclauses); + List *gsets = rollup_lists ? linitial(rollup_lists) : NIL; + int numGroupCols; + ListCell *lc; + + if (gsets) + numGroupCols = list_length(linitial(gsets)); + else + numGroupCols = list_length(parse->groupClause); + + result_plan = (Plan *) make_agg(root, + tlist, + (List *) parse->havingQual, + (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, + agg_costs, + numGroupCols, + top_grpColIdx, + extract_grouping_ops(groupClause), + gsets, + numGroups, + result_plan); + + ((Agg *) result_plan)->chain = chain; + + /* + * Add the additional costs. But only the total costs count, since the + * additional sorts aren't run on startup. + */ + foreach(lc, chain) + { + Plan *subplan = lfirst(lc); + + result_plan->total_cost += subplan->total_cost; + + /* + * Nuke stuff we don't need to avoid bloating debug output. + */ + + subplan->targetlist = NIL; + subplan->qual = NIL; + subplan->lefttree->targetlist = NIL; + } + } + + return result_plan; +} + /* * add_tlist_costs_to_plan * @@ -2642,19 +3008,38 @@ limit_needed(Query *parse) * * Note: we need no comparable processing of the distinctClause because * the parser already enforced that that matches ORDER BY. + * + * For grouping sets, the order of items is instead forced to agree with that + * of the grouping set (and items not in the grouping set are skipped). 
The + * work of sorting the order of grouping set elements to match the ORDER BY if + * possible is done elsewhere. */ -static void -preprocess_groupclause(PlannerInfo *root) +static List * +preprocess_groupclause(PlannerInfo *root, List *force) { Query *parse = root->parse; - List *new_groupclause; + List *new_groupclause = NIL; bool partial_match; ListCell *sl; ListCell *gl; + /* For grouping sets, we need to force the ordering */ + if (force) + { + foreach(sl, force) + { + Index ref = lfirst_int(sl); + SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause); + + new_groupclause = lappend(new_groupclause, cl); + } + + return new_groupclause; + } + /* If no ORDER BY, nothing useful to do here */ if (parse->sortClause == NIL) - return; + return parse->groupClause; /* * Scan the ORDER BY clause and construct a list of matching GROUP BY @@ -2662,7 +3047,6 @@ preprocess_groupclause(PlannerInfo *root) * * This code assumes that the sortClause contains no duplicate items. */ - new_groupclause = NIL; foreach(sl, parse->sortClause) { SortGroupClause *sc = (SortGroupClause *) lfirst(sl); @@ -2686,7 +3070,7 @@ preprocess_groupclause(PlannerInfo *root) /* If no match at all, no point in reordering GROUP BY */ if (new_groupclause == NIL) - return; + return parse->groupClause; /* * Add any remaining GROUP BY items to the new list, but only if we were @@ -2703,15 +3087,290 @@ preprocess_groupclause(PlannerInfo *root) if (list_member_ptr(new_groupclause, gc)) continue; /* it matched an ORDER BY item */ if (partial_match) - return; /* give up, no common sort possible */ + return parse->groupClause; /* give up, no common sort possible */ if (!OidIsValid(gc->sortop)) - return; /* give up, GROUP BY can't be sorted */ + return parse->groupClause; /* give up, GROUP BY can't be sorted */ new_groupclause = lappend(new_groupclause, gc); } /* Success --- install the rearranged GROUP BY list */ Assert(list_length(parse->groupClause) == list_length(new_groupclause)); - 
parse->groupClause = new_groupclause; + return new_groupclause; +} + +/* + * Extract lists of grouping sets that can be implemented using a single + * rollup-type aggregate pass each. Returns a list of lists of grouping sets. + * + * Input must be sorted with smallest sets first. Result has each sublist + * sorted with smallest sets first. + * + * We want to produce the absolute minimum possible number of lists here to + * avoid excess sorts. Fortunately, there is an algorithm for this; the problem + * of finding the minimal partition of a partially-ordered set into chains + * (which is what we need, taking the list of grouping sets as a poset ordered + * by set inclusion) can be mapped to the problem of finding the maximum + * cardinality matching on a bipartite graph, which is solvable in polynomial + * time with a worst case of no worse than O(n^2.5) and usually much + * better. Since our N is at most 4096, we don't need to consider fallbacks to + * heuristic or approximate methods. (Planning time for a 12-d cube is under + * half a second on my modest system even with optimization off and assertions + * on.) + * + * The individual sets in the result are the caller's original lists, not + * copies. + */ +static List * +extract_rollup_sets(List *groupingSets) +{ + int num_sets_raw = list_length(groupingSets); /* input count, including empty and duplicate sets */ + int num_empty = 0; /* leading empty sets stripped from the input */ + int num_sets = 0; /* distinct sets */ + int num_chains = 0; /* chains (result lists) created so far */ + List *result = NIL; + List **results; /* results[c] = sets assigned to chain c, 1-based */ + List **orig_sets; + Bitmapset **set_masks; + int *chains; + short **adjacency; + short *adjacency_buf; /* scratch space for building one adjacency array */ + BipartiteMatchState *state; + int i; /* next free 1-based slot; later reused as a plain loop index */ + int j; /* slot of the first distinct set whose length is j_size */ + int j_size; /* largest set length seen so far */ + ListCell *lc1 = list_head(groupingSets); + ListCell *lc; + + /* + * Start by stripping out empty sets. The algorithm doesn't require this, + * but the planner currently needs all empty sets to be returned in the + * first list, so we strip them here and add them back after. + */ + while (lc1 && lfirst(lc1) == NIL) + { + ++num_empty; + lc1 = lnext(lc1); + } + + /* bail out now if it turns out that all we had were empty sets.
*/ + if (!lc1) + return list_make1(groupingSets); /* one list holding the input as-is */ + + /* + * We don't strictly need to remove duplicate sets here, but if we + * don't, they tend to become scattered through the result, which is + * a bit confusing (and irritating if we ever decide to optimize them + * out). So we remove them here and add them back after. + * + * For each non-duplicate set, we fill in the following: + * + * orig_sets[i] = list of the original set lists + * set_masks[i] = bitmapset for testing inclusion + * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices + * + * chains[i] will be the result group this set is assigned to. + * + * We index all of these from 1 rather than 0 because it is convenient + * to leave 0 free for the NIL node in the graph algorithm. + */ + orig_sets = palloc0((num_sets_raw + 1) * sizeof(List*)); + set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *)); + adjacency = palloc0((num_sets_raw + 1) * sizeof(short *)); + adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short)); /* element 0 holds the count, the rest the neighbours */ + + j_size = 0; + j = 0; + i = 1; + + for_each_cell(lc, lc1) + { + List *candidate = lfirst(lc); + Bitmapset *candidate_set = NULL; + ListCell *lc2; + int dup_of = 0; /* slot of an identical earlier set, or 0 if none */ + + foreach(lc2, candidate) + { + candidate_set = bms_add_member(candidate_set, lfirst_int(lc2)); + } + + /* we can only be a dup if we're the same length as a previous set */ + if (j_size == list_length(candidate)) + { + int k; + for (k = j; k < i; ++k) + { + if (bms_equal(set_masks[k], candidate_set)) + { + dup_of = k; + break; + } + } + } + else if (j_size < list_length(candidate)) + { + j_size = list_length(candidate); + j = i; /* this set opens a new, larger size class */ + } + + if (dup_of > 0) + { + orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate); /* keep the duplicate alongside its twin */ + bms_free(candidate_set); + } + else + { + int k; + int n_adj = 0; + + orig_sets[i] = list_make1(candidate); + set_masks[i] = candidate_set; + + /* fill in adjacency list; no need to compare equal-size sets */ + + for (k = j - 1; k > 0; --k) /* slots below j hold only shorter sets */ + { + if (bms_is_subset(set_masks[k],
candidate_set)) + adjacency_buf[++n_adj] = k; + } + + if (n_adj > 0) + { + adjacency_buf[0] = n_adj; /* element 0 is the neighbour count */ + adjacency[i] = palloc((n_adj + 1) * sizeof(short)); + memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short)); + } + else + adjacency[i] = NULL; + + ++i; + } + } + + num_sets = i - 1; + + /* + * Apply the graph matching algorithm to do the work. + */ + state = BipartiteMatch(num_sets, num_sets, adjacency); + + /* + * Now, the state->pair* fields have the info we need to assign sets to + * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or + * pair_vu[v] = u (both will be true, but we check both so that we can do + * it in one pass) + */ + chains = palloc0((num_sets + 1) * sizeof(int)); + + for (i = 1; i <= num_sets; ++i) + { + int u = state->pair_vu[i]; + int v = state->pair_uv[i]; + + if (u > 0 && u < i) + chains[i] = chains[u]; + else if (v > 0 && v < i) + chains[i] = chains[v]; + else + chains[i] = ++num_chains; /* not paired with any earlier slot: start a new chain */ + } + + /* build result lists. */ + results = palloc0((num_chains + 1) * sizeof(List*)); + + for (i = 1; i <= num_sets; ++i) + { + int c = chains[i]; + + Assert(c > 0); + + results[c] = list_concat(results[c], orig_sets[i]); /* the set plus any duplicates of it */ + } + + /* push any empty sets back on the first list. */ + while (num_empty-- > 0) + results[1] = lcons(NIL, results[1]); + + /* make result list */ + for (i = 1; i <= num_chains; ++i) + result = lappend(result, results[i]); + + /* + * Free all the things. + * + * (This is over-fussy for small sets but for large sets we could have + * tied up a nontrivial amount of memory.) + */ + BipartiteMatchFree(state); + pfree(results); + pfree(chains); + for (i = 1; i <= num_sets; ++i) + if (adjacency[i]) + pfree(adjacency[i]); + pfree(adjacency); + pfree(adjacency_buf); + pfree(orig_sets); + for (i = 1; i <= num_sets; ++i) + bms_free(set_masks[i]); + pfree(set_masks); + + return result; +} + +/* + * Reorder the elements of a list of grouping sets such that they have correct + * prefix relationships.
+ * + * The input must be ordered with smallest sets first; the result is returned + * with largest sets first. + * + * Each returned set is a fresh integer list, and every set is a prefix of + * the sets that precede it in the result. + * + * If we're passed in a sortclause, we follow its order of columns to the + * extent possible, to minimize the chance that we add unnecessary sorts. + * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a + * gets implemented in one pass.) + */ +static List * +reorder_grouping_sets(List *groupingsets, List *sortclause) +{ + ListCell *lc; + ListCell *lc2; + List *previous = NIL; /* column refs in the order chosen so far */ + List *result = NIL; + + foreach(lc, groupingsets) + { + List *candidate = lfirst(lc); + List *new_elems = list_difference_int(candidate, previous); /* refs of candidate not yet in previous */ + + if (list_length(new_elems) > 0) + { + /* + * Follow the sortclause only while there are still new elements to + * place. Running out of new elements is not a divergence: the + * sortclause must stay usable for the larger sets that follow, + * otherwise ((a,b,c),(c)) ORDER BY c,b,a would come out as c,a,b. + */ + while (list_length(sortclause) > list_length(previous) && + list_length(new_elems) > 0) + { + SortGroupClause *sc = list_nth(sortclause, list_length(previous)); + int ref = sc->tleSortGroupRef; + if (list_member_int(new_elems, ref)) + { + previous = lappend_int(previous, ref); + new_elems = list_delete_int(new_elems, ref); + } + else + { + /* diverged from the sortclause; give up on it */ + sortclause = NIL; + break; + } + } + + /* append whatever the sortclause did not place */ + foreach(lc2, new_elems) + { + previous = lappend_int(previous, lfirst_int(lc2)); + } + } + + result = lcons(list_copy(previous), result); /* prepend, so the largest set ends up first */ + list_free(new_elems); + } + + list_free(previous); + + return result; } /* @@ -2730,11 +3389,11 @@ standard_qp_callback(PlannerInfo *root, void *extra) * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not * be, in which case we just leave their pathkeys empty.
*/ - if (parse->groupClause && - grouping_is_sortable(parse->groupClause)) + if (qp_extra->groupClause && + grouping_is_sortable(qp_extra->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, - parse->groupClause, + qp_extra->groupClause, tlist); else root->group_pathkeys = NIL; @@ -3159,7 +3818,7 @@ make_subplanTargetList(PlannerInfo *root, * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. */ - if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual && + if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual && !parse->hasWindowFuncs) { *need_tlist_eval = true; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 73b198883696be2e90b5c16d1f430b43e91c4785..90e13e498895c158457d7a996cfc0894096a2c74 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -140,7 +140,6 @@ static bool fix_opfuncids_walker(Node *node, void *context); static bool extract_query_dependencies_walker(Node *node, PlannerInfo *context); - /***************************************************************************** * * SUBPLAN REFERENCES @@ -656,6 +655,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) } break; case T_Agg: + set_upper_references(root, plan, rtoffset); + break; case T_Group: set_upper_references(root, plan, rtoffset); break; @@ -1229,6 +1230,7 @@ copyVar(Var *var) * We must look up operator opcode info for OpExpr and related nodes, * add OIDs from regclass Const nodes into root->glob->relationOids, and * add catalog TIDs for user-defined functions into root->glob->invalItems. + * We also fill in column index lists for GROUPING() expressions. * * We assume it's okay to update opcode info in-place. So this could possibly * scribble on the planner's input data structures, but it's OK. 
@@ -1292,6 +1294,31 @@ fix_expr_common(PlannerInfo *root, Node *node) lappend_oid(root->glob->relationOids, DatumGetObjectId(con->constvalue)); } + else if (IsA(node, GroupingFunc)) + { + GroupingFunc *g = (GroupingFunc *) node; + AttrNumber *grouping_map = root->grouping_map; + + /* If there are no grouping sets, we don't need this. */ + + Assert(grouping_map || g->cols == NIL); + + if (grouping_map) + { + ListCell *lc; + List *cols = NIL; + + foreach(lc, g->refs) + { + cols = lappend_int(cols, grouping_map[lfirst_int(lc)]); + } + + Assert(!g->cols || equal(cols, g->cols)); + + if (!g->cols) + g->cols = cols; + } + } } /* @@ -2186,6 +2213,7 @@ set_returning_clause_references(PlannerInfo *root, return rlist; } + /***************************************************************************** * OPERATOR REGPROC LOOKUP *****************************************************************************/ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 2f7f5c0df0e4274a6edce17167d456b9de8223f5..f80abb494c7e1b7470e313859929044a53f71a18 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -335,6 +335,48 @@ replace_outer_agg(PlannerInfo *root, Aggref *agg) return retval; } +/* + * Generate a Param node to replace the given GroupingFunc expression which is + * expected to have agglevelsup > 0 (ie, it is not local). + */ +static Param * +replace_outer_grouping(PlannerInfo *root, GroupingFunc *grp) +{ + Param *retval; + PlannerParamItem *pitem; + Index levelsup; /* levels still to climb in the loop below */ + + Assert(grp->agglevelsup > 0 && grp->agglevelsup < root->query_level); + + /* Find the query level the GroupingFunc belongs to */ + for (levelsup = grp->agglevelsup; levelsup > 0; levelsup--) + root = root->parent_root; /* from here on, root is that ancestor level */ + + /* + * It does not seem worthwhile to try to match duplicate outer GROUPING + * expressions. Just make a new slot every time.
+ */ + grp = (GroupingFunc *) copyObject(grp); /* work on a copy; the caller's node is not modified */ + IncrementVarSublevelsUp((Node *) grp, -((int) grp->agglevelsup), 0); /* negative delta; the Assert below expects agglevelsup to end up 0 */ + Assert(grp->agglevelsup == 0); + + pitem = makeNode(PlannerParamItem); + pitem->item = (Node *) grp; + pitem->paramId = root->glob->nParamExec++; /* take the next unused param number */ + + root->plan_params = lappend(root->plan_params, pitem); /* registered on the ancestor level, not the caller's */ + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = pitem->paramId; + retval->paramtype = exprType((Node *) grp); + retval->paramtypmod = -1; + retval->paramcollid = InvalidOid; + retval->location = grp->location; + + return retval; +} + /* * Generate a new Param node that will not conflict with any other. * @@ -1494,14 +1536,16 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query) { /* * We don't try to simplify at all if the query uses set operations, - * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none - * of these seem likely in normal usage and their possible effects are - * complex. (Note: we could ignore an "OFFSET 0" clause, but that - * traditionally is used as an optimization fence, so we don't.) + * aggregates, grouping sets, modifying CTEs, HAVING, OFFSET, or FOR + * UPDATE/SHARE; none of these seem likely in normal usage and their + * possible effects are complex. (Note: we could ignore an "OFFSET 0" + * clause, but that traditionally is used as an optimization fence, so we + * don't.)
*/ if (query->commandType != CMD_SELECT || query->setOperations || query->hasAggs || + query->groupingSets || query->hasWindowFuncs || query->hasModifyingCTE || query->havingQual || @@ -1851,6 +1895,11 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root) if (((Aggref *) node)->agglevelsup > 0) return (Node *) replace_outer_agg(root, (Aggref *) node); } + if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > 0) + return (Node *) replace_outer_grouping(root, (GroupingFunc *) node); + } return expression_tree_mutator(node, replace_correlation_vars_mutator, (void *) root); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 4f0dc80d025341bb69755ab5eea75db95be8224e..92b0562843458b403517d2c008c9db5cd26a1f79 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1412,6 +1412,7 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, if (subquery->hasAggs || subquery->hasWindowFuncs || subquery->groupClause || + subquery->groupingSets || subquery->havingQual || subquery->sortClause || subquery->distinctClause || diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 5859748df8eeda5812ad9674a3146cca13d08846..8884fb1bae3e910c06514f2afaad4b92203a412e 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -268,13 +268,15 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, */ if (pNumGroups) { - if (subquery->groupClause || subquery->distinctClause || + if (subquery->groupClause || subquery->groupingSets || + subquery->distinctClause || subroot->hasHavingQual || subquery->hasAggs) *pNumGroups = subplan->plan_rows; else *pNumGroups = estimate_num_groups(subroot, get_tlist_exprs(subquery->targetList, false), - subplan->plan_rows); + subplan->plan_rows, + NULL); } /* @@ -771,6 +773,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan, 
extract_grouping_cols(groupList, plan->targetlist), extract_grouping_ops(groupList), + NIL, numGroups, plan); /* Hashed aggregation produces randomly-ordered results */ diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 480114d92b68cd1a481173bd8764ab1746294d2f..86585c58ee7b1876919ed9d48a6beb1772a27c16 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -4353,6 +4353,7 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid, querytree->jointree->fromlist || querytree->jointree->quals || querytree->groupClause || + querytree->groupingSets || querytree->havingQual || querytree->windowClause || querytree->distinctClause || diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ea7a47bdf457b93251534edf3e0e59b1b0a50026..3fe27126086e0b80719e6923048cd7264ad3e708 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1214,7 +1214,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, /* Estimate number of output rows */ pathnode->path.rows = estimate_num_groups(root, sjinfo->semi_rhs_exprs, - rel->rows); + rel->rows, + NULL); numCols = list_length(sjinfo->semi_rhs_exprs); if (sjinfo->semi_can_btree) diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index a1a504b24d6c15408831d93818362c22205a9ef3..f702b8c442dd16a78a6129b6338bb74632883631 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -394,6 +394,28 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList) * functions just above, and they don't seem to deserve their own file. *****************************************************************************/ +/* + * get_sortgroupref_clause + * Find the SortGroupClause matching the given SortGroupRef index, + * and return it. 
+ */ +SortGroupClause * +get_sortgroupref_clause(Index sortref, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + SortGroupClause *cl = (SortGroupClause *) lfirst(l); + + if (cl->tleSortGroupRef == sortref) /* the first clause carrying this ref wins */ + return cl; + } + + elog(ERROR, "ORDER/GROUP BY expression not found in list"); /* reached only if no clause in the list has this ref */ + return NULL; /* keep compiler quiet */ +} + /* * extract_grouping_ops - make an array of the equality operator OIDs * for a SortGroupClause list diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 8f864323611d2ce15e0c1a8ca5bb25a1705db8de..0f25539d124086467535de117658827a66fb2c7a 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -564,6 +564,30 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context) break; } } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup != 0) + elog(ERROR, "Upper-level GROUPING found where not expected"); + switch (context->aggbehavior) + { + case PVC_REJECT_AGGREGATES: + elog(ERROR, "GROUPING found where not expected"); + break; + case PVC_INCLUDE_AGGREGATES: + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + case PVC_RECURSE_AGGREGATES: + /* + * we do NOT descend into the contained expression, + * even if the caller asked for it, because we never + * actually evaluate it - the result is driven entirely + * off the associated GROUP BY clause, so we never need + * to extract the actual Vars here.
+ */ + return false; + } + } else if (IsA(node, PlaceHolderVar)) { if (((PlaceHolderVar *) node)->phlevelsup != 0) diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 3eb4feabfd6b15ae1983914f49c9660ca11285a9..82c9abfa9157d9083e70ea8653d63d08fcadacf2 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1060,6 +1060,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->groupClause = transformGroupClause(pstate, stmt->groupClause, + &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, @@ -1106,7 +1107,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasWindowFuncs = pstate->p_hasWindowFuncs; qry->hasAggs = pstate->p_hasAggs; - if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) + if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) parseCheckAggregates(pstate, qry); foreach(l, stmt->lockingClause) @@ -1566,7 +1567,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasWindowFuncs = pstate->p_hasWindowFuncs; qry->hasAggs = pstate->p_hasAggs; - if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) + if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) parseCheckAggregates(pstate, qry); foreach(l, lockingClause) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 14397830686b11efbbcaa21078b561a195805bf4..46f2229b68c45fd88a7d185388a81c4550a9fad7 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -371,6 +371,10 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); relation_expr_list dostmt_opt_list transform_element_list transform_type_list +%type <list> group_by_list +%type <node> group_by_item empty_grouping_set rollup_clause cube_clause +%type <node> grouping_sets_clause + %type <list> opt_fdw_options 
fdw_options %type <defelt> fdw_option @@ -438,7 +442,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <list> ExclusionConstraintList ExclusionConstraintElem %type <list> func_arg_list %type <node> func_arg_expr -%type <list> row type_list array_expr_list +%type <list> row explicit_row implicit_row type_list array_expr_list %type <node> case_expr case_arg when_clause case_default %type <list> when_clause_list %type <ival> sub_type @@ -568,7 +572,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE - CROSS CSV CURRENT_P + CROSS CSV CUBE CURRENT_P CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -583,7 +587,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); FALSE_P FAMILY FETCH FILTER FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS - GLOBAL GRANT GRANTED GREATEST GROUP_P + GLOBAL GRANT GRANTED GREATEST GROUP_P GROUPING HANDLER HAVING HEADER_P HOLD HOUR_P @@ -617,12 +621,12 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA - RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK + RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES - SERIALIZABLE SERVER SESSION SESSION_USER SET SETOF SHARE - SHOW SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P START + SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW + SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT 
SOME SQL_P STABLE STANDALONE_P START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING SYMMETRIC SYSID SYSTEM_P @@ -682,6 +686,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); * and for NULL so that it can follow b_expr in ColQualList without creating * postfix-operator problems. * + * To support CUBE and ROLLUP in GROUP BY without reserving them, we give them + * an explicit priority lower than '(', so that a rule with CUBE '(' will shift + * rather than reducing a conflicting rule that takes CUBE as a function name. + * Using the same precedence as IDENT seems right for the reasons given above. + * * The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING * are even messier: since UNBOUNDED is an unreserved keyword (per spec!), * there is no principled way to distinguish these from the productions @@ -692,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); * blame any funny behavior of UNBOUNDED on the SQL standard, though. */ %nonassoc UNBOUNDED /* ideally should have same precedence as IDENT */ -%nonassoc IDENT NULL_P PARTITION RANGE ROWS PRECEDING FOLLOWING +%nonassoc IDENT NULL_P PARTITION RANGE ROWS PRECEDING FOLLOWING CUBE ROLLUP %left Op OPERATOR /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' @@ -10296,11 +10305,78 @@ first_or_next: FIRST_P { $$ = 0; } ; +/* + * This syntax for group_clause tries to follow the spec quite closely. + * However, the spec allows only column references, not expressions, + * which introduces an ambiguity between implicit row constructors + * (a,b) and lists of column references. + * + * We handle this by using the a_expr production for what the spec calls + * <ordinary grouping set>, which in the spec represents either one column + * reference or a parenthesized list of column references. 
Then, we check the + * top node of the a_expr to see if it's an implicit RowExpr, and if so, just + * grab and use the list, discarding the node. (this is done in parse analysis, + * not here) + * + * (we abuse the row_format field of RowExpr to distinguish implicit and + * explicit row constructors; it's debatable if anyone sanely wants to use them + * in a group clause, but if they have a reason to, we make it possible.) + * + * Each item in the group_clause list is either an expression tree or a + * GroupingSet node of some type. + */ group_clause: - GROUP_P BY expr_list { $$ = $3; } + GROUP_P BY group_by_list { $$ = $3; } | /*EMPTY*/ { $$ = NIL; } ; +group_by_list: + group_by_item { $$ = list_make1($1); } + | group_by_list ',' group_by_item { $$ = lappend($1,$3); } + ; + +group_by_item: + a_expr { $$ = $1; } + | empty_grouping_set { $$ = $1; } + | cube_clause { $$ = $1; } + | rollup_clause { $$ = $1; } + | grouping_sets_clause { $$ = $1; } + ; + +empty_grouping_set: + '(' ')' + { + $$ = (Node *) makeGroupingSet(GROUPING_SET_EMPTY, NIL, @1); + } + ; + +/* + * These hacks rely on setting precedence of CUBE and ROLLUP below that of '(', + * so that they shift in these rules rather than reducing the conflicting + * unreserved_keyword rule. 
+ */ + +rollup_clause: + ROLLUP '(' expr_list ')' + { + $$ = (Node *) makeGroupingSet(GROUPING_SET_ROLLUP, $3, @1); + } + ; + +cube_clause: + CUBE '(' expr_list ')' + { + $$ = (Node *) makeGroupingSet(GROUPING_SET_CUBE, $3, @1); + } + ; + +grouping_sets_clause: + GROUPING SETS '(' group_by_list ')' + { + $$ = (Node *) makeGroupingSet(GROUPING_SET_SETS, $4, @1); + } + ; + having_clause: HAVING a_expr { $$ = $2; } | /*EMPTY*/ { $$ = NULL; } @@ -11953,15 +12029,33 @@ c_expr: columnref { $$ = $1; } n->location = @1; $$ = (Node *)n; } - | row + | explicit_row + { + RowExpr *r = makeNode(RowExpr); + r->args = $1; + r->row_typeid = InvalidOid; /* not analyzed yet */ + r->colnames = NIL; /* to be filled in during analysis */ + r->row_format = COERCE_EXPLICIT_CALL; /* abuse */ + r->location = @1; + $$ = (Node *)r; + } + | implicit_row { RowExpr *r = makeNode(RowExpr); r->args = $1; r->row_typeid = InvalidOid; /* not analyzed yet */ r->colnames = NIL; /* to be filled in during analysis */ + r->row_format = COERCE_IMPLICIT_CAST; /* abuse */ r->location = @1; $$ = (Node *)r; } + | GROUPING '(' expr_list ')' + { + GroupingFunc *g = makeNode(GroupingFunc); + g->args = $3; + g->location = @1; + $$ = (Node *)g; + } ; func_application: func_name '(' ')' @@ -12711,6 +12805,13 @@ row: ROW '(' expr_list ')' { $$ = $3; } | '(' expr_list ',' a_expr ')' { $$ = lappend($2, $4); } ; +explicit_row: ROW '(' expr_list ')' { $$ = $3; } + | ROW '(' ')' { $$ = NIL; } + ; + +implicit_row: '(' expr_list ',' a_expr ')' { $$ = lappend($2, $4); } + ; + sub_type: ANY { $$ = ANY_SUBLINK; } | SOME { $$ = ANY_SUBLINK; } | ALL { $$ = ALL_SUBLINK; } @@ -13520,6 +13621,7 @@ unreserved_keyword: | COPY | COST | CSV + | CUBE | CURRENT_P | CURSOR | CYCLE @@ -13668,6 +13770,7 @@ unreserved_keyword: | REVOKE | ROLE | ROLLBACK + | ROLLUP | ROWS | RULE | SAVEPOINT @@ -13682,6 +13785,7 @@ unreserved_keyword: | SERVER | SESSION | SET + | SETS | SHARE | SHOW | SIMPLE @@ -13767,6 +13871,7 @@ col_name_keyword: | EXTRACT 
| FLOAT_P | GREATEST + | GROUPING | INOUT | INT_P | INTEGER diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 7b0e66807d46b7130334e99df12b6e7418faeb02..1e3f2e0ffa20f5d935ac2ab688a991a8bd5cd8c0 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -42,7 +42,9 @@ typedef struct { ParseState *pstate; Query *qry; + PlannerInfo *root; List *groupClauses; + List *groupClauseCommonVars; bool have_non_var_grouping; List **func_grouped_rels; int sublevels_up; @@ -56,11 +58,18 @@ static int check_agg_arguments(ParseState *pstate, static bool check_agg_arguments_walker(Node *node, check_agg_arguments_context *context); static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, - List *groupClauses, bool have_non_var_grouping, + List *groupClauses, List *groupClauseVars, + bool have_non_var_grouping, List **func_grouped_rels); static bool check_ungrouped_columns_walker(Node *node, check_ungrouped_columns_context *context); - +static void finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry, + List *groupClauses, PlannerInfo *root, + bool have_non_var_grouping); +static bool finalize_grouping_exprs_walker(Node *node, + check_ungrouped_columns_context *context); +static void check_agglevels_and_constraints(ParseState *pstate,Node *expr); +static List *expand_groupingset_node(GroupingSet *gs); /* * transformAggregateCall - @@ -96,10 +105,7 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, List *tdistinct = NIL; AttrNumber attno = 1; int save_next_resno; - int min_varlevel; ListCell *lc; - const char *err; - bool errkind; if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { @@ -214,15 +220,97 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; + check_agglevels_and_constraints(pstate, (Node *) agg); +} + +/* + * transformGroupingFunc + * Transform a GROUPING expression + * + * GROUPING() behaves very like an aggregate. 
Processing of levels and nesting + * is done as for aggregates. We set p_hasAggs for these expressions too. + */ +Node * +transformGroupingFunc(ParseState *pstate, GroupingFunc *p) +{ + ListCell *lc; + List *args = p->args; + List *result_list = NIL; + GroupingFunc *result = makeNode(GroupingFunc); /* new node; p itself is left unmodified */ + + if (list_length(args) > 31) /* NOTE(review): presumably keeps the one-bit-per-argument result within a non-negative integer -- confirm */ + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg("GROUPING must have fewer than 32 arguments"), + parser_errposition(pstate, p->location))); + + foreach(lc, args) + { + Node *current_result; + + current_result = transformExpr(pstate, (Node*) lfirst(lc), pstate->p_expr_kind); + + /* acceptability of expressions is checked later */ + + result_list = lappend(result_list, current_result); + } + + result->args = result_list; + result->location = p->location; + + check_agglevels_and_constraints(pstate, (Node *) result); /* fills in result->agglevelsup; errors out where GROUPING is not allowed */ + + return (Node *) result; +} + +/* + * Aggregate functions and grouping operations (which are combined in the spec + * as <set function specification>) are very similar with regard to level and + * nesting restrictions (though we allow a lot more things than the spec does). + * Centralise those restrictions here. + */ +static void +check_agglevels_and_constraints(ParseState *pstate, Node *expr) +{ + List *directargs = NIL; + List *args = NIL; + Expr *filter = NULL; + int min_varlevel; + int location = -1; + Index *p_levelsup; + const char *err; + bool errkind; + bool isAgg = IsA(expr, Aggref); + + if (isAgg) + { + Aggref *agg = (Aggref *) expr; + + directargs = agg->aggdirectargs; + args = agg->args; + filter = agg->aggfilter; + location = agg->location; + p_levelsup = &agg->agglevelsup; + } + else + { + GroupingFunc *grp = (GroupingFunc *) expr; + + args = grp->args; + location = grp->location; + p_levelsup = &grp->agglevelsup; + } + /* * Check the arguments to compute the aggregate's level and detect * improper nesting.
*/ min_varlevel = check_agg_arguments(pstate, - agg->aggdirectargs, - agg->args, - agg->aggfilter); - agg->agglevelsup = min_varlevel; + directargs, + args, + filter); + + *p_levelsup = min_varlevel; /* Mark the correct pstate level as having aggregates */ while (min_varlevel-- > 0) @@ -247,20 +335,32 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, Assert(false); /* can't happen */ break; case EXPR_KIND_OTHER: - /* Accept aggregate here; caller must throw error if wanted */ + /* Accept aggregate/grouping here; caller must throw error if wanted */ break; case EXPR_KIND_JOIN_ON: case EXPR_KIND_JOIN_USING: - err = _("aggregate functions are not allowed in JOIN conditions"); + if (isAgg) + err = _("aggregate functions are not allowed in JOIN conditions"); + else + err = _("grouping operations are not allowed in JOIN conditions"); + break; case EXPR_KIND_FROM_SUBSELECT: /* Should only be possible in a LATERAL subquery */ Assert(pstate->p_lateral_active); - /* Aggregate scope rules make it worth being explicit here */ - err = _("aggregate functions are not allowed in FROM clause of their own query level"); + /* Aggregate/grouping scope rules make it worth being explicit here */ + if (isAgg) + err = _("aggregate functions are not allowed in FROM clause of their own query level"); + else + err = _("grouping operations are not allowed in FROM clause of their own query level"); + break; case EXPR_KIND_FROM_FUNCTION: - err = _("aggregate functions are not allowed in functions in FROM"); + if (isAgg) + err = _("aggregate functions are not allowed in functions in FROM"); + else + err = _("grouping operations are not allowed in functions in FROM"); + break; case EXPR_KIND_WHERE: errkind = true; @@ -278,10 +378,18 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, /* okay */ break; case EXPR_KIND_WINDOW_FRAME_RANGE: - err = _("aggregate functions are not allowed in window RANGE"); + if (isAgg) + err = _("aggregate functions are not allowed in window RANGE"); + 
else + err = _("grouping operations are not allowed in window RANGE"); + break; case EXPR_KIND_WINDOW_FRAME_ROWS: - err = _("aggregate functions are not allowed in window ROWS"); + if (isAgg) + err = _("aggregate functions are not allowed in window ROWS"); + else + err = _("grouping operations are not allowed in window ROWS"); + break; case EXPR_KIND_SELECT_TARGET: /* okay */ @@ -312,26 +420,55 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, break; case EXPR_KIND_CHECK_CONSTRAINT: case EXPR_KIND_DOMAIN_CHECK: - err = _("aggregate functions are not allowed in check constraints"); + if (isAgg) + err = _("aggregate functions are not allowed in check constraints"); + else + err = _("grouping operations are not allowed in check constraints"); + break; case EXPR_KIND_COLUMN_DEFAULT: case EXPR_KIND_FUNCTION_DEFAULT: - err = _("aggregate functions are not allowed in DEFAULT expressions"); + + if (isAgg) + err = _("aggregate functions are not allowed in DEFAULT expressions"); + else + err = _("grouping operations are not allowed in DEFAULT expressions"); + break; case EXPR_KIND_INDEX_EXPRESSION: - err = _("aggregate functions are not allowed in index expressions"); + if (isAgg) + err = _("aggregate functions are not allowed in index expressions"); + else + err = _("grouping operations are not allowed in index expressions"); + break; case EXPR_KIND_INDEX_PREDICATE: - err = _("aggregate functions are not allowed in index predicates"); + if (isAgg) + err = _("aggregate functions are not allowed in index predicates"); + else + err = _("grouping operations are not allowed in index predicates"); + break; case EXPR_KIND_ALTER_COL_TRANSFORM: - err = _("aggregate functions are not allowed in transform expressions"); + if (isAgg) + err = _("aggregate functions are not allowed in transform expressions"); + else + err = _("grouping operations are not allowed in transform expressions"); + break; case EXPR_KIND_EXECUTE_PARAMETER: - err = _("aggregate functions are not allowed 
in EXECUTE parameters"); + if (isAgg) + err = _("aggregate functions are not allowed in EXECUTE parameters"); + else + err = _("grouping operations are not allowed in EXECUTE parameters"); + break; case EXPR_KIND_TRIGGER_WHEN: - err = _("aggregate functions are not allowed in trigger WHEN conditions"); + if (isAgg) + err = _("aggregate functions are not allowed in trigger WHEN conditions"); + else + err = _("grouping operations are not allowed in trigger WHEN conditions"); + break; /* @@ -342,18 +479,28 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * which is sane anyway. */ } + if (err) ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg_internal("%s", err), - parser_errposition(pstate, agg->location))); + parser_errposition(pstate, location))); + if (errkind) + { + if (isAgg) + /* translator: %s is name of a SQL construct, eg GROUP BY */ + err = _("aggregate functions are not allowed in %s"); + else + /* translator: %s is name of a SQL construct, eg GROUP BY */ + err = _("grouping operations are not allowed in %s"); + ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), - /* translator: %s is name of a SQL construct, eg GROUP BY */ - errmsg("aggregate functions are not allowed in %s", - ParseExprKindName(pstate->p_expr_kind)), - parser_errposition(pstate, agg->location))); + errmsg_internal(err, + ParseExprKindName(pstate->p_expr_kind)), + parser_errposition(pstate, location))); + } } /* @@ -466,7 +613,6 @@ check_agg_arguments(ParseState *pstate, locate_agg_of_level((Node *) directargs, context.min_agglevel)))); } - return agglevel; } @@ -507,6 +653,21 @@ check_agg_arguments_walker(Node *node, /* no need to examine args of the inner aggregate */ return false; } + if (IsA(node, GroupingFunc)) + { + int agglevelsup = ((GroupingFunc *) node)->agglevelsup; + + /* convert levelsup to frame of reference of original query */ + agglevelsup -= context->sublevels_up; + /* ignore local aggs of subqueries */ + if (agglevelsup >= 0) + { + if 
(context->min_agglevel < 0 || + context->min_agglevel > agglevelsup) + context->min_agglevel = agglevelsup; + } + /* Continue and descend into subtree */ + } /* We can throw error on sight for a window function */ if (IsA(node, WindowFunc)) ereport(ERROR, @@ -527,6 +688,7 @@ check_agg_arguments_walker(Node *node, context->sublevels_up--; return result; } + return expression_tree_walker(node, check_agg_arguments_walker, (void *) context); @@ -770,17 +932,66 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, void parseCheckAggregates(ParseState *pstate, Query *qry) { + List *gset_common = NIL; List *groupClauses = NIL; + List *groupClauseCommonVars = NIL; bool have_non_var_grouping; List *func_grouped_rels = NIL; ListCell *l; bool hasJoinRTEs; bool hasSelfRefRTEs; - PlannerInfo *root; + PlannerInfo *root = NULL; Node *clause; /* This should only be called if we found aggregates or grouping */ - Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual); + Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual || qry->groupingSets); + + /* + * If we have grouping sets, expand them and find the intersection of all + * sets. + */ + if (qry->groupingSets) + { + /* + * The limit of 4096 is arbitrary and exists simply to avoid resource + * issues from pathological constructs. + */ + List *gsets = expand_grouping_sets(qry->groupingSets, 4096); + + if (!gsets) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + errmsg("too many grouping sets present (max 4096)"), + parser_errposition(pstate, + qry->groupClause + ? exprLocation((Node *) qry->groupClause) + : exprLocation((Node *) qry->groupingSets)))); + + /* + * The intersection will often be empty, so help things along by + * seeding the intersect with the smallest set. 
+ */ + gset_common = linitial(gsets); + + if (gset_common) + { + for_each_cell(l, lnext(list_head(gsets))) + { + gset_common = list_intersection_int(gset_common, lfirst(l)); + if (!gset_common) + break; + } + } + + /* + * If there was only one grouping set in the expansion, AND if the + * groupClause is non-empty (meaning that the grouping set is not empty + * either), then we can ditch the grouping set and pretend we just had + * a normal GROUP BY. + */ + if (list_length(gsets) == 1 && qry->groupClause) + qry->groupingSets = NIL; + } /* * Scan the range table to see if there are JOIN or self-reference CTE @@ -800,15 +1011,19 @@ parseCheckAggregates(ParseState *pstate, Query *qry) /* * Build a list of the acceptable GROUP BY expressions for use by * check_ungrouped_columns(). + * + * We get the TLE, not just the expr, because GROUPING wants to know + * the sortgroupref. */ foreach(l, qry->groupClause) { SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); - Node *expr; + TargetEntry *expr; - expr = get_sortgroupclause_expr(grpcl, qry->targetList); + expr = get_sortgroupclause_tle(grpcl, qry->targetList); if (expr == NULL) continue; /* probably cannot happen */ + groupClauses = lcons(expr, groupClauses); } @@ -830,21 +1045,28 @@ parseCheckAggregates(ParseState *pstate, Query *qry) groupClauses = (List *) flatten_join_alias_vars(root, (Node *) groupClauses); } - else - root = NULL; /* keep compiler quiet */ /* * Detect whether any of the grouping expressions aren't simple Vars; if * they're all Vars then we don't have to work so hard in the recursive * scans. (Note we have to flatten aliases before this.) + * + * Track Vars that are included in all grouping sets separately in + * groupClauseCommonVars, since these are the only ones we can use to check + * for functional dependencies. 
*/ have_non_var_grouping = false; foreach(l, groupClauses) { - if (!IsA((Node *) lfirst(l), Var)) + TargetEntry *tle = lfirst(l); + if (!IsA(tle->expr, Var)) { have_non_var_grouping = true; - break; + } + else if (!qry->groupingSets || + list_member_int(gset_common, tle->ressortgroupref)) + { + groupClauseCommonVars = lappend(groupClauseCommonVars, tle->expr); } } @@ -855,19 +1077,30 @@ parseCheckAggregates(ParseState *pstate, Query *qry) * this will also find ungrouped variables that came from ORDER BY and * WINDOW clauses. For that matter, it's also going to examine the * grouping expressions themselves --- but they'll all pass the test ... + * + * We also finalize GROUPING expressions, but for that we need to traverse + * the original (unflattened) clause in order to modify nodes. */ clause = (Node *) qry->targetList; + finalize_grouping_exprs(clause, pstate, qry, + groupClauses, root, + have_non_var_grouping); if (hasJoinRTEs) clause = flatten_join_alias_vars(root, clause); check_ungrouped_columns(clause, pstate, qry, - groupClauses, have_non_var_grouping, + groupClauses, groupClauseCommonVars, + have_non_var_grouping, &func_grouped_rels); clause = (Node *) qry->havingQual; + finalize_grouping_exprs(clause, pstate, qry, + groupClauses, root, + have_non_var_grouping); if (hasJoinRTEs) clause = flatten_join_alias_vars(root, clause); check_ungrouped_columns(clause, pstate, qry, - groupClauses, have_non_var_grouping, + groupClauses, groupClauseCommonVars, + have_non_var_grouping, &func_grouped_rels); /* @@ -904,14 +1137,17 @@ parseCheckAggregates(ParseState *pstate, Query *qry) */ static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, - List *groupClauses, bool have_non_var_grouping, + List *groupClauses, List *groupClauseCommonVars, + bool have_non_var_grouping, List **func_grouped_rels) { check_ungrouped_columns_context context; context.pstate = pstate; context.qry = qry; + context.root = NULL; context.groupClauses = groupClauses; + 
context.groupClauseCommonVars = groupClauseCommonVars; context.have_non_var_grouping = have_non_var_grouping; context.func_grouped_rels = func_grouped_rels; context.sublevels_up = 0; @@ -965,6 +1201,16 @@ check_ungrouped_columns_walker(Node *node, return false; } + if (IsA(node, GroupingFunc)) + { + GroupingFunc *grp = (GroupingFunc *) node; + + /* handled GroupingFunc separately, no need to recheck at this level */ + + if ((int) grp->agglevelsup >= context->sublevels_up) + return false; + } + /* * If we have any GROUP BY items that are not simple Vars, check to see if * subexpression as a whole matches any GROUP BY item. We need to do this @@ -976,7 +1222,9 @@ check_ungrouped_columns_walker(Node *node, { foreach(gl, context->groupClauses) { - if (equal(node, lfirst(gl))) + TargetEntry *tle = lfirst(gl); + + if (equal(node, tle->expr)) return false; /* acceptable, do not descend more */ } } @@ -1003,7 +1251,7 @@ check_ungrouped_columns_walker(Node *node, { foreach(gl, context->groupClauses) { - Var *gvar = (Var *) lfirst(gl); + Var *gvar = (Var *) ((TargetEntry *) lfirst(gl))->expr; if (IsA(gvar, Var) && gvar->varno == var->varno && @@ -1040,7 +1288,7 @@ check_ungrouped_columns_walker(Node *node, if (check_functional_grouping(rte->relid, var->varno, 0, - context->groupClauses, + context->groupClauseCommonVars, &context->qry->constraintDeps)) { *context->func_grouped_rels = @@ -1084,6 +1332,395 @@ check_ungrouped_columns_walker(Node *node, (void *) context); } +/* + * finalize_grouping_exprs - + * Scan the given expression tree for GROUPING() and related calls, + * and validate and process their arguments. + * + * This is split out from check_ungrouped_columns above because it needs + * to modify the nodes (which it does in-place, not via a mutator) while + * check_ungrouped_columns may see only a copy of the original thanks to + * flattening of join alias vars. So here, we flatten each individual + * GROUPING argument as we see it before comparing it. 
+ */ +static void +finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry, + List *groupClauses, PlannerInfo *root, + bool have_non_var_grouping) +{ + check_ungrouped_columns_context context; + + context.pstate = pstate; + context.qry = qry; + context.root = root; + context.groupClauses = groupClauses; + context.groupClauseCommonVars = NIL; + context.have_non_var_grouping = have_non_var_grouping; + context.func_grouped_rels = NULL; + context.sublevels_up = 0; + context.in_agg_direct_args = false; + finalize_grouping_exprs_walker(node, &context); +} + +static bool +finalize_grouping_exprs_walker(Node *node, + check_ungrouped_columns_context *context) +{ + ListCell *gl; + + if (node == NULL) + return false; + if (IsA(node, Const) || + IsA(node, Param)) + return false; /* constants are always acceptable */ + + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + if ((int) agg->agglevelsup == context->sublevels_up) + { + /* + * If we find an aggregate call of the original level, do not + * recurse into its normal arguments, ORDER BY arguments, or + * filter; GROUPING exprs of this level are not allowed there. But + * check direct arguments as though they weren't in an aggregate. + */ + bool result; + + Assert(!context->in_agg_direct_args); + context->in_agg_direct_args = true; + result = finalize_grouping_exprs_walker((Node *) agg->aggdirectargs, + context); + context->in_agg_direct_args = false; + return result; + } + + /* + * We can skip recursing into aggregates of higher levels altogether, + * since they could not possibly contain exprs of concern to us (see + * transformAggregateCall). We do need to look at aggregates of lower + * levels, however. 
+ */ + if ((int) agg->agglevelsup > context->sublevels_up) + return false; + } + + if (IsA(node, GroupingFunc)) + { + GroupingFunc *grp = (GroupingFunc *) node; + + /* + * We only need to check GroupingFunc nodes at the exact level to which + * they belong, since they cannot mix levels in arguments. + */ + + if ((int) grp->agglevelsup == context->sublevels_up) + { + ListCell *lc; + List *ref_list = NIL; + + foreach(lc, grp->args) + { + Node *expr = lfirst(lc); + Index ref = 0; + + if (context->root) + expr = flatten_join_alias_vars(context->root, expr); + + /* + * Each expression must match a grouping entry at the current + * query level. Unlike the general expression case, we don't + * allow functional dependencies or outer references. + */ + + if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + + if (var->varlevelsup == context->sublevels_up) + { + foreach(gl, context->groupClauses) + { + TargetEntry *tle = lfirst(gl); + Var *gvar = (Var *) tle->expr; + + if (IsA(gvar, Var) && + gvar->varno == var->varno && + gvar->varattno == var->varattno && + gvar->varlevelsup == 0) + { + ref = tle->ressortgroupref; + break; + } + } + } + } + else if (context->have_non_var_grouping && + context->sublevels_up == 0) + { + foreach(gl, context->groupClauses) + { + TargetEntry *tle = lfirst(gl); + + if (equal(expr, tle->expr)) + { + ref = tle->ressortgroupref; + break; + } + } + } + + if (ref == 0) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("arguments to GROUPING must be grouping expressions of the associated query level"), + parser_errposition(context->pstate, + exprLocation(expr)))); + + ref_list = lappend_int(ref_list, ref); + } + + grp->refs = ref_list; + } + + if ((int) grp->agglevelsup > context->sublevels_up) + return false; + } + + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, + finalize_grouping_exprs_walker, + (void *) context, + 0); + 
context->sublevels_up--; + return result; + } + return expression_tree_walker(node, finalize_grouping_exprs_walker, + (void *) context); +} + + +/* + * Given a GroupingSet node, expand it and return a list of lists. + * + * For EMPTY nodes, return a list of one empty list. + * + * For SIMPLE nodes, return a list of one list, which is the node content. + * + * For CUBE and ROLLUP nodes, return a list of the expansions. + * + * For SET nodes, recursively expand contained CUBE and ROLLUP. + */ +static List* +expand_groupingset_node(GroupingSet *gs) +{ + List * result = NIL; + + switch (gs->kind) + { + case GROUPING_SET_EMPTY: + result = list_make1(NIL); + break; + + case GROUPING_SET_SIMPLE: + result = list_make1(gs->content); + break; + + case GROUPING_SET_ROLLUP: + { + List *rollup_val = gs->content; + ListCell *lc; + int curgroup_size = list_length(gs->content); + + while (curgroup_size > 0) + { + List *current_result = NIL; + int i = curgroup_size; + + foreach(lc, rollup_val) + { + GroupingSet *gs_current = (GroupingSet *) lfirst(lc); + + Assert(gs_current->kind == GROUPING_SET_SIMPLE); + + current_result + = list_concat(current_result, + list_copy(gs_current->content)); + + /* If we are done with making the current group, break */ + if (--i == 0) + break; + } + + result = lappend(result, current_result); + --curgroup_size; + } + + result = lappend(result, NIL); + } + break; + + case GROUPING_SET_CUBE: + { + List *cube_list = gs->content; + int number_bits = list_length(cube_list); + uint32 num_sets; + uint32 i; + + /* parser should cap this much lower */ + Assert(number_bits < 31); + + num_sets = (1U << number_bits); + + for (i = 0; i < num_sets; i++) + { + List *current_result = NIL; + ListCell *lc; + uint32 mask = 1U; + + foreach(lc, cube_list) + { + GroupingSet *gs_current = (GroupingSet *) lfirst(lc); + + Assert(gs_current->kind == GROUPING_SET_SIMPLE); + + if (mask & i) + { + current_result + = list_concat(current_result, + list_copy(gs_current->content)); + 
} + + mask <<= 1; + } + + result = lappend(result, current_result); + } + } + break; + + case GROUPING_SET_SETS: + { + ListCell *lc; + + foreach(lc, gs->content) + { + List *current_result = expand_groupingset_node(lfirst(lc)); + + result = list_concat(result, current_result); + } + } + break; + } + + return result; +} + +static int +cmp_list_len_asc(const void *a, const void *b) +{ + int la = list_length(*(List*const*)a); + int lb = list_length(*(List*const*)b); + return (la > lb) ? 1 : (la == lb) ? 0 : -1; +} + +/* + * Expand a groupingSets clause to a flat list of grouping sets. + * The returned list is sorted by length, shortest sets first. + * + * This is mainly for the planner, but we use it here too to do + * some consistency checks. + */ +List * +expand_grouping_sets(List *groupingSets, int limit) +{ + List *expanded_groups = NIL; + List *result = NIL; + double numsets = 1; + ListCell *lc; + + if (groupingSets == NIL) + return NIL; + + foreach(lc, groupingSets) + { + List *current_result = NIL; + GroupingSet *gs = lfirst(lc); + + current_result = expand_groupingset_node(gs); + + Assert(current_result != NIL); + + numsets *= list_length(current_result); + + if (limit >= 0 && numsets > limit) + return NIL; + + expanded_groups = lappend(expanded_groups, current_result); + } + + /* + * Do cartesian product between sublists of expanded_groups. 
+ * While at it, remove any duplicate elements from individual + * grouping sets (we must NOT change the number of sets though) + */ + + foreach(lc, (List *) linitial(expanded_groups)) + { + result = lappend(result, list_union_int(NIL, (List *) lfirst(lc))); + } + + for_each_cell(lc, lnext(list_head(expanded_groups))) + { + List *p = lfirst(lc); + List *new_result = NIL; + ListCell *lc2; + + foreach(lc2, result) + { + List *q = lfirst(lc2); + ListCell *lc3; + + foreach(lc3, p) + { + new_result = lappend(new_result, + list_union_int(q, (List *) lfirst(lc3))); + } + } + result = new_result; + } + + if (list_length(result) > 1) + { + int result_len = list_length(result); + List **buf = palloc(sizeof(List*) * result_len); + List **ptr = buf; + + foreach(lc, result) + { + *ptr++ = lfirst(lc); + } + + qsort(buf, result_len, sizeof(List*), cmp_list_len_asc); + + result = NIL; + ptr = buf; + + while (result_len-- > 0) + result = lappend(result, *ptr++); + + pfree(buf); + } + + return result; +} + /* * get_aggregate_argtypes * Identify the specific datatypes passed to an aggregate call. 
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 6b1bbe57d0eeaaf7efbd61d2d5976d226219fd96..a90bcf40c9d948f8cad385672fda9e459d0035b2 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -15,6 +15,8 @@ #include "postgres.h" +#include "miscadmin.h" + #include "access/heapam.h" #include "catalog/catalog.h" #include "access/htup_details.h" @@ -43,7 +45,6 @@ #include "utils/rel.h" #include "utils/syscache.h" - /* Convenience macro for the most common makeNamespaceItem() case */ #define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true) @@ -1725,40 +1726,181 @@ findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, return target_result; } +/*------------------------------------------------------------------------- + * Flatten out parenthesized sublists in grouping lists, and some cases + * of nested grouping sets. + * + * Inside a grouping set (ROLLUP, CUBE, or GROUPING SETS), we expect the + * content to be nested no more than 2 deep: i.e. ROLLUP((a,b),(c,d)) is + * ok, but ROLLUP((a,(b,c)),d) is flattened to ((a,b,c),d), which we then + * normalize to ((a,b,c),(d)). + * + * CUBE or ROLLUP can be nested inside GROUPING SETS (but not the reverse), + * and we leave that alone if we find it. But if we see GROUPING SETS inside + * GROUPING SETS, we can flatten and normalize as follows: + * GROUPING SETS (a, (b,c), GROUPING SETS ((c,d),(e)), (f,g)) + * becomes + * GROUPING SETS ((a), (b,c), (c,d), (e), (f,g)) + * + * This is per the spec's syntax transformations, but these are the only such + * transformations we do in parse analysis, so that queries retain the + * originally specified grouping set syntax for CUBE and ROLLUP as much as + * possible when deparsed. (Full expansion of the result into a list of + * grouping sets is left to the planner.) 
+ * + * When we're done, the resulting list should contain only these possible + * elements: + * - an expression + * - a CUBE or ROLLUP with a list of expressions nested 2 deep + * - a GROUPING SET containing any of: + * - expression lists + * - empty grouping sets + * - CUBE or ROLLUP nodes with lists nested 2 deep + * The return is a new list, but doesn't deep-copy the old nodes except for + * GroupingSet nodes. + * + * As a side effect, flag whether the list has any GroupingSet nodes. + *------------------------------------------------------------------------- + */ +static Node * +flatten_grouping_sets(Node *expr, bool toplevel, bool *hasGroupingSets) +{ + /* just in case of pathological input */ + check_stack_depth(); + + if (expr == (Node *) NIL) + return (Node *) NIL; + + switch (expr->type) + { + case T_RowExpr: + { + RowExpr *r = (RowExpr *) expr; + if (r->row_format == COERCE_IMPLICIT_CAST) + return flatten_grouping_sets((Node *) r->args, + false, NULL); + } + break; + case T_GroupingSet: + { + GroupingSet *gset = (GroupingSet *) expr; + ListCell *l2; + List *result_set = NIL; + + if (hasGroupingSets) + *hasGroupingSets = true; + + /* + * at the top level, we skip over all empty grouping sets; the + * caller can supply the canonical GROUP BY () if nothing is left. + */ + + if (toplevel && gset->kind == GROUPING_SET_EMPTY) + return (Node *) NIL; + + foreach(l2, gset->content) + { + Node *n2 = flatten_grouping_sets(lfirst(l2), false, NULL); + + result_set = lappend(result_set, n2); + } + + /* + * At top level, keep the grouping set node; but if we're in a nested + * grouping set, then we need to concat the flattened result into the + * outer list if it's simply nested. 
+ */ + + if (toplevel || (gset->kind != GROUPING_SET_SETS)) + { + return (Node *) makeGroupingSet(gset->kind, result_set, gset->location); + } + else + return (Node *) result_set; + } + case T_List: + { + List *result = NIL; + ListCell *l; + + foreach(l, (List *)expr) + { + Node *n = flatten_grouping_sets(lfirst(l), toplevel, hasGroupingSets); + if (n != (Node *) NIL) + { + if (IsA(n,List)) + result = list_concat(result, (List *) n); + else + result = lappend(result, n); + } + } + + return (Node *) result; + } + default: + break; + } + + return expr; +} + /* - * transformGroupClause - - * transform a GROUP BY clause + * Transform a single expression within a GROUP BY clause or grouping set. * - * GROUP BY items will be added to the targetlist (as resjunk columns) - * if not already present, so the targetlist must be passed by reference. + * The expression is added to the targetlist if not already present, and to the + * flatresult list (which will become the groupClause) if not already present + * there. The sortClause is consulted for operator and sort order hints. * - * This is also used for window PARTITION BY clauses (which act almost the - * same, but are always interpreted per SQL99 rules). + * Returns the ressortgroupref of the expression. 
+ * + * flatresult reference to flat list of SortGroupClause nodes + * seen_local bitmapset of sortgrouprefs already seen at the local level + * pstate ParseState + * gexpr node to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set */ -List * -transformGroupClause(ParseState *pstate, List *grouplist, - List **targetlist, List *sortClause, - ParseExprKind exprKind, bool useSQL99) +static Index +transformGroupClauseExpr(List **flatresult, Bitmapset *seen_local, + ParseState *pstate, Node *gexpr, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) { - List *result = NIL; - ListCell *gl; + TargetEntry *tle; + bool found = false; + + if (useSQL99) + tle = findTargetlistEntrySQL99(pstate, gexpr, + targetlist, exprKind); + else + tle = findTargetlistEntrySQL92(pstate, gexpr, + targetlist, exprKind); - foreach(gl, grouplist) + if (tle->ressortgroupref > 0) { - Node *gexpr = (Node *) lfirst(gl); - TargetEntry *tle; - bool found = false; + ListCell *sl; - if (useSQL99) - tle = findTargetlistEntrySQL99(pstate, gexpr, - targetlist, exprKind); - else - tle = findTargetlistEntrySQL92(pstate, gexpr, - targetlist, exprKind); + /* + * Eliminate duplicates (GROUP BY x, x) but only at local level. + * (Duplicates in grouping sets can affect the number of returned + * rows, so can't be dropped indiscriminately.) + * + * Since we don't care about anything except the sortgroupref, + * we can use a bitmapset rather than scanning lists. + */ + if (bms_is_member(tle->ressortgroupref,seen_local)) + return 0; - /* Eliminate duplicates (GROUP BY x, x) */ - if (targetIsInSortList(tle, InvalidOid, result)) - continue; + /* + * If we're already in the flat clause list, we don't need + * to consider adding ourselves again. 
+ */ + found = targetIsInSortList(tle, InvalidOid, *flatresult); + if (found) + return tle->ressortgroupref; /* * If the GROUP BY tlist entry also appears in ORDER BY, copy operator @@ -1770,35 +1912,308 @@ transformGroupClause(ParseState *pstate, List *grouplist, * sort step, and it allows the user to choose the equality semantics * used by GROUP BY, should she be working with a datatype that has * more than one equality operator. + * + * If we're in a grouping set, though, we force our requested ordering + * to be NULLS LAST, because if we have any hope of using a sorted agg + * for the job, we're going to be tacking on generated NULL values + * after the corresponding groups. If the user demands nulls first, + * another sort step is going to be inevitable, but that's the + * planner's problem. */ - if (tle->ressortgroupref > 0) + + foreach(sl, sortClause) { - ListCell *sl; + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); - foreach(sl, sortClause) + if (sc->tleSortGroupRef == tle->ressortgroupref) { - SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + SortGroupClause *grpc = copyObject(sc); + if (!toplevel) + grpc->nulls_first = false; + *flatresult = lappend(*flatresult, grpc); + found = true; + break; + } + } + } - if (sc->tleSortGroupRef == tle->ressortgroupref) - { - result = lappend(result, copyObject(sc)); - found = true; + /* + * If no match in ORDER BY, just add it to the result using default + * sort/group semantics. + */ + if (!found) + *flatresult = addTargetToGroupList(pstate, tle, + *flatresult, *targetlist, + exprLocation(gexpr), + true); + + /* + * _something_ must have assigned us a sortgroupref by now... + */ + + return tle->ressortgroupref; +} + +/* + * Transform a list of expressions within a GROUP BY clause or grouping set. + * + * The list of expressions belongs to a single clause within which duplicates + * can be safely eliminated. + * + * Returns an integer list of ressortgroupref values. 
+ * + * flatresult reference to flat list of SortGroupClause nodes + * pstate ParseState + * list nodes to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set + */ +static List * +transformGroupClauseList(List **flatresult, + ParseState *pstate, List *list, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) +{ + Bitmapset *seen_local = NULL; + List *result = NIL; + ListCell *gl; + + foreach(gl, list) + { + Node *gexpr = (Node *) lfirst(gl); + + Index ref = transformGroupClauseExpr(flatresult, + seen_local, + pstate, + gexpr, + targetlist, + sortClause, + exprKind, + useSQL99, + toplevel); + if (ref > 0) + { + seen_local = bms_add_member(seen_local, ref); + result = lappend_int(result, ref); + } + } + + return result; +} + +/* + * Transform a grouping set and (recursively) its content. + * + * The grouping set might be a GROUPING SETS node with other grouping sets + * inside it, but SETS within SETS have already been flattened out before + * reaching here. + * + * Returns the transformed node, which now contains SIMPLE nodes with lists + * of ressortgrouprefs rather than expressions. 
+ * + * flatresult reference to flat list of SortGroupClause nodes + * pstate ParseState + * gset grouping set to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set + */ +static Node * +transformGroupingSet(List **flatresult, + ParseState *pstate, GroupingSet *gset, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) +{ + ListCell *gl; + List *content = NIL; + + Assert(toplevel || gset->kind != GROUPING_SET_SETS); + + foreach(gl, gset->content) + { + Node *n = lfirst(gl); + + if (IsA(n, List)) + { + List *l = transformGroupClauseList(flatresult, + pstate, (List *) n, + targetlist, sortClause, + exprKind, useSQL99, false); + + content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE, + l, + exprLocation(n))); + } + else if (IsA(n, GroupingSet)) + { + GroupingSet *gset2 = (GroupingSet *) lfirst(gl); + + content = lappend(content, transformGroupingSet(flatresult, + pstate, gset2, + targetlist, sortClause, + exprKind, useSQL99, false)); + } + else + { + Index ref = transformGroupClauseExpr(flatresult, + NULL, + pstate, + n, + targetlist, + sortClause, + exprKind, + useSQL99, + false); + + content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE, + list_make1_int(ref), + exprLocation(n))); + } + } + + /* Arbitrarily cap the size of CUBE, which has exponential growth */ + if (gset->kind == GROUPING_SET_CUBE) + { + if (list_length(content) > 12) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("CUBE is limited to 12 elements"), + parser_errposition(pstate, gset->location))); + } + + return (Node *) makeGroupingSet(gset->kind, content, gset->location); +} + + +/* + * transformGroupClause - + * transform a GROUP BY clause + * + * GROUP BY items will be added to the targetlist (as resjunk columns) + * if not already present, so the 
targetlist must be passed by reference. + * + * This is also used for window PARTITION BY clauses (which act almost the + * same, but are always interpreted per SQL99 rules). + * + * Grouping sets make this a lot more complex than it was. Our goal here is + * twofold: we make a flat list of SortGroupClause nodes referencing each + * distinct expression used for grouping, with those expressions added to the + * targetlist if needed. At the same time, we build the groupingSets tree, + * which stores only ressortgrouprefs as integer lists inside GroupingSet nodes + * (possibly nested, but limited in depth: a GROUPING_SET_SETS node can contain + * nested SIMPLE, CUBE or ROLLUP nodes, but not more sets - we flatten that + * out; while CUBE and ROLLUP can contain only SIMPLE nodes). + * + * We skip much of the hard work if there are no grouping sets. + * + * One subtlety is that the groupClause list can end up empty while the + * groupingSets list is not; this happens if there are only empty grouping + * sets, or an explicit GROUP BY (). This has the same effect as specifying + * aggregates or a HAVING clause with no GROUP BY; the output is one row per + * grouping set even if the input is empty. + * + * Returns the transformed (flat) groupClause. + * + * pstate ParseState + * grouplist clause to transform + * groupingSets reference to list to contain the grouping set tree + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + */ +List * +transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99) +{ + List *result = NIL; + List *flat_grouplist; + List *gsets = NIL; + ListCell *gl; + bool hasGroupingSets = false; + Bitmapset *seen_local = NULL; + + /* + * Recursively flatten implicit RowExprs. 
(Technically this is only + * needed for GROUP BY, per the syntax rules for grouping sets, but + * we do it anyway.) + */ + flat_grouplist = (List *) flatten_grouping_sets((Node *) grouplist, + true, + &hasGroupingSets); + + /* + * If the list is now empty, but hasGroupingSets is true, it's because + * we elided redundant empty grouping sets. Restore a single empty + * grouping set to leave a canonical form: GROUP BY () + */ + + if (flat_grouplist == NIL && hasGroupingSets) + { + flat_grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, + NIL, + exprLocation((Node *) grouplist))); + } + + foreach(gl, flat_grouplist) + { + Node *gexpr = (Node *) lfirst(gl); + + if (IsA(gexpr, GroupingSet)) + { + GroupingSet *gset = (GroupingSet *) gexpr; + + switch (gset->kind) + { + case GROUPING_SET_EMPTY: + gsets = lappend(gsets, gset); + break; + case GROUPING_SET_SIMPLE: + /* can't happen */ + Assert(false); + break; + case GROUPING_SET_SETS: + case GROUPING_SET_CUBE: + case GROUPING_SET_ROLLUP: + gsets = lappend(gsets, + transformGroupingSet(&result, + pstate, gset, + targetlist, sortClause, + exprKind, useSQL99, true)); break; - } } } + else + { + Index ref = transformGroupClauseExpr(&result, seen_local, + pstate, gexpr, + targetlist, sortClause, + exprKind, useSQL99, true); - /* - * If no match in ORDER BY, just add it to the result using default - * sort/group semantics. 
- */ - if (!found) - result = addTargetToGroupList(pstate, tle, - result, *targetlist, - exprLocation(gexpr), - true); + if (ref > 0) + { + seen_local = bms_add_member(seen_local, ref); + if (hasGroupingSets) + gsets = lappend(gsets, + makeGroupingSet(GROUPING_SET_SIMPLE, + list_make1_int(ref), + exprLocation(gexpr))); + } + } } + /* parser should prevent this */ + Assert(gsets == NIL || groupingSets != NULL); + + if (groupingSets) + *groupingSets = gsets; + return result; } @@ -1903,6 +2318,7 @@ transformWindowDefinitions(ParseState *pstate, true /* force SQL99 rules */ ); partitionClause = transformGroupClause(pstate, windef->partitionClause, + NULL, targetlist, orderClause, EXPR_KIND_WINDOW_PARTITION, diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index f759606f88bc9c53a5527c60cdcb3d75fbc5cc10..0ff46dd457c7938bbd7ad022ea5a5f2cf115eb62 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -32,6 +32,7 @@ #include "parser/parse_relation.h" #include "parser/parse_target.h" #include "parser/parse_type.h" +#include "parser/parse_agg.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/xml.h" @@ -269,6 +270,10 @@ transformExprRecurse(ParseState *pstate, Node *expr) result = transformMultiAssignRef(pstate, (MultiAssignRef *) expr); break; + case T_GroupingFunc: + result = transformGroupingFunc(pstate, (GroupingFunc *) expr); + break; + case T_NamedArgExpr: { NamedArgExpr *na = (NamedArgExpr *) expr; diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 59973ba9c3c3403612bfd33239cf57825a882477..1b3fcd629c109194bb8eb6831407192a7a7914de 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -1681,6 +1681,10 @@ FigureColnameInternal(Node *node, char **name) break; case T_CollateClause: return FigureColnameInternal(((CollateClause *) node)->arg, name); + case T_GroupingFunc: + /* make GROUPING() act like a regular 
function */ + *name = "grouping"; + return 2; case T_SubLink: switch (((SubLink *) node)->subLinkType) { diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index e27afd1a3e05dfdfd110dcc3c7863148f4cc5698..e15e23c2e1fa5614dd1a3f3e924539e7ba0f5a27 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -2158,7 +2158,7 @@ view_query_is_auto_updatable(Query *viewquery, bool check_cols) if (viewquery->distinctClause != NIL) return gettext_noop("Views containing DISTINCT are not automatically updatable."); - if (viewquery->groupClause != NIL) + if (viewquery->groupClause != NIL || viewquery->groupingSets) return gettext_noop("Views containing GROUP BY are not automatically updatable."); if (viewquery->havingQual != NULL) diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index a9c6e626ba787d7a9e1f4ed5d4c126e3e1fc7516..e3dfdefe55cd6b226b7e0b31369dbd4cbc206224 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -92,6 +92,12 @@ contain_aggs_of_level_walker(Node *node, return true; /* abort the tree traversal and return true */ /* else fall through to examine argument */ } + if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup == context->sublevels_up) + return true; + /* else fall through to examine argument */ + } if (IsA(node, Query)) { /* Recurse into subselects */ @@ -157,6 +163,15 @@ locate_agg_of_level_walker(Node *node, } /* else fall through to examine argument */ } + if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup == context->sublevels_up && + ((GroupingFunc *) node)->location >= 0) + { + context->agg_location = ((GroupingFunc *) node)->location; + return true; /* abort the tree traversal and return true */ + } + } if (IsA(node, Query)) { /* Recurse into subselects */ @@ -712,6 +727,14 @@ IncrementVarSublevelsUp_walker(Node *node, agg->agglevelsup += 
context->delta_sublevels_up; /* fall through to recurse into argument */ } + if (IsA(node, GroupingFunc)) + { + GroupingFunc *grp = (GroupingFunc *) node; + + if (grp->agglevelsup >= context->min_sublevels_up) + grp->agglevelsup += context->delta_sublevels_up; + /* fall through to recurse into argument */ + } if (IsA(node, PlaceHolderVar)) { PlaceHolderVar *phv = (PlaceHolderVar *) node; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 298eebf5e67650f500d0992fd4d519ac6d4bcb3f..0a77400a80184f4017618200f00d58300a274dba 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -44,6 +44,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" #include "parser/keywords.h" +#include "parser/parse_node.h" #include "parser/parse_agg.h" #include "parser/parse_func.h" #include "parser/parse_oper.h" @@ -105,6 +106,8 @@ typedef struct int wrapColumn; /* max line length, or -1 for no limit */ int indentLevel; /* current indent level for prettyprint */ bool varprefix; /* TRUE to print prefixes on Vars */ + ParseExprKind special_exprkind; /* set only for exprkinds needing */ + /* special handling */ } deparse_context; /* @@ -369,9 +372,11 @@ static void get_target_list(List *targetList, deparse_context *context, static void get_setop_query(Node *setOp, Query *query, deparse_context *context, TupleDesc resultDesc); -static Node *get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, +static Node *get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno, deparse_context *context); +static void get_rule_groupingset(GroupingSet *gset, List *targetlist, + bool omit_parens, deparse_context *context); static void get_rule_orderby(List *orderList, List *targetList, bool force_colno, deparse_context *context); static void get_rule_windowclause(Query *query, deparse_context *context); @@ -419,8 +424,9 @@ static void printSubscripts(ArrayRef *aref, deparse_context *context); static char 
*get_relation_name(Oid relid); static char *generate_relation_name(Oid relid, List *namespaces); static char *generate_function_name(Oid funcid, int nargs, - List *argnames, Oid *argtypes, - bool has_variadic, bool *use_variadic_p); + List *argnames, Oid *argtypes, + bool has_variadic, bool *use_variadic_p, + ParseExprKind special_exprkind); static char *generate_operator_name(Oid operid, Oid arg1, Oid arg2); static text *string_to_text(char *str); static char *flatten_reloptions(Oid relid); @@ -878,6 +884,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) context.prettyFlags = pretty ? PRETTYFLAG_PAREN | PRETTYFLAG_INDENT : PRETTYFLAG_INDENT; context.wrapColumn = WRAP_COLUMN_DEFAULT; context.indentLevel = PRETTYINDENT_STD; + context.special_exprkind = EXPR_KIND_NONE; get_rule_expr(qual, &context, false); @@ -887,7 +894,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) appendStringInfo(&buf, "EXECUTE PROCEDURE %s(", generate_function_name(trigrec->tgfoid, 0, NIL, NULL, - false, NULL)); + false, NULL, EXPR_KIND_NONE)); if (trigrec->tgnargs > 0) { @@ -2502,6 +2509,7 @@ deparse_expression_pretty(Node *expr, List *dpcontext, context.prettyFlags = prettyFlags; context.wrapColumn = WRAP_COLUMN_DEFAULT; context.indentLevel = startIndent; + context.special_exprkind = EXPR_KIND_NONE; get_rule_expr(expr, &context, showimplicit); @@ -4112,6 +4120,7 @@ make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, context.prettyFlags = prettyFlags; context.wrapColumn = WRAP_COLUMN_DEFAULT; context.indentLevel = PRETTYINDENT_STD; + context.special_exprkind = EXPR_KIND_NONE; set_deparse_for_query(&dpns, query, NIL); @@ -4307,6 +4316,7 @@ get_query_def(Query *query, StringInfo buf, List *parentnamespace, context.prettyFlags = prettyFlags; context.wrapColumn = wrapColumn; context.indentLevel = startIndent; + context.special_exprkind = EXPR_KIND_NONE; set_deparse_for_query(&dpns, query, parentnamespace); @@ -4677,7 +4687,7 @@ get_basic_select_query(Query *query, 
deparse_context *context, SortGroupClause *srt = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); - get_rule_sortgroupclause(srt, query->targetList, + get_rule_sortgroupclause(srt->tleSortGroupRef, query->targetList, false, context); sep = ", "; } @@ -4702,20 +4712,43 @@ get_basic_select_query(Query *query, deparse_context *context, } /* Add the GROUP BY clause if given */ - if (query->groupClause != NULL) + if (query->groupClause != NULL || query->groupingSets != NULL) { + ParseExprKind save_exprkind; + appendContextKeyword(context, " GROUP BY ", -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); - sep = ""; - foreach(l, query->groupClause) + + save_exprkind = context->special_exprkind; + context->special_exprkind = EXPR_KIND_GROUP_BY; + + if (query->groupingSets == NIL) { - SortGroupClause *grp = (SortGroupClause *) lfirst(l); + sep = ""; + foreach(l, query->groupClause) + { + SortGroupClause *grp = (SortGroupClause *) lfirst(l); - appendStringInfoString(buf, sep); - get_rule_sortgroupclause(grp, query->targetList, - false, context); - sep = ", "; + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList, + false, context); + sep = ", "; + } } + else + { + sep = ""; + foreach(l, query->groupingSets) + { + GroupingSet *grp = lfirst(l); + + appendStringInfoString(buf, sep); + get_rule_groupingset(grp, query->targetList, true, context); + sep = ", "; + } + } + + context->special_exprkind = save_exprkind; } /* Add the HAVING clause if given */ @@ -4782,7 +4815,7 @@ get_target_list(List *targetList, deparse_context *context, * different from a whole-row Var). We need to call get_variable * directly so that we can tell it to do the right thing. 
*/ - if (tle->expr && IsA(tle->expr, Var)) + if (tle->expr && (IsA(tle->expr, Var))) { attname = get_variable((Var *) tle->expr, 0, true, context); } @@ -5001,23 +5034,24 @@ get_setop_query(Node *setOp, Query *query, deparse_context *context, * Also returns the expression tree, so caller need not find it again. */ static Node * -get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, +get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno, deparse_context *context) { StringInfo buf = context->buf; TargetEntry *tle; Node *expr; - tle = get_sortgroupclause_tle(srt, tlist); + tle = get_sortgroupref_tle(ref, tlist); expr = (Node *) tle->expr; /* - * Use column-number form if requested by caller. Otherwise, if - * expression is a constant, force it to be dumped with an explicit cast - * as decoration --- this is because a simple integer constant is - * ambiguous (and will be misinterpreted by findTargetlistEntry()) if we - * dump it without any decoration. Otherwise, just dump the expression - * normally. + * Use column-number form if requested by caller. Otherwise, if expression + * is a constant, force it to be dumped with an explicit cast as decoration + * --- this is because a simple integer constant is ambiguous (and will be + * misinterpreted by findTargetlistEntry()) if we dump it without any + * decoration. If it's anything more complex than a simple Var, then force + * extra parens around it, to ensure it can't be misinterpreted as a cube() + * or rollup() construct. */ if (force_colno) { @@ -5026,12 +5060,91 @@ get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, } else if (expr && IsA(expr, Const)) get_const_expr((Const *) expr, context, 1); + else if (!expr || IsA(expr, Var)) + get_rule_expr(expr, context, true); else + { + /* + * We must force parens for function-like expressions even if + * PRETTY_PAREN is off, since those are the ones in danger of + * misparsing. 
For other expressions we need to force them + * only if PRETTY_PAREN is on, since otherwise the expression + * will output them itself. (We can't skip the parens.) + */ + bool need_paren = (PRETTY_PAREN(context) + || IsA(expr, FuncExpr) + || IsA(expr, Aggref) + || IsA(expr, WindowFunc)); + if (need_paren) + appendStringInfoString(context->buf, "("); get_rule_expr(expr, context, true); + if (need_paren) + appendStringInfoString(context->buf, ")"); + } return expr; } +/* + * Display a GroupingSet + */ +static void +get_rule_groupingset(GroupingSet *gset, List *targetlist, + bool omit_parens, deparse_context *context) +{ + ListCell *l; + StringInfo buf = context->buf; + bool omit_child_parens = true; + char *sep = ""; + + switch (gset->kind) + { + case GROUPING_SET_EMPTY: + appendStringInfoString(buf, "()"); + return; + + case GROUPING_SET_SIMPLE: + { + if (!omit_parens || list_length(gset->content) != 1) + appendStringInfoString(buf, "("); + + foreach(l, gset->content) + { + Index ref = lfirst_int(l); + + appendStringInfoString(buf, sep); + get_rule_sortgroupclause(ref, targetlist, + false, context); + sep = ", "; + } + + if (!omit_parens || list_length(gset->content) != 1) + appendStringInfoString(buf, ")"); + } + return; + + case GROUPING_SET_ROLLUP: + appendStringInfoString(buf, "ROLLUP("); + break; + case GROUPING_SET_CUBE: + appendStringInfoString(buf, "CUBE("); + break; + case GROUPING_SET_SETS: + appendStringInfoString(buf, "GROUPING SETS ("); + omit_child_parens = false; + break; + } + + foreach(l, gset->content) + { + appendStringInfoString(buf, sep); + get_rule_groupingset(lfirst(l), targetlist, omit_child_parens, context); + sep = ", "; + } + + appendStringInfoString(buf, ")"); +} + /* * Display an ORDER BY list. 
*/ @@ -5052,7 +5165,7 @@ get_rule_orderby(List *orderList, List *targetList, TypeCacheEntry *typentry; appendStringInfoString(buf, sep); - sortexpr = get_rule_sortgroupclause(srt, targetList, + sortexpr = get_rule_sortgroupclause(srt->tleSortGroupRef, targetList, force_colno, context); sortcoltype = exprType(sortexpr); /* See whether operator is default < or > for datatype */ @@ -5152,7 +5265,7 @@ get_rule_windowspec(WindowClause *wc, List *targetList, SortGroupClause *grp = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); - get_rule_sortgroupclause(grp, targetList, + get_rule_sortgroupclause(grp->tleSortGroupRef, targetList, false, context); sep = ", "; } @@ -6879,6 +6992,16 @@ get_rule_expr(Node *node, deparse_context *context, get_agg_expr((Aggref *) node, context); break; + case T_GroupingFunc: + { + GroupingFunc *gexpr = (GroupingFunc *) node; + + appendStringInfoString(buf, "GROUPING("); + get_rule_expr((Node *) gexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + case T_WindowFunc: get_windowfunc_expr((WindowFunc *) node, context); break; @@ -7917,7 +8040,8 @@ get_func_expr(FuncExpr *expr, deparse_context *context, generate_function_name(funcoid, nargs, argnames, argtypes, expr->funcvariadic, - &use_variadic)); + &use_variadic, + context->special_exprkind)); nargs = 0; foreach(l, expr->args) { @@ -7949,7 +8073,8 @@ get_agg_expr(Aggref *aggref, deparse_context *context) generate_function_name(aggref->aggfnoid, nargs, NIL, argtypes, aggref->aggvariadic, - &use_variadic), + &use_variadic, + context->special_exprkind), (aggref->aggdistinct != NIL) ? 
"DISTINCT " : ""); if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) @@ -8039,7 +8164,8 @@ get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) appendStringInfo(buf, "%s(", generate_function_name(wfunc->winfnoid, nargs, argnames, argtypes, - false, NULL)); + false, NULL, + context->special_exprkind)); /* winstar can be set only in zero-argument aggregates */ if (wfunc->winstar) appendStringInfoChar(buf, '*'); @@ -9291,7 +9417,8 @@ generate_relation_name(Oid relid, List *namespaces) */ static char * generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, - bool has_variadic, bool *use_variadic_p) + bool has_variadic, bool *use_variadic_p, + ParseExprKind special_exprkind) { char *result; HeapTuple proctup; @@ -9306,6 +9433,7 @@ generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, int p_nvargs; Oid p_vatype; Oid *p_true_typeids; + bool force_qualify = false; proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); if (!HeapTupleIsValid(proctup)) @@ -9313,6 +9441,16 @@ generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, procform = (Form_pg_proc) GETSTRUCT(proctup); proname = NameStr(procform->proname); + /* + * Due to parser hacks to avoid needing to reserve CUBE, we need to force + * qualification in some special cases. + */ + if (special_exprkind == EXPR_KIND_GROUP_BY) + { + if (strcmp(proname, "cube") == 0 || strcmp(proname, "rollup") == 0) + force_qualify = true; + } + /* * Determine whether VARIADIC should be printed. We must do this first * since it affects the lookup rules in func_get_detail(). @@ -9344,14 +9482,23 @@ generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, /* * The idea here is to schema-qualify only if the parser would fail to * resolve the correct function given the unqualified func name with the - * specified argtypes and VARIADIC flag. + * specified argtypes and VARIADIC flag. 
But if we already decided to + * force qualification, then we can skip the lookup and pretend we didn't + * find it. */ - p_result = func_get_detail(list_make1(makeString(proname)), - NIL, argnames, nargs, argtypes, - !use_variadic, true, - &p_funcid, &p_rettype, - &p_retset, &p_nvargs, &p_vatype, - &p_true_typeids, NULL); + if (!force_qualify) + p_result = func_get_detail(list_make1(makeString(proname)), + NIL, argnames, nargs, argtypes, + !use_variadic, true, + &p_funcid, &p_rettype, + &p_retset, &p_nvargs, &p_vatype, + &p_true_typeids, NULL); + else + { + p_result = FUNCDETAIL_NOTFOUND; + p_funcid = InvalidOid; + } + if ((p_result == FUNCDETAIL_NORMAL || p_result == FUNCDETAIL_AGGREGATE || p_result == FUNCDETAIL_WINDOWFUNC) && diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 91399f79fc80273c4978f4520abe566fa5dd8ed3..04ed07b762df62734d66bc8bc27796ac167c6d70 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3158,6 +3158,8 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * groupExprs - list of expressions being grouped by * input_rows - number of rows estimated to arrive at the group/unique * filter step + * pgset - NULL, or a List** pointing to a grouping set to filter the + * groupExprs against * * Given the lack of any cross-correlation statistics in the system, it's * impossible to do anything really trustworthy with GROUP BY conditions @@ -3205,11 +3207,13 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * but we don't have the info to do better). 
*/ double -estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) +estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, + List **pgset) { List *varinfos = NIL; double numdistinct; ListCell *l; + int i; /* * We don't ever want to return an estimate of zero groups, as that tends @@ -3224,7 +3228,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) * for normal cases with GROUP BY or DISTINCT, but it is possible for * corner cases with set operations.) */ - if (groupExprs == NIL) + if (groupExprs == NIL || (pgset && list_length(*pgset) < 1)) return 1.0; /* @@ -3236,6 +3240,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) */ numdistinct = 1.0; + i = 0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); @@ -3243,6 +3248,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) List *varshere; ListCell *l2; + /* is expression in this grouping set? */ + if (pgset && !list_member_int(*pgset, i++)) + continue; + /* Short-circuit for expressions returning boolean */ if (exprType(groupexpr) == BOOLOID) { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index bb8a9b08e8a0fa3919110c9d0561de0b252d0cea..9a10fadfb5866af85c07a576bd0fdba040fb5d5e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201505152 +#define CATALOG_VERSION_NO 201505153 #endif diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index c9f722361a128f819504a6f62ec4d18699828958..4df44d024208dbca53ff3d8edd85511f90b21b87 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -83,6 +83,8 @@ extern void ExplainSeparatePlans(ExplainState *es); extern void ExplainPropertyList(const char *qlabel, List *data, ExplainState *es); +extern void ExplainPropertyListNested(const char *qlabel, List *data, + 
ExplainState *es); extern void ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es); extern void ExplainPropertyInteger(const char *qlabel, int value, diff --git a/src/include/lib/bipartite_match.h b/src/include/lib/bipartite_match.h new file mode 100644 index 0000000000000000000000000000000000000000..c80f9bfdd0c941254f8e0596ee24f60fb483ce77 --- /dev/null +++ b/src/include/lib/bipartite_match.h @@ -0,0 +1,44 @@ +/* + * bipartite_match.h + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * src/include/lib/bipartite_match.h + */ +#ifndef BIPARTITE_MATCH_H +#define BIPARTITE_MATCH_H + +/* + * Given a bipartite graph consisting of nodes U numbered 1..nU, nodes V + * numbered 1..nV, and an adjacency map of undirected edges in the form + * adjacency[u] = [n, v1, v2, v3, ... vn], we wish to find a "maximum + * cardinality matching", which is defined as follows: a matching is a subset + * of the original edges such that no node has more than one edge, and a + * matching has maximum cardinality if there exists no other matching with a + * greater number of edges. + * + * This matching has various applications in graph theory, but the motivating + * example here is Dilworth's theorem: a partially-ordered set can be divided + * into the minimum number of chains (i.e. subsets X where x1 < x2 < x3 ...) by + * a bipartite graph construction. This gives us a polynomial-time solution to + * the problem of planning a collection of grouping sets with the provably + * minimal number of sort operations. 
+ */ +typedef struct bipartite_match_state +{ + int u_size; /* size of U */ + int v_size; /* size of V */ + int matching; /* number of edges in matching */ + short **adjacency; /* adjacency[u] = [n, v1,v2,v3,...,vn] */ + short *pair_uv; /* pair_uv[u] -> v */ + short *pair_vu; /* pair_vu[v] -> u */ + + float *distance; /* distance[u], float so we can have +inf */ + short *queue; /* queue storage for breadth search */ +} BipartiteMatchState; + +BipartiteMatchState *BipartiteMatch(int u_size, int v_size, short **adjacency); + +void BipartiteMatchFree(BipartiteMatchState *state); + +#endif /* BIPARTITE_MATCH_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 972368019a9440183fd1a1d6d4c8919290ed4a5a..0a92cc4efc9c6e1de59dc4a865f723d7e56e5bf2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -23,6 +23,7 @@ #include "utils/reltrigger.h" #include "utils/sortsupport.h" #include "utils/tuplestore.h" +#include "utils/tuplesort.h" /* ---------------- @@ -614,6 +615,22 @@ typedef struct AggrefExprState int aggno; /* ID number for agg within its plan node */ } AggrefExprState; +/* ---------------- + * GroupingFuncExprState node + * + * The list of column numbers refers to the input tuples of the Agg node to + * which the GroupingFunc belongs, and may contain 0 for references to columns + * that are only present in grouping sets processed by different Agg nodes (and + * which are therefore always considered "grouping" here). 
+ * ---------------- + */ +typedef struct GroupingFuncExprState +{ + ExprState xprstate; + struct AggState *aggstate; + List *clauses; /* integer list of column numbers */ +} GroupingFuncExprState; + /* ---------------- * WindowFuncExprState node * ---------------- @@ -1796,19 +1813,33 @@ typedef struct GroupState /* these structs are private in nodeAgg.c: */ typedef struct AggStatePerAggData *AggStatePerAgg; typedef struct AggStatePerGroupData *AggStatePerGroup; +typedef struct AggStatePerPhaseData *AggStatePerPhase; typedef struct AggState { ScanState ss; /* its first field is NodeTag */ List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) */ - FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ + AggStatePerPhase phase; /* pointer to current phase data */ + int numphases; /* number of phases */ + int current_phase; /* current phase number */ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ - MemoryContext aggcontext; /* memory context for long-lived data */ + ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ AggStatePerAgg curperagg; /* identifies currently active aggregate */ + bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ + int projected_set; /* The last projected grouping set */ + int current_set; /* The current grouping set being evaluated */ + Bitmapset *grouped_cols; /* grouped cols in current projection */ + List *all_grouped_cols; /* list of all grouped cols in DESC order */ + /* These fields are for grouping set phase data */ + int maxsets; /* The max number of sets in any phase */ + AggStatePerPhase phases; /* array of all phases */ + Tuplesortstate *sort_in; /* sorted input to phases > 0 */ + Tuplesortstate *sort_out; /* input is copied here for next phase */ + TupleTableSlot *sort_slot; /* slot 
for sort results */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 4dff6a09014f097f5f3a3a2186639365346560cb..01d9fedb54f78a4d029043c01d3b1dca20842741 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -81,4 +81,6 @@ extern DefElem *makeDefElem(char *name, Node *arg); extern DefElem *makeDefElemExtended(char *nameSpace, char *name, Node *arg, DefElemAction defaction); +extern GroupingSet *makeGroupingSet(GroupingSetKind kind, List *content, int location); + #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8b275f6e263e445555123fb12a3fb3f47b436416..669a0afa09c94e535d897cc36b1ad00bf3ed5329 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -136,6 +136,7 @@ typedef enum NodeTag T_Const, T_Param, T_Aggref, + T_GroupingFunc, T_WindowFunc, T_ArrayRef, T_FuncExpr, @@ -188,6 +189,7 @@ typedef enum NodeTag T_GenericExprState, T_WholeRowVarExprState, T_AggrefExprState, + T_GroupingFuncExprState, T_WindowFuncExprState, T_ArrayRefExprState, T_FuncExprState, @@ -406,6 +408,7 @@ typedef enum NodeTag T_RangeTblFunction, T_WithCheckOption, T_SortGroupClause, + T_GroupingSet, T_WindowClause, T_PrivGrantee, T_FuncWithArgs, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 6723f46f3f1cfa1e905e04c6b654609440fef9fc..23190e1af05112ab5b05e62c99ee168c25db3429 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -138,6 +138,8 @@ typedef struct Query List *groupClause; /* a list of SortGroupClause's */ + List *groupingSets; /* a list of GroupingSet's if present */ + Node *havingQual; /* qualifications applied to groups */ List *windowClause; /* a list of WindowClause's */ @@ -1001,6 +1003,73 @@ 
typedef struct SortGroupClause bool hashable; /* can eqop be implemented by hashing? */ } SortGroupClause; +/* + * GroupingSet - + * representation of CUBE, ROLLUP and GROUPING SETS clauses + * + * In a Query with grouping sets, the groupClause contains a flat list of + * SortGroupClause nodes for each distinct expression used. The actual + * structure of the GROUP BY clause is given by the groupingSets tree. + * + * In the raw parser output, GroupingSet nodes (of all types except SIMPLE + * which is not used) are potentially mixed in with the expressions in the + * groupClause of the SelectStmt. (An expression can't contain a GroupingSet, + * but a list may mix GroupingSet and expression nodes.) At this stage, the + * content of each node is a list of expressions, some of which may be RowExprs + * which represent sublists rather than actual row constructors, and nested + * GroupingSet nodes where legal in the grammar. The structure directly + * reflects the query syntax. + * + * In parse analysis, the transformed expressions are used to build the tlist + * and groupClause list (of SortGroupClause nodes), and the groupingSets tree + * is eventually reduced to a fixed format: + * + * EMPTY nodes represent (), and obviously have no content + * + * SIMPLE nodes represent a list of one or more expressions to be treated as an + * atom by the enclosing structure; the content is an integer list of + * ressortgroupref values (see SortGroupClause) + * + * CUBE and ROLLUP nodes contain a list of one or more SIMPLE nodes. + * + * SETS nodes contain a list of EMPTY, SIMPLE, CUBE or ROLLUP nodes, but after + * parse analysis they cannot contain more SETS nodes; enough of the syntactic + * transforms of the spec have been applied that we no longer have arbitrarily + * deep nesting (though we still preserve the use of cube/rollup). 
+ * + * Note that if the groupingSets tree contains no SIMPLE nodes (only EMPTY + * nodes at the leaves), then the groupClause will be empty, but this is still + * an aggregation query (similar to using aggs or HAVING without GROUP BY). + * + * As an example, the following clause: + * + * GROUP BY GROUPING SETS ((a,b), CUBE(c,(d,e))) + * + * looks like this after raw parsing: + * + * SETS( RowExpr(a,b) , CUBE( c, RowExpr(d,e) ) ) + * + * and parse analysis converts it to: + * + * SETS( SIMPLE(1,2), CUBE( SIMPLE(3), SIMPLE(4,5) ) ) + */ +typedef enum +{ + GROUPING_SET_EMPTY, + GROUPING_SET_SIMPLE, + GROUPING_SET_ROLLUP, + GROUPING_SET_CUBE, + GROUPING_SET_SETS +} GroupingSetKind; + +typedef struct GroupingSet +{ + NodeTag type; + GroupingSetKind kind; + List *content; + int location; +} GroupingSet; + /* * WindowClause - * transformed representation of WINDOW and OVER clauses diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index a17500082b1ae2bd69f7661a65fed57bbf21e142..729456d6e5d12c3ebec5efe70c050408d898e96f 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -229,8 +229,9 @@ extern List *list_union_int(const List *list1, const List *list2); extern List *list_union_oid(const List *list1, const List *list2); extern List *list_intersection(const List *list1, const List *list2); +extern List *list_intersection_int(const List *list1, const List *list2); -/* currently, there's no need for list_intersection_int etc */ +/* currently, there's no need for list_intersection_ptr etc */ extern List *list_difference(const List *list1, const List *list2); extern List *list_difference_ptr(const List *list1, const List *list2); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4e655b0e6c1dfffb18bd0c9411736bc18cf0c4ba..51906d68985193667567400aa7a308d6470cd3b7 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -712,6 +712,8 @@ typedef struct Agg AttrNumber *grpColIdx; /* 
their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ + List *groupingSets; /* grouping sets to use */ + List *chain; /* chained Agg/Sort nodes */ } Agg; /* ---------------- diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 4a4dd7e9ef846497544426f8fcce55242de2d7bb..a5467c5379918b3eb5abc01c5a1c13a4395e8d11 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -271,6 +271,41 @@ typedef struct Aggref int location; /* token location, or -1 if unknown */ } Aggref; +/* + * GroupingFunc + * + * A GroupingFunc is a GROUPING(...) expression, which behaves in many ways + * like an aggregate function (e.g. it "belongs" to a specific query level, + * which might not be the one immediately containing it), but also differs in + * an important respect: it never evaluates its arguments, they merely + * designate expressions from the GROUP BY clause of the query level to which + * it belongs. + * + * The spec defines the evaluation of GROUPING() purely by syntactic + * replacement, but we make it a real expression for optimization purposes so + * that one Agg node can handle multiple grouping sets at once. Evaluating the + * result only needs the column positions to check against the grouping set + * being projected. However, for EXPLAIN to produce meaningful output, we have + * to keep the original expressions around, since expression deparse does not + * give us any feasible way to get at the GROUP BY clause. + * + * Also, we treat two GroupingFunc nodes as equal if they have equal arguments + * lists and agglevelsup, without comparing the refs and cols annotations. + * + * In raw parse output we have only the args list; parse analysis fills in the + * refs list, and the planner fills in the cols list. 
+ */ +typedef struct GroupingFunc +{ + Expr xpr; + List *args; /* arguments, not evaluated but kept for + * benefit of EXPLAIN etc. */ + List *refs; /* ressortgrouprefs of arguments */ + List *cols; /* actual column positions set by planner */ + Index agglevelsup; /* same as Aggref.agglevelsup */ + int location; /* token location */ +} GroupingFunc; + /* * WindowFunc */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d3ee61c4d046101b1a6dcbceb6821958ee72f939..279051ed18711a80ad49f861dfb8730427b540ba 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -260,6 +260,9 @@ typedef struct PlannerInfo /* optional private data for join_search_hook, e.g., GEQO */ void *join_search_private; + + /* for GroupingFunc fixup in setrefs */ + AttrNumber *grouping_map; } PlannerInfo; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index da15fca1f6c4374062ce8ffc8aaf28c46e416915..52b077a1b471516da3d4d4ada70d65afacb644f3 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -59,6 +59,7 @@ extern Sort *make_sort_from_groupcols(PlannerInfo *root, List *groupcls, extern Agg *make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + List *groupingSets, long numGroups, Plan *lefttree); extern WindowAgg *make_windowagg(PlannerInfo *root, List *tlist, diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 3dc8babacb58467797f3d4e2e7e85f36e59aa804..b0f0f196838768749431a9f8b4539ac61c7b5356 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -43,6 +43,9 @@ extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, extern List *get_sortgrouplist_exprs(List *sgClauses, List *targetList); +extern SortGroupClause *get_sortgroupref_clause(Index sortref, + List *clauses); + extern Oid 
*extract_grouping_ops(List *groupClause); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 7d5f857ae598f0c44f4e8450622af57cdf026ddb..241406907799378564ad7c2706602afad8ca2d7f 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -99,6 +99,7 @@ PG_KEYWORD("cost", COST, UNRESERVED_KEYWORD) PG_KEYWORD("create", CREATE, RESERVED_KEYWORD) PG_KEYWORD("cross", CROSS, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("csv", CSV, UNRESERVED_KEYWORD) +PG_KEYWORD("cube", CUBE, UNRESERVED_KEYWORD) PG_KEYWORD("current", CURRENT_P, UNRESERVED_KEYWORD) PG_KEYWORD("current_catalog", CURRENT_CATALOG, RESERVED_KEYWORD) PG_KEYWORD("current_date", CURRENT_DATE, RESERVED_KEYWORD) @@ -174,6 +175,7 @@ PG_KEYWORD("grant", GRANT, RESERVED_KEYWORD) PG_KEYWORD("granted", GRANTED, UNRESERVED_KEYWORD) PG_KEYWORD("greatest", GREATEST, COL_NAME_KEYWORD) PG_KEYWORD("group", GROUP_P, RESERVED_KEYWORD) +PG_KEYWORD("grouping", GROUPING, COL_NAME_KEYWORD) PG_KEYWORD("handler", HANDLER, UNRESERVED_KEYWORD) PG_KEYWORD("having", HAVING, RESERVED_KEYWORD) PG_KEYWORD("header", HEADER_P, UNRESERVED_KEYWORD) @@ -325,6 +327,7 @@ PG_KEYWORD("revoke", REVOKE, UNRESERVED_KEYWORD) PG_KEYWORD("right", RIGHT, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("role", ROLE, UNRESERVED_KEYWORD) PG_KEYWORD("rollback", ROLLBACK, UNRESERVED_KEYWORD) +PG_KEYWORD("rollup", ROLLUP, UNRESERVED_KEYWORD) PG_KEYWORD("row", ROW, COL_NAME_KEYWORD) PG_KEYWORD("rows", ROWS, UNRESERVED_KEYWORD) PG_KEYWORD("rule", RULE, UNRESERVED_KEYWORD) @@ -343,6 +346,7 @@ PG_KEYWORD("session", SESSION, UNRESERVED_KEYWORD) PG_KEYWORD("session_user", SESSION_USER, RESERVED_KEYWORD) PG_KEYWORD("set", SET, UNRESERVED_KEYWORD) PG_KEYWORD("setof", SETOF, COL_NAME_KEYWORD) +PG_KEYWORD("sets", SETS, UNRESERVED_KEYWORD) PG_KEYWORD("share", SHARE, UNRESERVED_KEYWORD) PG_KEYWORD("show", SHOW, 
UNRESERVED_KEYWORD) PG_KEYWORD("similar", SIMILAR, TYPE_FUNC_NAME_KEYWORD) diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h index 91a0706f4593da2faba12e6f252bbfcfc8d4b68e..6a5f9bbdf1556ced6d4d2031791432711e6501c2 100644 --- a/src/include/parser/parse_agg.h +++ b/src/include/parser/parse_agg.h @@ -18,11 +18,16 @@ extern void transformAggregateCall(ParseState *pstate, Aggref *agg, List *args, List *aggorder, bool agg_distinct); + +extern Node *transformGroupingFunc(ParseState *pstate, GroupingFunc *g); + extern void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, WindowDef *windef); extern void parseCheckAggregates(ParseState *pstate, Query *qry); +extern List *expand_grouping_sets(List *groupingSets, int limit); + extern int get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes); extern Oid resolve_aggregate_transtype(Oid aggfuncid, diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index f1b7d3d8969f2e893e4dda7fc6c4aefb4d4301cc..cbe5e76bb84202641eb390ad457407b7de04f1cf 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -27,6 +27,7 @@ extern Node *transformWhereClause(ParseState *pstate, Node *clause, extern Node *transformLimitClause(ParseState *pstate, Node *clause, ParseExprKind exprKind, const char *constructName); extern List *transformGroupClause(ParseState *pstate, List *grouplist, + List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99); extern List *transformSortClause(ParseState *pstate, List *orderlist, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index bf69f2a62949cd87f3ecfcf9cb231e927b7fe20d..fdca7130bb07984f7176c811248d55812b645908 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -185,7 +185,7 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *rightstart, Selectivity *rightend); extern double 
estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows); + double input_rows, List **pgset); extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets); diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out new file mode 100644 index 0000000000000000000000000000000000000000..842c2aec7e21032084dc160cc72d1f66abb6ccb9 --- /dev/null +++ b/src/test/regress/expected/groupingsets.out @@ -0,0 +1,590 @@ +-- +-- grouping sets +-- +-- test data sources +create temp view gstest1(a,b,v) + as values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14), + (2,3,15), + (3,3,16),(3,4,17), + (4,1,18),(4,1,19); +create temp table gstest2 (a integer, b integer, c integer, d integer, + e integer, f integer, g integer, h integer); +copy gstest2 from stdin; +create temp table gstest3 (a integer, b integer, c integer, d integer); +copy gstest3 from stdin; +alter table gstest3 add primary key (a); +create temp table gstest_empty (a integer, b integer, v integer); +create function gstest_data(v integer, out a integer, out b integer) + returns setof record + as $f$ + begin + return query select v, i from generate_series(1,3) i; + end; + $f$ language plpgsql; +-- basic functionality +-- simple rollup with multiple plain aggregates, with and without ordering +-- (and with ordering differing from grouping) +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b); + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 1 | | 1 | 60 | 5 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 2 | | 1 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 3 | | 1 | 33 | 2 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + 4 | | 1 | 37 | 2 | 19 + | | 3 | 145 | 10 | 19 +(12 rows) + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by a,b; + a | b | 
grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 1 | | 1 | 60 | 5 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 2 | | 1 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 3 | | 1 | 33 | 2 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + 4 | | 1 | 37 | 2 | 19 + | | 3 | 145 | 10 | 19 +(12 rows) + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by b desc, a; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | | 1 | 60 | 5 | 14 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 4 | | 1 | 37 | 2 | 19 + | | 3 | 145 | 10 | 19 + 3 | 4 | 0 | 17 | 1 | 17 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 1 | 0 | 21 | 2 | 11 + 4 | 1 | 0 | 37 | 2 | 19 +(12 rows) + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by coalesce(a,0)+coalesce(b,0); + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + | | 3 | 145 | 10 | 19 + 1 | | 1 | 60 | 5 | 14 + 1 | 1 | 0 | 21 | 2 | 11 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 4 | | 1 | 37 | 2 | 19 + 4 | 1 | 0 | 37 | 2 | 19 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 +(12 rows) + +-- various types of ordered aggs +select a, b, grouping(a,b), + array_agg(v order by v), + string_agg(v::text, ':' order by v desc), + percentile_disc(0.5) within group (order by v), + rank(1,2,12) within group (order by a,b,v) + from gstest1 group by rollup (a,b) order by a,b; + a | b | grouping | array_agg | string_agg | percentile_disc | rank +---+---+----------+---------------------------------+-------------------------------+-----------------+------ + 1 | 1 | 0 | {10,11} | 11:10 | 10 | 3 + 1 | 2 | 0 | {12,13} | 13:12 | 12 | 1 + 1 | 3 | 0 | {14} | 14 | 14 | 1 + 1 | | 1 | 
{10,11,12,13,14} | 14:13:12:11:10 | 12 | 3 + 2 | 3 | 0 | {15} | 15 | 15 | 1 + 2 | | 1 | {15} | 15 | 15 | 1 + 3 | 3 | 0 | {16} | 16 | 16 | 1 + 3 | 4 | 0 | {17} | 17 | 17 | 1 + 3 | | 1 | {16,17} | 17:16 | 16 | 1 + 4 | 1 | 0 | {18,19} | 19:18 | 18 | 1 + 4 | | 1 | {18,19} | 19:18 | 18 | 1 + | | 3 | {10,11,12,13,14,15,16,17,18,19} | 19:18:17:16:15:14:13:12:11:10 | 14 | 3 +(12 rows) + +-- test usage of grouped columns in direct args of aggs +select grouping(a), a, array_agg(b), + rank(a) within group (order by b nulls first), + rank(a) within group (order by b nulls last) + from (values (1,1),(1,4),(1,5),(3,1),(3,2)) v(a,b) + group by rollup (a) order by a; + grouping | a | array_agg | rank | rank +----------+---+-------------+------+------ + 0 | 1 | {1,4,5} | 1 | 1 + 0 | 3 | {1,2} | 3 | 3 + 1 | | {1,4,5,1,2} | 1 | 6 +(3 rows) + +-- nesting with window functions +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by rollup (a,b) order by rsum, a, b; + a | b | sum | rsum +---+---+-----+------ + 1 | 1 | 8 | 8 + 1 | 2 | 2 | 10 + 1 | | 10 | 20 + 2 | 2 | 2 | 22 + 2 | | 2 | 24 + | | 12 | 36 +(6 rows) + +-- empty input: first is 0 rows, second 1, third 3 etc. 
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); + a | b | sum | count +---+---+-----+------- +(0 rows) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); + a | b | sum | count +---+---+-----+------- + | | | 0 +(1 row) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); + a | b | sum | count +---+---+-----+------- + | | | 0 + | | | 0 + | | | 0 +(3 rows) + +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + sum | count +-----+------- + | 0 + | 0 + | 0 +(3 rows) + +-- empty input with joins tests some important code paths +select t1.a, t2.b, sum(t1.v), count(*) from gstest_empty t1, gstest_empty t2 + group by grouping sets ((t1.a,t2.b),()); + a | b | sum | count +---+---+-----+------- + | | | 0 +(1 row) + +-- simple joins, var resolution, GROUPING on join vars +select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) + from gstest1 t1, gstest2 t2 + group by grouping sets ((t1.a, t2.b), ()); + a | b | grouping | sum | max +---+---+----------+------+----- + 1 | 1 | 0 | 420 | 1 + 1 | 2 | 0 | 120 | 2 + 2 | 1 | 0 | 105 | 1 + 2 | 2 | 0 | 30 | 2 + 3 | 1 | 0 | 231 | 1 + 3 | 2 | 0 | 66 | 2 + 4 | 1 | 0 | 259 | 1 + 4 | 2 | 0 | 74 | 2 + | | 3 | 1305 | 2 +(9 rows) + +select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) + from gstest1 t1 join gstest2 t2 on (t1.a=t2.a) + group by grouping sets ((t1.a, t2.b), ()); + a | b | grouping | sum | max +---+---+----------+-----+----- + 1 | 1 | 0 | 420 | 1 + 1 | 2 | 0 | 60 | 1 + 2 | 2 | 0 | 15 | 2 + | | 3 | 495 | 2 +(4 rows) + +select a, b, grouping(a, b), sum(t1.v), max(t2.c) + from gstest1 t1 join gstest2 t2 using (a,b) + group by grouping sets ((a, b), ()); + a | b | grouping | sum | max +---+---+----------+-----+----- + 1 | 1 | 0 | 147 | 2 + 1 | 2 | 0 | 25 | 2 + | | 3 | 172 | 2 +(3 rows) + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from 
gstest3 + group by grouping sets ((a,b), (a,c)); + a | d | grouping +---+---+---------- + 1 | 1 | 1 + 2 | 2 | 1 + 1 | 1 | 2 + 2 | 2 | 2 +(4 rows) + +-- simple rescan tests +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by rollup (a,b); + a | b | sum +---+---+----- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 + 1 | | 3 + 2 | 1 | 2 + 2 | 2 | 2 + 2 | 3 | 2 + 2 | | 6 + | | 9 +(9 rows) + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by rollup (a,b)) s; +ERROR: aggregate functions are not allowed in FROM clause of their own query level +LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ... + ^ +-- min max optimisation should still work with GROUP BY () +explain (costs off) + select min(unique1) from tenk1 GROUP BY (); + QUERY PLAN +------------------------------------------------------------ + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +-- Views with GROUPING SET queries +CREATE VIEW gstest_view AS select a, b, grouping(a,b), sum(c), count(*), max(c) + from gstest2 group by rollup ((a,b,c),(c,d)); +NOTICE: view "gstest_view" will be a temporary view +select pg_get_viewdef('gstest_view'::regclass, true); + pg_get_viewdef +------------------------------------------------------------------------------- + SELECT gstest2.a, + + gstest2.b, + + GROUPING(gstest2.a, gstest2.b) AS "grouping", + + sum(gstest2.c) AS sum, + + count(*) AS count, + + max(gstest2.c) AS max + + FROM gstest2 + + GROUP BY ROLLUP((gstest2.a, gstest2.b, gstest2.c), (gstest2.c, gstest2.d)); +(1 row) + +-- Nested queries with 3 or more levels of nesting +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); + grouping +---------- + 0 + 0 + 0 +(3 rows) + +select(select (select grouping(e,f) from (values (1)) v2(c)) from (values 
(1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); + grouping +---------- + 0 + 1 + 3 +(3 rows) + +select(select (select grouping(c) from (values (1)) v2(c) GROUP BY c) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); + grouping +---------- + 0 + 0 + 0 +(3 rows) + +-- Combinations of operations +select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d); + a | b | c | d +---+---+---+--- + 1 | 1 | 1 | + 1 | | 1 | + | | 1 | + 1 | 1 | 2 | + 1 | 2 | 2 | + 1 | | 2 | + 2 | 2 | 2 | + 2 | | 2 | + | | 2 | + 1 | 1 | | 1 + 1 | | | 1 + | | | 1 + 1 | 1 | | 2 + 1 | 2 | | 2 + 1 | | | 2 + 2 | 2 | | 2 + 2 | | | 2 + | | | 2 +(18 rows) + +select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); + a | b +---+--- + 1 | 2 + 2 | 3 +(2 rows) + +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 +(21 rows) + +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); + grouping +---------- + 0 + 0 + 0 +(3 rows) + +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); + grouping +---------- + 0 + 0 + 0 + 
0 +(4 rows) + +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; + a | b | sum | rsum +---+---+-----+------ + 1 | 1 | 8 | 8 + 1 | 2 | 2 | 10 + 1 | | 10 | 20 + 2 | 2 | 2 | 22 + 2 | | 2 | 24 + | 1 | 8 | 32 + | 2 | 4 | 36 + | | 12 | 48 +(8 rows) + +select a, b, sum(c) from (values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14),(2,3,15),(3,3,16),(3,4,17),(4,1,18),(4,1,19)) v(a,b,c) group by rollup (a,b); + a | b | sum +---+---+----- + 1 | 1 | 21 + 1 | 2 | 25 + 1 | 3 | 14 + 1 | | 60 + 2 | 3 | 15 + 2 | | 15 + 3 | 3 | 16 + 3 | 4 | 17 + 3 | | 33 + 4 | 1 | 37 + 4 | | 37 + | | 145 +(12 rows) + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + a | b | sum +---+---+----- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 + 1 | | 3 + 2 | 1 | 2 + 2 | 2 | 2 + 2 | 3 | 2 + 2 | | 6 + | 1 | 3 + | 2 | 3 + | 3 | 3 + | | 9 +(12 rows) + +-- Agg level check. This query should error out. +select (select grouping(a,b) from gstest2) from gstest2 group by a,b; +ERROR: arguments to GROUPING must be grouping expressions of the associated query level +LINE 1: select (select grouping(a,b) from gstest2) from gstest2 grou... 
+ ^ +--Nested queries +select a, b, sum(c), count(*) from gstest2 group by grouping sets (rollup(a,b),a); + a | b | sum | count +---+---+-----+------- + 1 | 1 | 8 | 7 + 1 | 2 | 2 | 1 + 1 | | 10 | 8 + 1 | | 10 | 8 + 2 | 2 | 2 | 1 + 2 | | 2 | 1 + 2 | | 2 | 1 + | | 12 | 9 +(8 rows) + +-- HAVING queries +select ten, sum(distinct four) from onek a +group by grouping sets((ten,four),(ten)) +having exists (select 1 from onek b where sum(distinct a.four) = b.four); + ten | sum +-----+----- + 0 | 0 + 0 | 2 + 0 | 2 + 1 | 1 + 1 | 3 + 2 | 0 + 2 | 2 + 2 | 2 + 3 | 1 + 3 | 3 + 4 | 0 + 4 | 2 + 4 | 2 + 5 | 1 + 5 | 3 + 6 | 0 + 6 | 2 + 6 | 2 + 7 | 1 + 7 | 3 + 8 | 0 + 8 | 2 + 8 | 2 + 9 | 1 + 9 | 3 +(25 rows) + +-- FILTER queries +select ten, sum(distinct four) filter (where four::text ~ '123') from onek a +group by rollup(ten); + ten | sum +-----+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + | +(11 rows) + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | 0 | 50 + 1 | 1 | 0 | 2 | 50 + 1 | 1 | 0 | 4 | 50 + 1 | 1 | 0 | 6 | 50 + 1 | 1 | 0 | 8 | 50 + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | 1 | 50 + 1 | 1 | 1 | 3 | 50 + 1 | 1 | 1 | 5 | 50 + 1 | 1 | 1 | 7 | 50 + 1 | 1 | 1 | 9 | 50 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | 0 | 50 + 1 | 1 | 2 | 2 | 50 + 1 | 1 | 2 | 4 | 50 + 1 | 1 | 2 | 6 | 50 + 1 | 1 | 2 | 8 | 50 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | 1 | 50 + 1 | 1 | 3 | 3 | 50 + 1 | 1 | 3 | 5 | 50 + 1 | 1 | 3 | 7 | 50 + 1 | 1 | 3 | 9 | 50 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 1 | 1 | | | 1000 + 2 | 2 | 0 | 0 | 50 + 2 | 2 | 0 | 2 | 50 + 2 | 2 | 0 | 4 | 50 + 2 | 2 | 0 | 6 | 50 + 2 | 2 | 0 | 8 | 50 + 2 | 2 | 0 | | 
250 + 2 | 2 | 1 | 1 | 50 + 2 | 2 | 1 | 3 | 50 + 2 | 2 | 1 | 5 | 50 + 2 | 2 | 1 | 7 | 50 + 2 | 2 | 1 | 9 | 50 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | 0 | 50 + 2 | 2 | 2 | 2 | 50 + 2 | 2 | 2 | 4 | 50 + 2 | 2 | 2 | 6 | 50 + 2 | 2 | 2 | 8 | 50 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | 1 | 50 + 2 | 2 | 3 | 3 | 50 + 2 | 2 | 3 | 5 | 50 + 2 | 2 | 3 | 7 | 50 + 2 | 2 | 3 | 9 | 50 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 + 2 | 2 | | | 1000 +(70 rows) + +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +------------------------------------------------------------------------------------------------------------------------------------------------------ + {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"} + {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"} +(2 rows) + +-- end diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f39b73abc2a62d99c96ab057b87a99fc9eea687e..91780cdcc7398ac2ca7f9a05e45bab4bb2c93998 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -84,7 +84,7 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi # ---------- # Another group of parallel tests # ---------- -test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address tablesample +test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address tablesample groupingsets # ---------- # Another group of parallel tests diff --git 
a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 9441b97e3a69e315cfbb1c76abeb9d9adee06588..a2e0cebbdb500a0b6dd8d3d7436e7afe6eca021e 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -86,6 +86,7 @@ test: union test: case test: join test: aggregates +test: groupingsets test: transactions ignore: random test: random diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql new file mode 100644 index 0000000000000000000000000000000000000000..0bffb8531c2c6f757c221d229af1ff9861fcdf07 --- /dev/null +++ b/src/test/regress/sql/groupingsets.sql @@ -0,0 +1,165 @@ +-- +-- grouping sets +-- + +-- test data sources + +create temp view gstest1(a,b,v) + as values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14), + (2,3,15), + (3,3,16),(3,4,17), + (4,1,18),(4,1,19); + +create temp table gstest2 (a integer, b integer, c integer, d integer, + e integer, f integer, g integer, h integer); +copy gstest2 from stdin; +1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 2 +1 1 1 1 1 1 2 2 +1 1 1 1 1 2 2 2 +1 1 1 1 2 2 2 2 +1 1 1 2 2 2 2 2 +1 1 2 2 2 2 2 2 +1 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 +\. + +create temp table gstest3 (a integer, b integer, c integer, d integer); +copy gstest3 from stdin; +1 1 1 1 +2 2 2 2 +\. 
+alter table gstest3 add primary key (a); + +create temp table gstest_empty (a integer, b integer, v integer); + +create function gstest_data(v integer, out a integer, out b integer) + returns setof record + as $f$ + begin + return query select v, i from generate_series(1,3) i; + end; + $f$ language plpgsql; + +-- basic functionality + +-- simple rollup with multiple plain aggregates, with and without ordering +-- (and with ordering differing from grouping) +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b); +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by a,b; +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by b desc, a; +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by coalesce(a,0)+coalesce(b,0); + +-- various types of ordered aggs +select a, b, grouping(a,b), + array_agg(v order by v), + string_agg(v::text, ':' order by v desc), + percentile_disc(0.5) within group (order by v), + rank(1,2,12) within group (order by a,b,v) + from gstest1 group by rollup (a,b) order by a,b; + +-- test usage of grouped columns in direct args of aggs +select grouping(a), a, array_agg(b), + rank(a) within group (order by b nulls first), + rank(a) within group (order by b nulls last) + from (values (1,1),(1,4),(1,5),(3,1),(3,2)) v(a,b) + group by rollup (a) order by a; + +-- nesting with window functions +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by rollup (a,b) order by rsum, a, b; + +-- empty input: first is 0 rows, second 1, third 3 etc. 
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + +-- empty input with joins tests some important code paths +select t1.a, t2.b, sum(t1.v), count(*) from gstest_empty t1, gstest_empty t2 + group by grouping sets ((t1.a,t2.b),()); + +-- simple joins, var resolution, GROUPING on join vars +select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) + from gstest1 t1, gstest2 t2 + group by grouping sets ((t1.a, t2.b), ()); + +select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) + from gstest1 t1 join gstest2 t2 on (t1.a=t2.a) + group by grouping sets ((t1.a, t2.b), ()); + +select a, b, grouping(a, b), sum(t1.v), max(t2.c) + from gstest1 t1 join gstest2 t2 using (a,b) + group by grouping sets ((a, b), ()); + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + +-- simple rescan tests + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by rollup (a,b); + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by rollup (a,b)) s; + +-- min max optimisation should still work with GROUP BY () +explain (costs off) + select min(unique1) from tenk1 GROUP BY (); + +-- Views with GROUPING SET queries +CREATE VIEW gstest_view AS select a, b, grouping(a,b), sum(c), count(*), max(c) + from gstest2 group by rollup ((a,b,c),(c,d)); + +select pg_get_viewdef('gstest_view'::regclass, true); + +-- Nested queries with 3 or more levels of nesting +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); +select(select (select 
grouping(e,f) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); +select(select (select grouping(c) from (values (1)) v2(c) GROUP BY c) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); + +-- Combinations of operations +select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d); +select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); + +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +select a, b, sum(c) from (values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14),(2,3,15),(3,3,16),(3,4,17),(4,1,18),(4,1,19)) v(a,b,c) group by rollup (a,b); +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + + +-- Agg level check. This query should error out. 
+select (select grouping(a,b) from gstest2) from gstest2 group by a,b; + +--Nested queries +select a, b, sum(c), count(*) from gstest2 group by grouping sets (rollup(a,b),a); + +-- HAVING queries +select ten, sum(distinct four) from onek a +group by grouping sets((ten,four),(ten)) +having exists (select 1 from onek b where sum(distinct a.four) = b.four); + +-- FILTER queries +select ten, sum(distinct four) filter (where four::text ~ '123') from onek a +group by rollup(ten); + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- end