From 420c1661630c96ad10f58ca967d5f561bb404cf9 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 2 Jul 2016 13:23:02 -0400
Subject: [PATCH] Fix failure to mark all aggregates with appropriate
 transtype.

In commit 915b703e1 I gave get_agg_clause_costs() the responsibility of
marking Aggref nodes with the appropriate aggtranstype.  I failed to notice
that where it was being called from, it might see only a subset of the
Aggref nodes that were in the original targetlist.  Specifically, if there
are duplicate aggregate calls in the tlist, either make_sort_input_target
or make_window_input_target might put just a single instance into the
grouping_target, and then only that instance would get marked.  Fix by
moving the call back into grouping_planner(), before we start building
assorted PathTargets from the query tlist.  Per report from Stefan Huehner.

Report: <20160702131056.GD3165@huehner.biz>
---
 src/backend/optimizer/plan/planner.c | 54 +++++++++++++++++-----------
 src/test/regress/expected/limit.out  | 24 +++++++++++++
 src/test/regress/sql/limit.sql       |  8 +++++
 3 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a66317367c3..ddf1109de76 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -114,6 +114,7 @@ static Size estimate_hashagg_tablesize(Path *path,
 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
+					  const AggClauseCosts *agg_costs,
 					  List *rollup_lists,
 					  List *rollup_groupclauses);
 static RelOptInfo *create_window_paths(PlannerInfo *root,
@@ -1499,6 +1500,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		PathTarget *grouping_target;
 		PathTarget *scanjoin_target;
 		bool		have_grouping;
+		AggClauseCosts agg_costs;
 		WindowFuncLists *wflists = NULL;
 		List	   *activeWindows = NIL;
 		List	   *rollup_lists = NIL;
@@ -1623,6 +1625,28 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 */
 		root->processed_tlist = tlist;
 
+		/*
+		 * Collect statistics about aggregates for estimating costs, and mark
+		 * all the aggregates with resolved aggtranstypes.  We must do this
+		 * before slicing and dicing the tlist into various pathtargets, else
+		 * some copies of the Aggref nodes might escape being marked with the
+		 * correct transtypes.
+		 *
+		 * Note: currently, we do not detect duplicate aggregates here.  This
+		 * may result in somewhat-overestimated cost, which is fine for our
+		 * purposes since all Paths will get charged the same.  But at some
+		 * point we might wish to do that detection in the planner, rather
+		 * than during executor startup.
+		 */
+		MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
+		if (parse->hasAggs)
+		{
+			get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
+								 &agg_costs);
+			get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
+								 &agg_costs);
+		}
+
 		/*
 		 * Locate any window functions in the tlist.  (We don't need to look
 		 * anywhere else, since expressions used in ORDER BY will be in there
@@ -1822,6 +1846,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			current_rel = create_grouping_paths(root,
 												current_rel,
 												grouping_target,
+												&agg_costs,
 												rollup_lists,
 												rollup_groupclauses);
 		}
@@ -3244,6 +3269,7 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
  *
  * input_rel: contains the source-data Paths
  * target: the pathtarget for the result Paths to compute
+ * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
  * rollup_groupclauses: list of grouping clauses for grouping sets,
  *		or NIL if not doing grouping sets
@@ -3260,6 +3286,7 @@ static RelOptInfo *
 create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
+					  const AggClauseCosts *agg_costs,
 					  List *rollup_lists,
 					  List *rollup_groupclauses)
 {
@@ -3267,7 +3294,6 @@ create_grouping_paths(PlannerInfo *root,
 	Path	   *cheapest_path = input_rel->cheapest_total_path;
 	RelOptInfo *grouped_rel;
 	PathTarget *partial_grouping_target = NULL;
-	AggClauseCosts agg_costs;
 	AggClauseCosts agg_partial_costs;	/* parallel only */
 	AggClauseCosts agg_final_costs;		/* parallel only */
 	Size		hashaggtablesize;
@@ -3364,20 +3390,6 @@ create_grouping_paths(PlannerInfo *root,
 		return grouped_rel;
 	}
 
-	/*
-	 * Collect statistics about aggregates for estimating costs.  Note: we do
-	 * not detect duplicate aggregates here; a somewhat-overestimated cost is
-	 * okay for our purposes.
-	 */
-	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
-	if (parse->hasAggs)
-	{
-		get_agg_clause_costs(root, (Node *) target->exprs, AGGSPLIT_SIMPLE,
-							 &agg_costs);
-		get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
-							 &agg_costs);
-	}
-
 	/*
 	 * Estimate number of groups.
 	 */
@@ -3414,7 +3426,7 @@ create_grouping_paths(PlannerInfo *root,
 	 */
 	can_hash = (parse->groupClause != NIL &&
 				parse->groupingSets == NIL &&
-				agg_costs.numOrderedAggs == 0 &&
+				agg_costs->numOrderedAggs == 0 &&
 				grouping_is_hashable(parse->groupClause));
 
 	/*
@@ -3446,7 +3458,7 @@ create_grouping_paths(PlannerInfo *root,
 		/* We don't know how to do grouping sets in parallel. */
 		try_parallel_aggregation = false;
 	}
-	else if (agg_costs.hasNonPartial || agg_costs.hasNonSerial)
+	else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
 	{
 		/* Insufficient support for partial mode. */
 		try_parallel_aggregation = false;
@@ -3627,7 +3639,7 @@ create_grouping_paths(PlannerInfo *root,
 												  (List *) parse->havingQual,
 													  rollup_lists,
 													  rollup_groupclauses,
-													  &agg_costs,
+													  agg_costs,
 													  dNumGroups));
 				}
 				else if (parse->hasAggs)
@@ -3645,7 +3657,7 @@ create_grouping_paths(PlannerInfo *root,
 											 AGGSPLIT_SIMPLE,
 											 parse->groupClause,
 											 (List *) parse->havingQual,
-											 &agg_costs,
+											 agg_costs,
 											 dNumGroups));
 				}
 				else if (parse->groupClause)
@@ -3727,7 +3739,7 @@ create_grouping_paths(PlannerInfo *root,
 	if (can_hash)
 	{
 		hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
-													  &agg_costs,
+													  agg_costs,
 													  dNumGroups);
 
 		/*
@@ -3751,7 +3763,7 @@ create_grouping_paths(PlannerInfo *root,
 									 AGGSPLIT_SIMPLE,
 									 parse->groupClause,
 									 (List *) parse->havingQual,
-									 &agg_costs,
+									 agg_costs,
 									 dNumGroups));
 		}
 
diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out
index 659a1015b48..9c3eecfc3bd 100644
--- a/src/test/regress/expected/limit.out
+++ b/src/test/regress/expected/limit.out
@@ -296,3 +296,27 @@ order by s2 desc;
   0 |  0
 (3 rows)
 
+-- test for failure to set all aggregates' aggtranstype
+explain (verbose, costs off)
+select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
+  from tenk1 group by thousand order by thousand limit 3;
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
+ Limit
+   Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))), thousand
+   ->  GroupAggregate
+         Output: sum(tenthous), ((sum(tenthous))::double precision + (random() * '0'::double precision)), thousand
+         Group Key: tenk1.thousand
+         ->  Index Only Scan using tenk1_thous_tenthous on public.tenk1
+               Output: thousand, tenthous
+(7 rows)
+
+select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
+  from tenk1 group by thousand order by thousand limit 3;
+  s1   |  s2   
+-------+-------
+ 45000 | 45000
+ 45010 | 45010
+ 45020 | 45020
+(3 rows)
+
diff --git a/src/test/regress/sql/limit.sql b/src/test/regress/sql/limit.sql
index ef5686c520b..8015f81fc2b 100644
--- a/src/test/regress/sql/limit.sql
+++ b/src/test/regress/sql/limit.sql
@@ -91,3 +91,11 @@ order by s2 desc;
 
 select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2
 order by s2 desc;
+
+-- test for failure to set all aggregates' aggtranstype
+explain (verbose, costs off)
+select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
+  from tenk1 group by thousand order by thousand limit 3;
+
+select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
+  from tenk1 group by thousand order by thousand limit 3;
-- 
GitLab