From 174fab993414dc80c594219aac7ba35669780b84 Mon Sep 17 00:00:00 2001
From: Andrew Gierth <rhodiumtoad@postgresql.org>
Date: Thu, 17 Jan 2019 05:33:01 +0000
Subject: [PATCH] Postpone aggregate checks until after collation is assigned.

Previously, parseCheckAggregates was run before
assign_query_collations, but this causes problems if any expression
has already had a collation assigned by some transform function (e.g.
transformCaseExpr) before parseCheckAggregates runs. The differing
collations would cause expressions not to be recognized as equal to
the ones in the GROUP BY clause, leading to spurious errors about
unaggregated column references.

The result was that CASE expr WHEN val ... would fail when "expr"
contained a GROUPING() expression or matched one of the group by
expressions, and where collatable types were involved; whereas the
supposedly identical CASE WHEN expr = val ... would succeed.

Backpatch all the way; this appears to have been wrong ever since
collations were introduced.

Per report from Guillaume Lelarge, analysis and patch by me.

Discussion: https://postgr.es/m/CAECtzeVSO_US8C2Khgfv54ZMUOBR4sWq+6_bLrETnWExHT=rFg@mail.gmail.com
Discussion: https://postgr.es/m/87muo0k0c7.fsf@news-spur.riddles.org.uk
---
 src/backend/parser/analyze.c             | 18 ++++++++++++------
 src/test/regress/expected/aggregates.out | 19 +++++++++++++++++++
 src/test/regress/sql/aggregates.sql      |  8 ++++++++
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index a3b2300408a..e18e23409c8 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -391,11 +391,13 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
 	qry->hasAggs = pstate->p_hasAggs;
-	if (pstate->p_hasAggs)
-		parseCheckAggregates(pstate, qry);
 
 	assign_query_collations(pstate, qry);
 
+	/* this must be done after collations, for reliable comparison of exprs */
+	if (pstate->p_hasAggs)
+		parseCheckAggregates(pstate, qry);
+
 	return qry;
 }
 
@@ -1010,8 +1012,6 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
 	qry->hasAggs = pstate->p_hasAggs;
-	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
-		parseCheckAggregates(pstate, qry);
 
 	foreach(l, stmt->lockingClause)
 	{
@@ -1021,6 +1021,10 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 
 	assign_query_collations(pstate, qry);
 
+	/* this must be done after collations, for reliable comparison of exprs */
+	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
+		parseCheckAggregates(pstate, qry);
+
 	return qry;
 }
 
@@ -1470,8 +1474,6 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
 	qry->hasAggs = pstate->p_hasAggs;
-	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
-		parseCheckAggregates(pstate, qry);
 
 	foreach(l, lockingClause)
 	{
@@ -1481,6 +1483,10 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 
 	assign_query_collations(pstate, qry);
 
+	/* this must be done after collations, for reliable comparison of exprs */
+	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
+		parseCheckAggregates(pstate, qry);
+
 	return qry;
 }
 
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index a43f674a9e3..887df315f04 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1655,3 +1655,22 @@ select least_agg(variadic array[q1,q2]) from int8_tbl;
  -4567890123456789
 (1 row)
 
+-- check collation-sensitive matching between grouping expressions
+select v||'a', case v||'a' when 'aa' then 1 else 0 end, count(*)
+  from unnest(array['a','b']) u(v)
+ group by v||'a' order by 1;
+ ?column? | case | count 
+----------+------+-------
+ aa       |    1 |     1
+ ba       |    0 |     1
+(2 rows)
+
+select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
+  from unnest(array['a','b']) u(v)
+ group by v||'a' order by 1;
+ ?column? | case | count 
+----------+------+-------
+ aa       |    1 |     1
+ ba       |    0 |     1
+(2 rows)
+
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index c12a2d8b7b6..a6310dd18f5 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -612,3 +612,11 @@ drop view aggordview1;
 -- variadic aggregates
 select least_agg(q1,q2) from int8_tbl;
 select least_agg(variadic array[q1,q2]) from int8_tbl;
+
+-- check collation-sensitive matching between grouping expressions
+select v||'a', case v||'a' when 'aa' then 1 else 0 end, count(*)
+  from unnest(array['a','b']) u(v)
+ group by v||'a' order by 1;
+select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
+  from unnest(array['a','b']) u(v)
+ group by v||'a' order by 1;
-- 
GitLab