From 96ca8ffebcb43df1c575ce6831324c553a31c891 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 30 Oct 2001 19:58:58 +0000
Subject: [PATCH] Fix problems with subselects used in GROUP BY expressions,
 per gripe from Philip Warner.  Side effect of change is that GROUP BY
 expressions will not be re-evaluated at multiple plan levels anymore, whereas
 this sometimes happened with old code.

---
 src/backend/optimizer/plan/planner.c |  40 ++++----
 src/backend/optimizer/plan/setrefs.c | 137 +++++++++++++++------------
 src/backend/optimizer/util/clauses.c |  89 ++++++++++++-----
 src/include/optimizer/clauses.h      |   4 +-
 4 files changed, 167 insertions(+), 103 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d13035ca9ca..faca5728e6b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.111 2001/10/28 06:25:45 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.112 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction)
 	parse->havingQual = preprocess_expression(parse, parse->havingQual,
 											  EXPRKIND_HAVING);
 
+	/*
+	 * Check for ungrouped variables passed to subplans in targetlist and
+	 * HAVING clause (but not in WHERE or JOIN/ON clauses, since those are
+	 * evaluated before grouping).  We can't do this any earlier because
+	 * we must use the preprocessed targetlist for comparisons of grouped
+	 * expressions.
+	 */
+	if (parse->hasSubLinks &&
+		(parse->groupClause != NIL || parse->hasAggs))
+		check_subplans_for_ungrouped_vars(parse);
+
 	/*
 	 * A HAVING clause without aggregates is equivalent to a WHERE clause
 	 * (except it can only refer to grouped fields).  Transfer any
@@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 #endif
 	}
 
+	/* Expand SubLinks to SubPlans */
 	if (parse->hasSubLinks)
-	{
-		/* Expand SubLinks to SubPlans */
 		expr = SS_process_sublinks(expr);
 
-		if (kind != EXPRKIND_WHERE &&
-			(parse->groupClause != NIL || parse->hasAggs))
-		{
-			/*
-			 * Check for ungrouped variables passed to subplans.  Note we
-			 * do NOT do this for subplans in WHERE (or JOIN/ON); it's
-			 * legal there because WHERE is evaluated pre-GROUP.
-			 */
-			check_subplans_for_ungrouped_vars(expr, parse);
-		}
-	}
-
 	/* Replace uplevel vars with Param nodes */
 	if (PlannerQueryLevel > 1)
 		expr = SS_replace_correlation_vars(expr);
@@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction)
 
 		/*
 		 * If there are aggregates then the Group node should just return
-		 * the same set of vars as the subplan did (but we can exclude any
-		 * GROUP BY expressions).  If there are no aggregates then the
-		 * Group node had better compute the final tlist.
+		 * the same set of vars as the subplan did.  If there are no aggs
+		 * then the Group node had better compute the final tlist.
 		 */
 		if (parse->hasAggs)
-			group_tlist = flatten_tlist(result_plan->targetlist);
+			group_tlist = new_unsorted_tlist(result_plan->targetlist);
 		else
 			group_tlist = tlist;
 
@@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction)
  * where the a+b target will be used by the Sort/Group steps, and the
  * other targets will be used for computing the final results.	(In the
  * above example we could theoretically suppress the a and b targets and
- * use only a+b, but it's not really worth the trouble.)
+ * pass down only c,d,a+b, but it's not really worth the trouble to
+ * eliminate simple var references from the subplan.  We will avoid doing
+ * the extra computation to recompute a+b at the outer level; see
+ * replace_vars_with_subplan_refs() in setrefs.c.)
  *
  * 'parse' is the query being processed.
  * 'tlist' is the query's target list.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index e7f8361b9a7..652e7c294a0 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.71 2001/03/22 03:59:37 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.72 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,7 +33,8 @@ typedef struct
 typedef struct
 {
 	Index		subvarno;
-	List	   *subplanTargetList;
+	List	   *subplan_targetlist;
+	bool		tlist_has_non_vars;
 } replace_vars_with_subplan_refs_context;
 
 static void fix_expr_references(Plan *plan, Node *node);
@@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node,
 						join_references_context *context);
 static Node *replace_vars_with_subplan_refs(Node *node,
 							   Index subvarno,
-							   List *subplanTargetList);
+							   List *subplan_targetlist,
+							   bool tlist_has_non_vars);
 static Node *replace_vars_with_subplan_refs_mutator(Node *node,
 						replace_vars_with_subplan_refs_context *context);
 static bool fix_opids_walker(Node *node, void *context);
@@ -278,75 +280,62 @@ set_join_references(Join *join)
  * qual expressions with elements of the subplan's tlist (which was
  * generated by flatten_tlist() from these selfsame expressions, so it
  * should have all the required variables).  There is an important exception,
- * however: a GROUP BY expression that is also an output expression will
- * have been pushed into the subplan tlist unflattened.  We want to detect
- * this case and reference the subplan output directly.  Therefore, check
- * for equality of the whole tlist expression to any subplan element before
- * we resort to picking the expression apart for individual Vars.
+ * however: GROUP BY and ORDER BY expressions will have been pushed into the
+ * subplan tlist unflattened.  If these values are also needed in the output
+ * then we want to reference the subplan tlist element rather than recomputing
+ * the expression.
  */
 static void
 set_uppernode_references(Plan *plan, Index subvarno)
 {
 	Plan	   *subplan = plan->lefttree;
-	List	   *subplanTargetList,
-			   *outputTargetList,
+	List	   *subplan_targetlist,
+			   *output_targetlist,
 			   *l;
+	bool		tlist_has_non_vars;
 
 	if (subplan != NULL)
-		subplanTargetList = subplan->targetlist;
+		subplan_targetlist = subplan->targetlist;
 	else
-		subplanTargetList = NIL;
+		subplan_targetlist = NIL;
 
-	outputTargetList = NIL;
-	foreach(l, plan->targetlist)
+	/*
+	 * Detect whether subplan tlist has any non-Vars (typically it won't
+	 * because it's been flattened).  This allows us to save comparisons
+	 * in common cases.
+	 */
+	tlist_has_non_vars = false;
+	foreach(l, subplan_targetlist)
 	{
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
-		TargetEntry *subplantle;
-		Node	   *newexpr;
-
-		subplantle = tlistentry_member(tle->expr, subplanTargetList);
-		if (subplantle)
-		{
-			/* Found a matching subplan output expression */
-			Resdom	   *resdom = subplantle->resdom;
-			Var		   *newvar;
 
-			newvar = makeVar(subvarno,
-							 resdom->resno,
-							 resdom->restype,
-							 resdom->restypmod,
-							 0);
-			/* If we're just copying a simple Var, copy up original info */
-			if (subplantle->expr && IsA(subplantle->expr, Var))
-			{
-				Var		   *subvar = (Var *) subplantle->expr;
-
-				newvar->varnoold = subvar->varnoold;
-				newvar->varoattno = subvar->varoattno;
-			}
-			else
-			{
-				newvar->varnoold = 0;
-				newvar->varoattno = 0;
-			}
-			newexpr = (Node *) newvar;
-		}
-		else
+		if (tle->expr && ! IsA(tle->expr, Var))
 		{
-			/* No matching expression, so replace individual Vars */
-			newexpr = replace_vars_with_subplan_refs(tle->expr,
-													 subvarno,
-													 subplanTargetList);
+			tlist_has_non_vars = true;
+			break;
 		}
-		outputTargetList = lappend(outputTargetList,
-								   makeTargetEntry(tle->resdom, newexpr));
 	}
-	plan->targetlist = outputTargetList;
+
+	output_targetlist = NIL;
+	foreach(l, plan->targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		Node	   *newexpr;
+
+		newexpr = replace_vars_with_subplan_refs(tle->expr,
+												 subvarno,
+												 subplan_targetlist,
+												 tlist_has_non_vars);
+		output_targetlist = lappend(output_targetlist,
+									makeTargetEntry(tle->resdom, newexpr));
+	}
+	plan->targetlist = output_targetlist;
 
 	plan->qual = (List *)
 		replace_vars_with_subplan_refs((Node *) plan->qual,
 									   subvarno,
-									   subplanTargetList);
+									   subplan_targetlist,
+									   tlist_has_non_vars);
 }
 
 /*
@@ -439,9 +428,16 @@ join_references_mutator(Node *node,
  * --- so this routine should only be applied to nodes whose subplans'
  * targetlists were generated via flatten_tlist() or some such method.
  *
- * 'node': the tree to be fixed (a targetlist or qual list)
+ * If tlist_has_non_vars is true, then we try to match whole subexpressions
+ * against elements of the subplan tlist, so that we can avoid recomputing
+ * expressions that were already computed by the subplan.  (This is relatively
+ * expensive, so we don't want to try it in the common case where the
+ * subplan tlist is just a flattened list of Vars.)
+ *
+ * 'node': the tree to be fixed (a target item or qual)
  * 'subvarno': varno to be assigned to all Vars
- * 'subplanTargetList': target list for subplan
+ * 'subplan_targetlist': target list for subplan
+ * 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs
  *
  * The resulting tree is a copy of the original in which all Var nodes have
  * varno = subvarno, varattno = resno of corresponding subplan target.
@@ -450,12 +446,14 @@ join_references_mutator(Node *node,
 static Node *
 replace_vars_with_subplan_refs(Node *node,
 							   Index subvarno,
-							   List *subplanTargetList)
+							   List *subplan_targetlist,
+							   bool tlist_has_non_vars)
 {
 	replace_vars_with_subplan_refs_context context;
 
 	context.subvarno = subvarno;
-	context.subplanTargetList = subplanTargetList;
+	context.subplan_targetlist = subplan_targetlist;
+	context.tlist_has_non_vars = tlist_has_non_vars;
 	return replace_vars_with_subplan_refs_mutator(node, &context);
 }
 
@@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node,
 	if (IsA(node, Var))
 	{
 		Var		   *var = (Var *) node;
-		Var		   *newvar = (Var *) copyObject(var);
 		Resdom	   *resdom;
+		Var		   *newvar;
 
-		resdom = tlist_member((Node *) var, context->subplanTargetList);
+		resdom = tlist_member((Node *) var, context->subplan_targetlist);
 		if (!resdom)
 			elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list");
-
+		newvar = (Var *) copyObject(var);
 		newvar->varno = context->subvarno;
 		newvar->varattno = resdom->resno;
 		return (Node *) newvar;
 	}
+	/* Try matching more complex expressions too, if tlist has any */
+	if (context->tlist_has_non_vars)
+	{
+		Resdom	   *resdom;
+
+		resdom = tlist_member(node, context->subplan_targetlist);
+		if (resdom)
+		{
+			/* Found a matching subplan output expression */
+			Var		   *newvar;
+
+			newvar = makeVar(context->subvarno,
+							 resdom->resno,
+							 resdom->restype,
+							 resdom->restypmod,
+							 0);
+			newvar->varnoold = 0; /* wasn't ever a plain Var */
+			newvar->varoattno = 0;
+			return (Node *) newvar;
+		}
+	}
 	return expression_tree_mutator(node,
 								   replace_vars_with_subplan_refs_mutator,
 								   (void *) context);
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index c92581082b2..859c409709c 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.89 2001/10/25 05:49:34 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.90 2001/10/30 19:58:58 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -39,12 +39,18 @@
 	((Node *) makeConst(BOOLOID, 1, (Datum) (val), \
 						(isnull), true, false, false))
 
+typedef struct
+{
+	Query	   *query;
+	List	   *groupClauses;
+} check_subplans_for_ungrouped_vars_context;
+
 static bool contain_agg_clause_walker(Node *node, void *context);
 static bool pull_agg_clause_walker(Node *node, List **listptr);
 static bool contain_subplans_walker(Node *node, void *context);
 static bool pull_subplans_walker(Node *node, List **listptr);
 static bool check_subplans_for_ungrouped_vars_walker(Node *node,
-										 Query *context);
+						check_subplans_for_ungrouped_vars_context *context);
 static bool contain_noncachable_functions_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node, void *context);
 static Expr *simplify_op_or_func(Expr *expr, List *args);
@@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr)
  * planner runs.  So we do it here, after we have processed sublinks into
  * subplans.  This ought to be cleaned up someday.
  *
- * 'clause' is the expression tree to be searched for subplans.
- * 'query' provides the GROUP BY list, the target list that the group clauses
- * refer to, and the range table.
+ * A deficiency in this scheme is that any outer reference var must be
+ * grouped by itself; we don't recognize groupable expressions within
+ * subselects.  For example, consider
+ *		SELECT
+ *			(SELECT x FROM bar where y = (foo.a + foo.b))
+ *		FROM foo
+ *		GROUP BY a + b;
+ * This query will be rejected although it could be allowed.
  */
 void
-check_subplans_for_ungrouped_vars(Node *clause,
-								  Query *query)
+check_subplans_for_ungrouped_vars(Query *query)
 {
+	check_subplans_for_ungrouped_vars_context context;
+	List	*gl;
+
+	context.query = query;
+	/*
+	 * Build a list of the acceptable GROUP BY expressions for use in
+	 * the walker (to avoid repeated scans of the targetlist within the
+	 * recursive routine).
+	 */
+	context.groupClauses = NIL;
+	foreach(gl, query->groupClause)
+	{
+		GroupClause *grpcl = lfirst(gl);
+		Node	   *expr;
+
+		expr = get_sortgroupclause_expr(grpcl, query->targetList);
+		context.groupClauses = lcons(expr, context.groupClauses);
+	}
+
 	/*
-	 * No special setup needed; context for walker is just the Query
-	 * pointer
+	 * Recursively scan the targetlist and the HAVING clause.
+	 * WHERE and JOIN/ON conditions are not examined, since they are
+	 * evaluated before grouping.
 	 */
-	check_subplans_for_ungrouped_vars_walker(clause, query);
+	check_subplans_for_ungrouped_vars_walker((Node *) query->targetList,
+											 &context);
+	check_subplans_for_ungrouped_vars_walker(query->havingQual,
+											 &context);
+
+	freeList(context.groupClauses);
 }
 
 static bool
 check_subplans_for_ungrouped_vars_walker(Node *node,
-										 Query *context)
+						check_subplans_for_ungrouped_vars_context *context)
 {
+	List	   *gl;
+
 	if (node == NULL)
 		return false;
+	if (IsA(node, Const) || IsA(node, Param))
+		return false;			/* constants are always acceptable */
 
 	/*
 	 * If we find an aggregate function, do not recurse into its
 	 * arguments.  Subplans invoked within aggregate calls are allowed to
-	 * receive ungrouped variables.
+	 * receive ungrouped variables.  (This test and the next one should
+	 * match the logic in parse_agg.c's check_ungrouped_columns().)
 	 */
 	if (IsA(node, Aggref))
 		return false;
 
+	/*
+	 * Check to see if subexpression as a whole matches any GROUP BY item.
+	 * We need to do this at every recursion level so that we recognize
+	 * GROUPed-BY expressions before reaching variables within them.
+	 */
+	foreach(gl, context->groupClauses)
+	{
+		if (equal(node, lfirst(gl)))
+			return false;		/* acceptable, do not descend more */
+	}
+
 	/*
 	 * We can ignore Vars other than in subplan args lists, since the
 	 * parser already checked 'em.
@@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
 			Node	   *thisarg = lfirst(t);
 			Var		   *var;
 			bool		contained_in_group_clause;
-			List	   *gl;
 
 			/*
 			 * We do not care about args that are not local variables;
@@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
 			 * Else, see if it is a grouping column.
 			 */
 			contained_in_group_clause = false;
-			foreach(gl, context->groupClause)
+			foreach(gl, context->groupClauses)
 			{
-				GroupClause *gcl = lfirst(gl);
-				Node	   *groupexpr;
-
-				groupexpr = get_sortgroupclause_expr(gcl,
-													 context->targetList);
-				if (equal(thisarg, groupexpr))
+				if (equal(thisarg, lfirst(gl)))
 				{
 					contained_in_group_clause = true;
 					break;
@@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
 				char	   *attname;
 
 				Assert(var->varno > 0 &&
-					   (int) var->varno <= length(context->rtable));
-				rte = rt_fetch(var->varno, context->rtable);
+					   (int) var->varno <= length(context->query->rtable));
+				rte = rt_fetch(var->varno, context->query->rtable);
 				attname = get_rte_attribute_name(rte, var->varattno);
 				elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query",
 					 rte->eref->relname, attname);
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index aa97bcc862c..da8429ca094 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: clauses.h,v 1.47 2001/10/28 06:26:07 momjian Exp $
+ * $Id: clauses.h,v 1.48 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause);
 
 extern bool contain_subplans(Node *clause);
 extern List *pull_subplans(Node *clause);
-extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query);
+extern void check_subplans_for_ungrouped_vars(Query *query);
 
 extern bool contain_noncachable_functions(Node *clause);
 
-- 
GitLab