From e649796f128bd8702ba5744d36f4e8cb81f0b754 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 6 Jun 2003 15:04:03 +0000
Subject: [PATCH] Implement outer-level aggregates to conform to the SQL spec,
 with extensions to support our historical behavior.  An aggregate belongs to
 the closest query level of any of the variables in its argument, or the
 current query level if there are no variables (e.g., COUNT(*)). The
 implementation involves adding an agglevelsup field to Aggref, and treating
 outer aggregates like outer variables at planning time.

---
 doc/src/sgml/syntax.sgml                 |  27 ++-
 src/backend/catalog/heap.c               |  10 +-
 src/backend/commands/tablecmds.c         |   6 +-
 src/backend/commands/typecmds.c          |   6 +-
 src/backend/executor/nodeAgg.c           |   5 +-
 src/backend/executor/nodeSubplan.c       |   6 +-
 src/backend/nodes/copyfuncs.c            |   3 +-
 src/backend/nodes/equalfuncs.c           |   3 +-
 src/backend/nodes/outfuncs.c             |   3 +-
 src/backend/nodes/readfuncs.c            |   3 +-
 src/backend/optimizer/plan/planner.c     |  22 +-
 src/backend/optimizer/plan/subselect.c   | 215 ++++++++++++-------
 src/backend/optimizer/util/clauses.c     |  25 ++-
 src/backend/optimizer/util/var.c         | 113 +++++++++-
 src/backend/parser/analyze.c             |  12 +-
 src/backend/parser/parse_agg.c           | 250 ++++++++++++++---------
 src/backend/parser/parse_clause.c        |   5 +-
 src/backend/parser/parse_func.c          |   8 +-
 src/backend/rewrite/rewriteManip.c       |  62 +++++-
 src/include/catalog/catversion.h         |   4 +-
 src/include/nodes/primnodes.h            |   3 +-
 src/include/optimizer/subselect.h        |   5 +-
 src/include/optimizer/var.h              |   3 +-
 src/include/parser/parse_agg.h           |   4 +-
 src/test/regress/expected/aggregates.out |  20 ++
 src/test/regress/sql/aggregates.sql      |  13 ++
 26 files changed, 607 insertions(+), 229 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index b6ebb1cc434..a1c0767e4cc 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.77 2003/03/25 16:15:38 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.78 2003/06/06 15:04:01 tgl Exp $
 -->
 
 <chapter id="sql-syntax">
@@ -1094,6 +1094,29 @@ sqrt(2)
     linkend="functions-aggregate">.  Other aggregate functions may be added
     by the user. 
    </para>
+
+   <para>
+    An aggregate expression may only appear in the result list or
+    <literal>HAVING</> clause of a <command>SELECT</> command.
+    It is forbidden in other clauses, such as <literal>WHERE</>,
+    because those clauses are logically evaluated before the results
+    of aggregates are formed.
+   </para>
+
+   <para>
+    When an aggregate expression appears in a subquery (see
+    <xref linkend="sql-syntax-scalar-subqueries"> and
+    <xref linkend="functions-subquery">), the aggregate is normally
+    evaluated over the rows of the subquery.  But an exception occurs
+    if the aggregate's argument contains only outer-level variables:
+    the aggregate then belongs to the nearest such outer level, and is
+    evaluated over the rows of that query.  The aggregate expression
+    as a whole is then an outer reference for the subquery it appears in,
+    and acts as a constant over any one evaluation of that subquery.
+    The restriction about
+    appearing only in the result list or <literal>HAVING</> clause
+    applies with respect to the query level that the aggregate belongs to.
+   </para>
   </sect2>
 
   <sect2 id="sql-syntax-type-casts">
@@ -1164,7 +1187,7 @@ CAST ( <replaceable>expression</replaceable> AS <replaceable>type</replaceable>
    </para>
   </sect2>
 
-  <sect2>
+  <sect2 id="sql-syntax-scalar-subqueries">
    <title>Scalar Subqueries</title>
 
    <para>
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index fe57ab7bad3..713c47eb608 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.245 2003/05/28 16:03:55 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.246 2003/06/06 15:04:01 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1619,9 +1619,9 @@ AddRelationRawConstraints(Relation rel,
 		/*
 		 * No subplans or aggregates, either...
 		 */
-		if (contain_subplans(expr))
+		if (pstate->p_hasSubLinks)
 			elog(ERROR, "cannot use subselect in CHECK constraint expression");
-		if (contain_agg_clause(expr))
+		if (pstate->p_hasAggs)
 			elog(ERROR, "cannot use aggregate function in CHECK constraint expression");
 
 		/*
@@ -1738,9 +1738,9 @@ cookDefault(ParseState *pstate,
 	/*
 	 * No subplans or aggregates, either...
 	 */
-	if (contain_subplans(expr))
+	if (pstate->p_hasSubLinks)
 		elog(ERROR, "cannot use subselects in DEFAULT clause");
-	if (contain_agg_clause(expr))
+	if (pstate->p_hasAggs)
 		elog(ERROR, "cannot use aggregate functions in DEFAULT clause");
 
 	/*
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 349fb8f3917..c463c8bd1b7 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/tablecmds.c,v 1.73 2003/05/28 16:03:56 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/tablecmds.c,v 1.74 2003/06/06 15:04:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2870,9 +2870,9 @@ AlterTableAddCheckConstraint(Relation rel, Constraint *constr)
 	/*
 	 * No subplans or aggregates, either...
 	 */
-	if (contain_subplans(expr))
+	if (pstate->p_hasSubLinks)
 		elog(ERROR, "cannot use subselect in CHECK constraint expression");
-	if (contain_agg_clause(expr))
+	if (pstate->p_hasAggs)
 		elog(ERROR, "cannot use aggregate function in CHECK constraint expression");
 
 	/*
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 2036b9e714a..5a16e53e983 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/typecmds.c,v 1.36 2003/05/09 23:01:45 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/typecmds.c,v 1.37 2003/06/06 15:04:01 tgl Exp $
  *
  * DESCRIPTION
  *	  The "DefineFoo" routines take the parse tree and pick out the
@@ -1720,9 +1720,9 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid,
 	/*
 	 * No subplans or aggregates, either...
 	 */
-	if (contain_subplans(expr))
+	if (pstate->p_hasSubLinks)
 		elog(ERROR, "cannot use subselect in CHECK constraint expression");
-	if (contain_agg_clause(expr))
+	if (pstate->p_hasAggs)
 		elog(ERROR, "cannot use aggregate function in CHECK constraint expression");
 
 	/*
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 603df5ed1c4..f2499cb4e5e 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.105 2003/05/30 20:23:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.106 2003/06/06 15:04:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1179,6 +1179,9 @@ ExecInitAgg(Agg *node, EState *estate)
 		Datum		textInitVal;
 		int			i;
 
+		/* Planner should have assigned aggregate to correct level */
+		Assert(aggref->agglevelsup == 0);
+
 		/* Look for a previous duplicate aggregate */
 		for (i = 0; i <= aggno; i++)
 		{
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 5d9bdc84241..ff5d03faf8c 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.45 2003/04/08 23:20:01 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.46 2003/06/06 15:04:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -240,7 +240,9 @@ ExecScanSubPlan(SubPlanState *node,
 	oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
 
 	/*
-	 * Set Params of this plan from parent plan correlation Vars
+	 * Set Params of this plan from parent plan correlation values.
+	 * (Any calculation we have to do is done in the parent econtext,
+	 * since the Param values don't need to have per-query lifetime.)
 	 */
 	pvar = node->args;
 	foreach(lst, subplan->parParam)
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 4a5f858d7b0..12b82fc5bcb 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.251 2003/05/28 16:03:56 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.252 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -725,6 +725,7 @@ _copyAggref(Aggref *from)
 	COPY_SCALAR_FIELD(aggfnoid);
 	COPY_SCALAR_FIELD(aggtype);
 	COPY_NODE_FIELD(target);
+	COPY_SCALAR_FIELD(agglevelsup);
 	COPY_SCALAR_FIELD(aggstar);
 	COPY_SCALAR_FIELD(aggdistinct);
 
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index ff5400f6ee8..40211231c6f 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.194 2003/05/28 16:03:56 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.195 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -202,6 +202,7 @@ _equalAggref(Aggref *a, Aggref *b)
 	COMPARE_SCALAR_FIELD(aggfnoid);
 	COMPARE_SCALAR_FIELD(aggtype);
 	COMPARE_NODE_FIELD(target);
+	COMPARE_SCALAR_FIELD(agglevelsup);
 	COMPARE_SCALAR_FIELD(aggstar);
 	COMPARE_SCALAR_FIELD(aggdistinct);
 
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index a1b238c93cf..cec7f09f0a9 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.206 2003/05/28 16:03:56 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.207 2003/06/06 15:04:02 tgl Exp $
  *
  * NOTES
  *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -613,6 +613,7 @@ _outAggref(StringInfo str, Aggref *node)
 	WRITE_OID_FIELD(aggfnoid);
 	WRITE_OID_FIELD(aggtype);
 	WRITE_NODE_FIELD(target);
+	WRITE_UINT_FIELD(agglevelsup);
 	WRITE_BOOL_FIELD(aggstar);
 	WRITE_BOOL_FIELD(aggdistinct);
 }
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 68daca4b555..7d3a1506a81 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.153 2003/05/06 00:20:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.154 2003/06/06 15:04:02 tgl Exp $
  *
  * NOTES
  *	  Path and Plan nodes do not have any readfuncs support, because we
@@ -413,6 +413,7 @@ _readAggref(void)
 	READ_OID_FIELD(aggfnoid);
 	READ_OID_FIELD(aggtype);
 	READ_NODE_FIELD(target);
+	READ_UINT_FIELD(agglevelsup);
 	READ_BOOL_FIELD(aggstar);
 	READ_BOOL_FIELD(aggdistinct);
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index eca7a908f7a..fdb5519862f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.153 2003/05/06 00:20:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.154 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ planner(Query *parse, bool isCursor, int cursorOptions)
 	double		tuple_fraction;
 	Plan	   *result_plan;
 	Index		save_PlannerQueryLevel;
-	List	   *save_PlannerParamVar;
+	List	   *save_PlannerParamList;
 
 	/*
 	 * The planner can be called recursively (an example is when
@@ -91,11 +91,11 @@ planner(Query *parse, bool isCursor, int cursorOptions)
 	 * subquery_planner, not here.
 	 */
 	save_PlannerQueryLevel = PlannerQueryLevel;
-	save_PlannerParamVar = PlannerParamVar;
+	save_PlannerParamList = PlannerParamList;
 
 	/* Initialize state for handling outer-level references and params */
 	PlannerQueryLevel = 0;		/* will be 1 in top-level subquery_planner */
-	PlannerParamVar = NIL;
+	PlannerParamList = NIL;
 
 	/* Determine what fraction of the plan is likely to be scanned */
 	if (isCursor)
@@ -130,14 +130,14 @@ planner(Query *parse, bool isCursor, int cursorOptions)
 	}
 
 	/* executor wants to know total number of Params used overall */
-	result_plan->nParamExec = length(PlannerParamVar);
+	result_plan->nParamExec = length(PlannerParamList);
 
 	/* final cleanup of the plan */
 	set_plan_references(result_plan, parse->rtable);
 
 	/* restore state for outer planner, if any */
 	PlannerQueryLevel = save_PlannerQueryLevel;
-	PlannerParamVar = save_PlannerParamVar;
+	PlannerParamList = save_PlannerParamList;
 
 	return result_plan;
 }
@@ -261,8 +261,7 @@ subquery_planner(Query *parse, double tuple_fraction)
 	 *
 	 * Note that both havingQual and parse->jointree->quals are in
 	 * implicitly-ANDed-list form at this point, even though they are
-	 * declared as Node *.	Also note that contain_agg_clause does not
-	 * recurse into sub-selects, which is exactly what we need here.
+	 * declared as Node *.
 	 */
 	newHaving = NIL;
 	foreach(lst, (List *) parse->havingQual)
@@ -397,6 +396,11 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 	if (parse->hasSubLinks)
 		expr = SS_process_sublinks(expr, (kind == EXPRKIND_QUAL));
 
+	/*
+	 * XXX do not insert anything here unless you have grokked the comments
+	 * in SS_replace_correlation_vars ...
+	 */
+
 	/* Replace uplevel vars with Param nodes */
 	if (PlannerQueryLevel > 1)
 		expr = SS_replace_correlation_vars(expr);
@@ -1356,7 +1360,7 @@ make_subplanTargetList(Query *parse,
 	 * If we're not grouping or aggregating, nothing to do here;
 	 * query_planner should receive the unmodified target list.
 	 */
-	if (!parse->hasAggs && !parse->groupClause && !parse->havingQual)
+	if (!parse->hasAggs && !parse->groupClause)
 	{
 		*need_tlist_eval = true;
 		return tlist;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 4be69d77cfd..930c3133030 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.75 2003/04/29 22:13:09 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.76 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,6 +28,7 @@
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_relation.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
@@ -35,27 +36,42 @@
 
 Index		PlannerQueryLevel;	/* level of current query */
 List	   *PlannerInitPlan;	/* init subplans for current query */
-List	   *PlannerParamVar;	/* to get Var from Param->paramid */
+List	   *PlannerParamList;	/* to keep track of cross-level Params */
 
 int			PlannerPlanId = 0;	/* to assign unique ID to subquery plans */
 
-/*--------------------
- * PlannerParamVar is a list of Var nodes, wherein the n'th entry
- * (n counts from 0) corresponds to Param->paramid = n.  The Var nodes
- * are ordinary except for one thing: their varlevelsup field does NOT
- * have the usual interpretation of "subplan levels out from current".
- * Instead, it contains the absolute plan level, with the outermost
- * plan being level 1 and nested plans having higher level numbers.
- * This nonstandardness is useful because we don't have to run around
- * and update the list elements when we enter or exit a subplan
- * recursion level.  But we must pay attention not to confuse this
- * meaning with the normal meaning of varlevelsup.
+/*
+ * PlannerParamList keeps track of the PARAM_EXEC slots that we have decided
+ * we need for the query.  At runtime these slots are used to pass values
+ * either down into subqueries (for outer references in subqueries) or up out
+ * of subqueries (for the results of a subplan).  The n'th entry in the list
+ * (n counts from 0) corresponds to Param->paramid = n.
+ *
+ * Each PlannerParamItem records the absolute query level it is associated
+ * with (the outermost query is level 1; nested subqueries have higher
+ * numbers).  The item a slot represents can be one of three kinds:
+ *
+ * A Var: the slot represents a variable of that level that must be passed
+ * down because subqueries have outer references to it.  The varlevelsup
+ * value in the Var will always be zero.
+ *
+ * An Aggref (with an expression tree representing its argument): the slot
+ * represents an aggregate expression that is an outer reference for some
+ * subquery.  The Aggref itself has agglevelsup = 0, and its argument tree
+ * is adjusted to match in level.
  *
- * We also need to create Param slots that don't correspond to any outer Var.
- * For these, we set varno = 0 and varlevelsup = 0, so that they can't
- * accidentally match an outer Var.
- *--------------------
+ * A Param: the slot holds the result of a subplan (it is a setParam item
+ * for that subplan).  The absolute level shown for such items corresponds
+ * to the parent query of the subplan.
+ *
+ * Note: we detect duplicate Var parameters and coalesce them into one slot,
+ * but we do not do this for Aggref or Param slots.
  */
+typedef struct PlannerParamItem
+{
+	Node	   *item;			/* the Var, Aggref, or Param */
+	Index		abslevel;		/* its absolute query level */
+} PlannerParamItem;
 
 
 typedef struct finalize_primnode_context
@@ -77,43 +93,26 @@ static Bitmapset *finalize_plan(Plan *plan, List *rtable,
 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
 
 
-/*
- * Create a new entry in the PlannerParamVar list, and return its index.
- *
- * var contains the data to use, except for varlevelsup which
- * is set from the absolute level value given by varlevel.  NOTE that
- * the passed var is scribbled on and placed directly into the list!
- * Generally, caller should have just created or copied it.
- */
-static int
-new_param(Var *var, Index varlevel)
-{
-	var->varlevelsup = varlevel;
-
-	PlannerParamVar = lappend(PlannerParamVar, var);
-
-	return length(PlannerParamVar) - 1;
-}
-
 /*
  * Generate a Param node to replace the given Var,
  * which is expected to have varlevelsup > 0 (ie, it is not local).
  */
 static Param *
-replace_var(Var *var)
+replace_outer_var(Var *var)
 {
-	List	   *ppv;
 	Param	   *retval;
-	Index		varlevel;
+	List	   *ppl;
+	PlannerParamItem *pitem;
+	Index		abslevel;
 	int			i;
 
 	Assert(var->varlevelsup > 0 && var->varlevelsup < PlannerQueryLevel);
-	varlevel = PlannerQueryLevel - var->varlevelsup;
+	abslevel = PlannerQueryLevel - var->varlevelsup;
 
 	/*
-	 * If there's already a PlannerParamVar entry for this same Var, just
+	 * If there's already a PlannerParamList entry for this same Var, just
 	 * use it.	NOTE: in sufficiently complex querytrees, it is possible
-	 * for the same varno/varlevel to refer to different RTEs in different
+	 * for the same varno/abslevel to refer to different RTEs in different
 	 * parts of the parsetree, so that different fields might end up
 	 * sharing the same Param number.  As long as we check the vartype as
 	 * well, I believe that this sort of aliasing will cause no trouble.
@@ -121,22 +120,33 @@ replace_var(Var *var)
 	 * execution in each part of the tree.
 	 */
 	i = 0;
-	foreach(ppv, PlannerParamVar)
+	foreach(ppl, PlannerParamList)
 	{
-		Var		   *pvar = lfirst(ppv);
+		pitem = (PlannerParamItem *) lfirst(ppl);
+		if (pitem->abslevel == abslevel && IsA(pitem->item, Var))
+		{
+			Var	   *pvar = (Var *) pitem->item;
 
-		if (pvar->varno == var->varno &&
-			pvar->varattno == var->varattno &&
-			pvar->varlevelsup == varlevel &&
-			pvar->vartype == var->vartype)
-			break;
+			if (pvar->varno == var->varno &&
+				pvar->varattno == var->varattno &&
+				pvar->vartype == var->vartype)
+				break;
+		}
 		i++;
 	}
 
-	if (!ppv)
+	if (!ppl)
 	{
 		/* Nope, so make a new one */
-		i = new_param((Var *) copyObject(var), varlevel);
+		var = (Var *) copyObject(var);
+		var->varlevelsup = 0;
+
+		pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
+		pitem->item = (Node *) var;
+		pitem->abslevel = abslevel;
+
+		PlannerParamList = lappend(PlannerParamList, pitem);
+		/* i is already the correct index for the new item */
 	}
 
 	retval = makeNode(Param);
@@ -147,19 +157,68 @@ replace_var(Var *var)
 	return retval;
 }
 
+/*
+ * Generate a Param node to replace the given Aggref,
+ * which is expected to have agglevelsup > 0 (ie, it is not local).
+ */
+static Param *
+replace_outer_agg(Aggref *agg)
+{
+	Param	   *retval;
+	PlannerParamItem *pitem;
+	Index		abslevel;
+	int			i;
+
+	Assert(agg->agglevelsup > 0 && agg->agglevelsup < PlannerQueryLevel);
+	abslevel = PlannerQueryLevel - agg->agglevelsup;
+
+	/*
+	 * It does not seem worthwhile to try to match duplicate outer aggs.
+	 * Just make a new slot every time.
+	 */
+	agg = (Aggref *) copyObject(agg);
+	IncrementVarSublevelsUp((Node *) agg, - ((int) agg->agglevelsup), 0);
+	Assert(agg->agglevelsup == 0);
+
+	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
+	pitem->item = (Node *) agg;
+	pitem->abslevel = abslevel;
+
+	PlannerParamList = lappend(PlannerParamList, pitem);
+	i = length(PlannerParamList) - 1;
+
+	retval = makeNode(Param);
+	retval->paramkind = PARAM_EXEC;
+	retval->paramid = (AttrNumber) i;
+	retval->paramtype = agg->aggtype;
+
+	return retval;
+}
+
 /*
  * Generate a new Param node that will not conflict with any other.
+ *
+ * This is used to allocate PARAM_EXEC slots for subplan outputs.
+ *
+ * paramtypmod is currently unused but might be wanted someday.
  */
 static Param *
 generate_new_param(Oid paramtype, int32 paramtypmod)
 {
-	Var		   *var = makeVar(0, 0, paramtype, paramtypmod, 0);
-	Param	   *retval = makeNode(Param);
+	Param	   *retval;
+	PlannerParamItem *pitem;
 
+	retval = makeNode(Param);
 	retval->paramkind = PARAM_EXEC;
-	retval->paramid = (AttrNumber) new_param(var, 0);
+	retval->paramid = (AttrNumber) length(PlannerParamList);
 	retval->paramtype = paramtype;
 
+	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
+	pitem->item = (Node *) retval;
+	pitem->abslevel = PlannerQueryLevel;
+
+	PlannerParamList = lappend(PlannerParamList, pitem);
+
 	return retval;
 }
 
@@ -256,10 +315,9 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 	tmpset = bms_copy(plan->extParam);
 	while ((paramid = bms_first_member(tmpset)) >= 0)
 	{
-		Var		   *var = nth(paramid, PlannerParamVar);
+		PlannerParamItem *pitem = nth(paramid, PlannerParamList);
 
-		/* note varlevelsup is absolute level number */
-		if (var->varlevelsup == PlannerQueryLevel)
+		if (pitem->abslevel == PlannerQueryLevel)
 			node->parParam = lappendi(node->parParam, paramid);
 	}
 	bms_free(tmpset);
@@ -408,17 +466,14 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 		args = NIL;
 		foreach(lst, node->parParam)
 		{
-			Var		   *var = nth(lfirsti(lst), PlannerParamVar);
-
-			var = (Var *) copyObject(var);
+			PlannerParamItem *pitem = nth(lfirsti(lst), PlannerParamList);
 
 			/*
-			 * Must fix absolute-level varlevelsup from the
-			 * PlannerParamVar entry.  But since var is at current subplan
-			 * level, this is easy:
+			 * The Var or Aggref has already been adjusted to have the
+			 * correct varlevelsup or agglevelsup.  We probably don't even
+			 * need to copy it again, but be safe.
 			 */
-			var->varlevelsup = 0;
-			args = lappend(args, var);
+			args = lappend(args, copyObject(pitem->item));
 		}
 		node->args = args;
 
@@ -682,6 +737,20 @@ convert_IN_to_join(Query *parse, SubLink *sublink)
 
 /*
  * Replace correlation vars (uplevel vars) with Params.
+ *
+ * Uplevel aggregates are replaced, too.
+ *
+ * Note: it is critical that this runs immediately after SS_process_sublinks.
+ * Since we do not recurse into the arguments of uplevel aggregates, they will
+ * get copied to the appropriate subplan args list in the parent query with
+ * uplevel vars not replaced by Params, but only adjusted in level (see
+ * replace_outer_agg).  That's exactly what we want for the vars of the parent
+ * level --- but if an aggregate's argument contains any further-up variables,
+ * they have to be replaced with Params in their turn.  That will happen when
+ * the parent level runs SS_replace_correlation_vars.  Therefore it must do
+ * so after expanding its sublinks to subplans.  And we don't want any steps
+ * in between, else those steps would never get applied to the aggregate
+ * argument expressions, either in the parent or the child level.
  */
 Node *
 SS_replace_correlation_vars(Node *expr)
@@ -698,7 +767,12 @@ replace_correlation_vars_mutator(Node *node, void *context)
 	if (IsA(node, Var))
 	{
 		if (((Var *) node)->varlevelsup > 0)
-			return (Node *) replace_var((Var *) node);
+			return (Node *) replace_outer_var((Var *) node);
+	}
+	if (IsA(node, Aggref))
+	{
+		if (((Aggref *) node)->agglevelsup > 0)
+			return (Node *) replace_outer_agg((Aggref *) node);
 	}
 	return expression_tree_mutator(node,
 								   replace_correlation_vars_mutator,
@@ -785,19 +859,18 @@ SS_finalize_plan(Plan *plan, List *rtable)
 	 * We do this once to save time in the per-plan recursion steps.
 	 */
 	paramid = 0;
-	foreach(lst, PlannerParamVar)
+	foreach(lst, PlannerParamList)
 	{
-		Var		   *var = (Var *) lfirst(lst);
+		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lst);
 
-		/* note varlevelsup is absolute level number */
-		if (var->varlevelsup < PlannerQueryLevel)
+		if (pitem->abslevel < PlannerQueryLevel)
 		{
 			/* valid outer-level parameter */
 			outer_params = bms_add_member(outer_params, paramid);
 			valid_params = bms_add_member(valid_params, paramid);
 		}
-		else if (var->varlevelsup == PlannerQueryLevel &&
-				 var->varno == 0 && var->varattno == 0)
+		else if (pitem->abslevel == PlannerQueryLevel &&
+				 IsA(pitem->item, Param))
 		{
 			/* valid local parameter (i.e., a setParam of my child) */
 			valid_params = bms_add_member(valid_params, paramid);
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 0c2e652c795..3f4ced5a553 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.138 2003/05/28 22:32:49 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.139 2003/06/06 15:04:02 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -308,6 +308,13 @@ make_ands_implicit(Expr *clause)
  *	  Recursively search for Aggref nodes within a clause.
  *
  *	  Returns true if any aggregate found.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans, or in contexts where it's known there
+ * are no subqueries.  There mustn't be outer-aggregate references either.
+ *
+ * (If you want something like this but able to deal with subqueries,
+ * see rewriteManip.c's checkExprHasAggs().)
  */
 bool
 contain_agg_clause(Node *clause)
@@ -321,8 +328,12 @@ contain_agg_clause_walker(Node *node, void *context)
 	if (node == NULL)
 		return false;
 	if (IsA(node, Aggref))
+	{
+		Assert(((Aggref *) node)->agglevelsup == 0);
 		return true;			/* abort the tree traversal and return
 								 * true */
+	}
+	Assert(!IsA(node, SubLink));
 	return expression_tree_walker(node, contain_agg_clause_walker, context);
 }
 
@@ -331,6 +342,10 @@ contain_agg_clause_walker(Node *node, void *context)
  *	  Recursively search for DISTINCT Aggref nodes within a clause.
  *
  *	  Returns true if any DISTINCT aggregate found.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans, or in contexts where it's known there
+ * are no subqueries.  There mustn't be outer-aggregate references either.
  */
 bool
 contain_distinct_agg_clause(Node *clause)
@@ -345,10 +360,12 @@ contain_distinct_agg_clause_walker(Node *node, void *context)
 		return false;
 	if (IsA(node, Aggref))
 	{
+		Assert(((Aggref *) node)->agglevelsup == 0);
 		if (((Aggref *) node)->aggdistinct)
 			return true;		/* abort the tree traversal and return
 								 * true */
 	}
+	Assert(!IsA(node, SubLink));
 	return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
 }
 
@@ -357,6 +374,10 @@ contain_distinct_agg_clause_walker(Node *node, void *context)
  *	  Recursively count the Aggref nodes in an expression tree.
  *
  *	  Note: this also checks for nested aggregates, which are an error.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans, or in contexts where it's known there
+ * are no subqueries.  There mustn't be outer-aggregate references either.
  */
 int
 count_agg_clause(Node *clause)
@@ -374,6 +395,7 @@ count_agg_clause_walker(Node *node, int *count)
 		return false;
 	if (IsA(node, Aggref))
 	{
+		Assert(((Aggref *) node)->agglevelsup == 0);
 		(*count)++;
 
 		/*
@@ -388,6 +410,7 @@ count_agg_clause_walker(Node *node, int *count)
 		 */
 		return false;
 	}
+	Assert(!IsA(node, SubLink));
 	return expression_tree_walker(node, count_agg_clause_walker,
 								  (void *) count);
 }
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index a365b7b159e..bdd5baf521a 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.50 2003/05/28 22:32:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.51 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -35,6 +35,12 @@ typedef struct
 	int			sublevels_up;
 } contain_var_reference_context;
 
+typedef struct
+{
+	int			min_varlevel;
+	int			sublevels_up;
+} find_minimum_var_level_context;
+
 typedef struct
 {
 	FastList	varlist;
@@ -54,6 +60,8 @@ static bool contain_var_reference_walker(Node *node,
 static bool contain_var_clause_walker(Node *node, void *context);
 static bool contain_vars_of_level_walker(Node *node, int *sublevels_up);
 static bool contain_vars_above_level_walker(Node *node, int *sublevels_up);
+static bool find_minimum_var_level_walker(Node *node,
+					   find_minimum_var_level_context *context);
 static bool pull_var_clause_walker(Node *node,
 					   pull_var_clause_context *context);
 static Node *flatten_join_alias_vars_mutator(Node *node,
@@ -325,6 +333,109 @@ contain_vars_above_level_walker(Node *node, int *sublevels_up)
 }
 
 
+/*
+ * find_minimum_var_level
+ *	  Recursively scan a clause to find the lowest variable level it
+ *	  contains --- for example, zero is returned if there are any local
+ *	  variables, one if there are no local variables but there are
+ *	  one-level-up outer references, etc.  Subqueries are scanned to see
+ *	  if they possess relevant outer references.  (But any local variables
+ *	  within subqueries are not relevant.)  Aggrefs are counted like Vars.
+ *
+ *	  -1 is returned if no relevant variables or aggregates are found.
+ *
+ * Will recurse into sublinks.  Also, may be invoked directly on a Query.
+ */
+int
+find_minimum_var_level(Node *node)
+{
+	find_minimum_var_level_context context;
+
+	context.min_varlevel = -1;	/* signifies nothing found yet */
+	context.sublevels_up = 0;	/* start at the clause's own query level */
+
+	(void) query_or_expression_tree_walker(node,
+										   find_minimum_var_level_walker,
+										   (void *) &context,
+										   0);	/* no walker flags */
+
+	return context.min_varlevel;
+}
+
+static bool
+find_minimum_var_level_walker(Node *node,
+							  find_minimum_var_level_context *context)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var))
+	{
+		int		varlevelsup = ((Var *) node)->varlevelsup;
+
+		/* convert levelsup to frame of reference of original query */
+		varlevelsup -= context->sublevels_up;
+		/* a negative result means a subquery-local var: ignore it */
+		if (varlevelsup >= 0)
+		{
+			if (context->min_varlevel < 0 ||
+				context->min_varlevel > varlevelsup)
+			{
+				context->min_varlevel = varlevelsup;
+				/*
+				 * As soon as we find a local variable, we can abort the
+				 * tree traversal, since min_varlevel is then certainly 0.
+				 */
+				if (varlevelsup == 0)
+					return true;
+			}
+		}
+	}
+	/*
+	 * An Aggref must be treated like a Var of its level.  Normally we'd get
+	 * the same result from looking at the Vars in the aggregate's argument,
+	 * but this fails in the case of a Var-less aggregate call (COUNT(*)).
+	 */
+	if (IsA(node, Aggref))
+	{
+		int		agglevelsup = ((Aggref *) node)->agglevelsup;
+
+		/* convert levelsup to frame of reference of original query */
+		agglevelsup -= context->sublevels_up;
+		/* a negative result means a subquery-local agg: ignore it */
+		if (agglevelsup >= 0)
+		{
+			if (context->min_varlevel < 0 ||
+				context->min_varlevel > agglevelsup)
+			{
+				context->min_varlevel = agglevelsup;
+				/*
+				 * As soon as we find a local aggregate, we can abort the
+				 * tree traversal, since min_varlevel is then certainly 0.
+				 */
+				if (agglevelsup == 0)
+					return true;
+			}
+		}
+	}
+	if (IsA(node, Query))
+	{
+		/* Recurse into subselects, tracking the extra nesting level */
+		bool		result;
+
+		context->sublevels_up++;
+		result = query_tree_walker((Query *) node,
+								   find_minimum_var_level_walker,
+								   (void *) context,
+								   0);
+		context->sublevels_up--;
+		return result;
+	}
+	return expression_tree_walker(node,
+								  find_minimum_var_level_walker,
+								  (void *) context);
+}
+
+
 /*
  * pull_var_clause
  *	  Recursively pulls all var nodes from an expression clause.
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 79b36caad75..9ac8132f08a 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.272 2003/05/28 16:03:56 tgl Exp $
+ *	$Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.273 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1593,7 +1593,7 @@ transformRuleStmt(ParseState *pstate, RuleStmt *stmt,
 		elog(ERROR, "Rule WHERE condition may not contain references to other relations");
 
 	/* aggregates not allowed (but subselects are okay) */
-	if (contain_agg_clause(stmt->whereClause))
+	if (pstate->p_hasAggs)
 		elog(ERROR, "Rule WHERE condition may not contain aggregate functions");
 
 	/* save info about sublinks in where clause */
@@ -1808,7 +1808,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasAggs = pstate->p_hasAggs;
-	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
+	if (pstate->p_hasAggs || qry->groupClause)
 		parseCheckAggregates(pstate, qry);
 
 	if (stmt->forUpdate != NIL)
@@ -2013,7 +2013,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasAggs = pstate->p_hasAggs;
-	if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
+	if (pstate->p_hasAggs || qry->groupClause)
 		parseCheckAggregates(pstate, qry);
 
 	if (forUpdate != NIL)
@@ -2536,9 +2536,9 @@ transformExecuteStmt(ParseState *pstate, ExecuteStmt *stmt)
 			expr = transformExpr(pstate, expr);
 
 			/* Cannot contain subselects or aggregates */
-			if (contain_subplans(expr))
+			if (pstate->p_hasSubLinks)
 				elog(ERROR, "Cannot use subselects in EXECUTE parameters");
-			if (contain_agg_clause(expr))
+			if (pstate->p_hasAggs)
 				elog(ERROR, "Cannot use aggregates in EXECUTE parameters");
 
 			given_type_id = exprType(expr);
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 92ea3b9bd9e..49d952bf8af 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.52 2003/04/03 18:04:09 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.53 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
 #include "optimizer/var.h"
 #include "parser/parse_agg.h"
 #include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
 
 
 typedef struct
@@ -34,6 +35,147 @@ static void check_ungrouped_columns(Node *node, ParseState *pstate,
 static bool check_ungrouped_columns_walker(Node *node,
 							   check_ungrouped_columns_context *context);
 
+
+/*
+ * transformAggregateCall -
+ *		Finish initial transformation of an aggregate call
+ *
+ * parse_func.c has recognized the function as an aggregate, and has set
+ * up all the fields of the Aggref except agglevelsup.  Here we must
+ * determine which query level the aggregate actually belongs to, set
+ * agglevelsup accordingly, and mark p_hasAggs true in the corresponding
+ * pstate level.
+ */
+void
+transformAggregateCall(ParseState *pstate, Aggref *agg)
+{
+	int			min_varlevel;
+
+	/*
+	 * The aggregate's level is the same as the level of the lowest-level
+	 * variable or aggregate in its argument; or if it contains no variables
+	 * at all, we presume it to be local.
+	 */
+	min_varlevel = find_minimum_var_level((Node *) agg->target);
+
+	/*
+	 * An aggregate can't directly contain another aggregate call of the
+	 * same level (though outer aggs are okay).  We can skip this check
+	 * if we didn't find any local vars or aggs.
+	 */
+	if (min_varlevel == 0)
+	{
+		if (checkExprHasAggs((Node *) agg->target))
+			elog(ERROR, "aggregate function calls may not be nested");
+	}
+
+	if (min_varlevel < 0)
+		min_varlevel = 0;		/* nothing found: presume the agg is local */
+	agg->agglevelsup = min_varlevel;
+
+	/* Climb agglevelsup parse levels and mark that pstate as having aggs */
+	while (min_varlevel-- > 0)
+		pstate = pstate->parentParseState;
+	pstate->p_hasAggs = true;
+}
+
+
+/*
+ * parseCheckAggregates
+ *	Check for aggregates where they shouldn't be and improper grouping.
+ *
+ *	Ideally this should be done earlier, but it's difficult to distinguish
+ *	aggregates from plain functions at the grammar level.  So instead we
+ *	check here.  This function should be called after the target list and
+ *	qualifications are finalized.
+ */
+void
+parseCheckAggregates(ParseState *pstate, Query *qry)
+{
+	List	   *groupClauses = NIL;	/* acceptable GROUP BY expressions */
+	bool		have_non_var_grouping = false;	/* any non-Var GROUP BY expr? */
+	List	   *lst;
+	bool		hasJoinRTEs;	/* does p_rtable contain any RTE_JOIN entry? */
+	Node	   *clause;
+
+	/* This should only be called if we found aggregates or grouping */
+	Assert(pstate->p_hasAggs || qry->groupClause);
+
+	/*
+	 * Aggregates must never appear in WHERE or JOIN/ON clauses.
+	 *
+	 * (Note this check should appear first to deliver an appropriate error
+	 * message; otherwise we are likely to complain about some innocent
+	 * variable in the target list, which is outright misleading if the
+	 * problem is in WHERE.)
+	 */
+	if (checkExprHasAggs(qry->jointree->quals))
+		elog(ERROR, "Aggregates not allowed in WHERE clause");
+	if (checkExprHasAggs((Node *) qry->jointree->fromlist))
+		elog(ERROR, "Aggregates not allowed in JOIN conditions");
+
+	/*
+	 * No aggregates allowed in GROUP BY clauses, either.
+	 *
+	 * While we are at it, build a list of the acceptable GROUP BY
+	 * expressions for use by check_ungrouped_columns() (this avoids
+	 * repeated scans of the targetlist within the recursive routine...).
+	 * And detect whether any of the expressions aren't simple Vars.
+	 */
+	foreach(lst, qry->groupClause)
+	{
+		GroupClause *grpcl = (GroupClause *) lfirst(lst);
+		Node	   *expr;
+
+		expr = get_sortgroupclause_expr(grpcl, qry->targetList);
+		if (expr == NULL)
+			continue;			/* probably cannot happen */
+		if (checkExprHasAggs(expr))
+			elog(ERROR, "Aggregates not allowed in GROUP BY clause");
+		groupClauses = lcons(expr, groupClauses);
+		if (!IsA(expr, Var))
+			have_non_var_grouping = true;
+	}
+
+	/*
+	 * If there are join alias vars involved, we have to flatten them
+	 * to the underlying vars, so that aliased and unaliased vars will be
+	 * correctly taken as equal.  We can skip the expense of doing this
+	 * if no rangetable entries are RTE_JOIN kind.
+	 */
+	hasJoinRTEs = false;
+	foreach(lst, pstate->p_rtable)
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lst);
+
+		if (rte->rtekind == RTE_JOIN)
+		{
+			hasJoinRTEs = true;
+			break;
+		}
+	}
+
+	if (hasJoinRTEs)
+		groupClauses = (List *) flatten_join_alias_vars(qry,
+														(Node *) groupClauses);
+
+	/*
+	 * Check the targetlist and HAVING clause for ungrouped variables.
+	 */
+	clause = (Node *) qry->targetList;
+	if (hasJoinRTEs)
+		clause = flatten_join_alias_vars(qry, clause);
+	check_ungrouped_columns(clause, pstate,
+							groupClauses, have_non_var_grouping);
+
+	clause = (Node *) qry->havingQual;
+	if (hasJoinRTEs)
+		clause = flatten_join_alias_vars(qry, clause);
+	check_ungrouped_columns(clause, pstate,
+							groupClauses, have_non_var_grouping);
+}
+
+
 /*
  * check_ungrouped_columns -
  *	  Scan the given expression tree for ungrouped variables (variables
@@ -81,10 +223,15 @@ check_ungrouped_columns_walker(Node *node,
 		return false;			/* constants are always acceptable */
 
 	/*
-	 * If we find an aggregate function, do not recurse into its
-	 * arguments; ungrouped vars in the arguments are not an error.
+	 * If we find an aggregate call of the original level, do not recurse
+	 * into its arguments; ungrouped vars in the arguments are not an error.
+	 * We can also skip looking at the arguments of aggregates of higher
+	 * levels, since they could not possibly contain Vars that are of concern
+	 * to us (see transformAggregateCall).  We do need to look into the
+	 * arguments of aggregates of lower levels, however.
 	 */
-	if (IsA(node, Aggref))
+	if (IsA(node, Aggref) &&
+		(int) ((Aggref *) node)->agglevelsup >= context->sublevels_up)
 		return false;
 
 	/*
@@ -165,98 +312,3 @@ check_ungrouped_columns_walker(Node *node,
 	return expression_tree_walker(node, check_ungrouped_columns_walker,
 								  (void *) context);
 }
-
-/*
- * parseCheckAggregates
- *	Check for aggregates where they shouldn't be and improper grouping.
- *
- *	Ideally this should be done earlier, but it's difficult to distinguish
- *	aggregates from plain functions at the grammar level.  So instead we
- *	check here.  This function should be called after the target list and
- *	qualifications are finalized.
- */
-void
-parseCheckAggregates(ParseState *pstate, Query *qry)
-{
-	List	   *groupClauses = NIL;
-	bool		have_non_var_grouping = false;
-	List	   *lst;
-	bool		hasJoinRTEs;
-	Node	   *clause;
-
-	/* This should only be called if we found aggregates, GROUP, or HAVING */
-	Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual);
-
-	/*
-	 * Aggregates must never appear in WHERE or JOIN/ON clauses.
-	 *
-	 * (Note this check should appear first to deliver an appropriate error
-	 * message; otherwise we are likely to complain about some innocent
-	 * variable in the target list, which is outright misleading if the
-	 * problem is in WHERE.)
-	 */
-	if (contain_agg_clause(qry->jointree->quals))
-		elog(ERROR, "Aggregates not allowed in WHERE clause");
-	if (contain_agg_clause((Node *) qry->jointree->fromlist))
-		elog(ERROR, "Aggregates not allowed in JOIN conditions");
-
-	/*
-	 * No aggregates allowed in GROUP BY clauses, either.
-	 *
-	 * While we are at it, build a list of the acceptable GROUP BY
-	 * expressions for use by check_ungrouped_columns() (this avoids
-	 * repeated scans of the targetlist within the recursive routine...).
-	 * And detect whether any of the expressions aren't simple Vars.
-	 */
-	foreach(lst, qry->groupClause)
-	{
-		GroupClause *grpcl = (GroupClause *) lfirst(lst);
-		Node	   *expr;
-
-		expr = get_sortgroupclause_expr(grpcl, qry->targetList);
-		if (expr == NULL)
-			continue;			/* probably cannot happen */
-		if (contain_agg_clause(expr))
-			elog(ERROR, "Aggregates not allowed in GROUP BY clause");
-		groupClauses = lcons(expr, groupClauses);
-		if (!IsA(expr, Var))
-			have_non_var_grouping = true;
-	}
-
-	/*
-	 * If there are join alias vars involved, we have to flatten them
-	 * to the underlying vars, so that aliased and unaliased vars will be
-	 * correctly taken as equal.  We can skip the expense of doing this
-	 * if no rangetable entries are RTE_JOIN kind.
-	 */
-	hasJoinRTEs = false;
-	foreach(lst, pstate->p_rtable)
-	{
-		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lst);
-
-		if (rte->rtekind == RTE_JOIN)
-		{
-			hasJoinRTEs = true;
-			break;
-		}
-	}
-
-	if (hasJoinRTEs)
-		groupClauses = (List *) flatten_join_alias_vars(qry,
-														(Node *) groupClauses);
-
-	/*
-	 * Check the targetlist and HAVING clause for ungrouped variables.
-	 */
-	clause = (Node *) qry->targetList;
-	if (hasJoinRTEs)
-		clause = flatten_join_alias_vars(qry, clause);
-	check_ungrouped_columns(clause, pstate,
-							groupClauses, have_non_var_grouping);
-
-	clause = (Node *) qry->havingQual;
-	if (hasJoinRTEs)
-		clause = flatten_join_alias_vars(qry, clause);
-	check_ungrouped_columns(clause, pstate,
-							groupClauses, have_non_var_grouping);
-}
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 1c8cb8bc0e3..a29eb007fb7 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.113 2003/04/29 22:13:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.114 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,7 @@
 #include "parser/parse_relation.h"
 #include "parser/parse_target.h"
 #include "parser/parse_type.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
 
@@ -494,7 +495,7 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
 	 */
 	if (pstate->p_hasAggs)
 	{
-		if (contain_agg_clause(funcexpr))
+		if (checkExprHasAggs(funcexpr))
 			elog(ERROR, "cannot use aggregate function in FROM function expression");
 	}
 
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index e9a40e03179..6f858e17a8b 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_func.c,v 1.148 2003/05/26 00:11:27 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_func.c,v 1.149 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "catalog/pg_proc.h"
 #include "lib/stringinfo.h"
 #include "nodes/makefuncs.h"
+#include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_func.h"
@@ -336,12 +337,13 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 		aggref->aggstar = agg_star;
 		aggref->aggdistinct = agg_distinct;
 
+		/* parse_agg.c does additional aggregate-specific processing */
+		transformAggregateCall(pstate, aggref);
+
 		retval = (Node *) aggref;
 
 		if (retset)
 			elog(ERROR, "Aggregates may not return sets");
-
-		pstate->p_hasAggs = true;
 	}
 
 	return retval;
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 3943b9d2378..0d3dbe7d6e6 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.71 2003/02/08 20:20:55 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.72 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,13 @@
 #include "utils/lsyscache.h"
 
 
-static bool checkExprHasAggs_walker(Node *node, void *context);
+typedef struct
+{
+	int			sublevels_up;	/* subquery nesting depth below the start level */
+} checkExprHasAggs_context;
+
+static bool checkExprHasAggs_walker(Node *node,
+									checkExprHasAggs_context *context);
 static bool checkExprHasSubLink_walker(Node *node, void *context);
 static Relids offset_relid_set(Relids relids, int offset);
 static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
@@ -32,29 +38,55 @@ static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
  * checkExprHasAggs -
  *	Queries marked hasAggs might not have them any longer after
  *	rewriting. Check it.
+ *
+ * The objective of this routine is to detect whether there are aggregates
+ * belonging to the initial query level.  Aggregates belonging to subqueries
+ * or outer queries do NOT cause a true result.  We must recurse into
+ * subqueries to detect outer-reference aggregates that logically belong to
+ * the initial query level.
  */
 bool
 checkExprHasAggs(Node *node)
 {
+	checkExprHasAggs_context context;
+
+	context.sublevels_up = 0;
 	/*
-	 * If a Query is passed, examine it --- but we will not recurse into
-	 * sub-Queries.
+	 * Must be prepared to start with a Query or a bare expression tree;
+	 * if it's a Query, we don't want to increment sublevels_up.
 	 */
 	return query_or_expression_tree_walker(node,
 										   checkExprHasAggs_walker,
-										   NULL,
-										   QTW_IGNORE_RT_SUBQUERIES);
+										   (void *) &context,
+										   0);
 }
 
 static bool
-checkExprHasAggs_walker(Node *node, void *context)
+checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context)
 {
 	if (node == NULL)
 		return false;
 	if (IsA(node, Aggref))
-		return true;			/* abort the tree traversal and return
+	{
+		if (((Aggref *) node)->agglevelsup == context->sublevels_up)
+			return true;		/* abort the tree traversal and return
 								 * true */
-	return expression_tree_walker(node, checkExprHasAggs_walker, context);
+		/* else fall through to examine argument */
+	}
+	if (IsA(node, Query))
+	{
+		/* Recurse into subselects */
+		bool		result;
+
+		context->sublevels_up++;
+		result = query_tree_walker((Query *) node,
+								   checkExprHasAggs_walker,
+								   (void *) context, 0);
+		context->sublevels_up--;
+		return result;
+	}
+	return expression_tree_walker(node, checkExprHasAggs_walker,
+								  (void *) context);
 }
 
 /*
@@ -380,6 +412,8 @@ adjust_relid_set(Relids relids, int oldrelid, int newrelid)
  * that sublink are not affected, only outer references to vars that belong
  * to the expression's original query level or parents thereof.
  *
+ * Aggref nodes are adjusted similarly.
+ *
  * NOTE: although this has the form of a walker, we cheat and modify the
  * Var nodes in-place.	The given expression tree should have been copied
  * earlier to ensure that no unwanted side-effects occur!
@@ -403,7 +437,15 @@ IncrementVarSublevelsUp_walker(Node *node,
 
 		if (var->varlevelsup >= context->min_sublevels_up)
 			var->varlevelsup += context->delta_sublevels_up;
-		return false;
+		return false;			/* done here */
+	}
+	if (IsA(node, Aggref))
+	{
+		Aggref	   *agg = (Aggref *) node;
+
+		if (agg->agglevelsup >= context->min_sublevels_up)
+			agg->agglevelsup += context->delta_sublevels_up;
+		/* fall through to recurse into argument */
 	}
 	if (IsA(node, Query))
 	{
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 443af8e8d6a..209bd5ff242 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.197 2003/05/28 16:03:59 tgl Exp $
+ * $Id: catversion.h,v 1.198 2003/06/06 15:04:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200305271
+#define CATALOG_VERSION_NO	200306051
 
 #endif
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 558621c9006..12af0fd0925 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.82 2003/05/06 00:20:33 tgl Exp $
+ * $Id: primnodes.h,v 1.83 2003/06/06 15:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -223,6 +223,7 @@ typedef struct Aggref
 	Oid			aggfnoid;		/* pg_proc Oid of the aggregate */
 	Oid			aggtype;		/* type Oid of result of the aggregate */
 	Expr	   *target;			/* expression we are aggregating on */
+	Index		agglevelsup;	/* > 0 if agg belongs to outer query */
 	bool		aggstar;		/* TRUE if argument was really '*' */
 	bool		aggdistinct;	/* TRUE if it's agg(DISTINCT ...) */
 } Aggref;
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 9dce88e0a8d..c3c7462d04a 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -5,18 +5,19 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: subselect.h,v 1.18 2003/02/09 00:30:41 tgl Exp $
+ * $Id: subselect.h,v 1.19 2003/06/06 15:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef SUBSELECT_H
 #define SUBSELECT_H
 
+#include "nodes/parsenodes.h"
 #include "nodes/plannodes.h"
 
 extern Index PlannerQueryLevel; /* level of current query */
 extern List *PlannerInitPlan;	/* init subplans for current query */
-extern List *PlannerParamVar;	/* to get Var from Param->paramid */
+extern List *PlannerParamList;	/* to keep track of cross-level Params */
 extern int	PlannerPlanId;		/* to assign unique ID to subquery plans */
 
 extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h
index 3c84020ef92..82124627f78 100644
--- a/src/include/optimizer/var.h
+++ b/src/include/optimizer/var.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: var.h,v 1.26 2003/02/08 20:20:55 tgl Exp $
+ * $Id: var.h,v 1.27 2003/06/06 15:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,6 +24,7 @@ extern bool contain_whole_tuple_var(Node *node, int varno, int levelsup);
 extern bool contain_var_clause(Node *node);
 extern bool contain_vars_of_level(Node *node, int levelsup);
 extern bool contain_vars_above_level(Node *node, int levelsup);
+extern int	find_minimum_var_level(Node *node);
 extern List *pull_var_clause(Node *node, bool includeUpperVars);
 extern Node *flatten_join_alias_vars(Query *root, Node *node);
 
diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h
index 111d726bc9c..bc1e601cc22 100644
--- a/src/include/parser/parse_agg.h
+++ b/src/include/parser/parse_agg.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parse_agg.h,v 1.25 2003/01/17 03:25:04 tgl Exp $
+ * $Id: parse_agg.h,v 1.26 2003/06/06 15:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,6 +15,8 @@
 
 #include "parser/parse_node.h"
 
+extern void transformAggregateCall(ParseState *pstate, Aggref *agg);
+
 extern void parseCheckAggregates(ParseState *pstate, Query *qry);
 
 #endif   /* PARSE_AGG_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 9378ce7c9bc..a0009eed690 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -137,3 +137,23 @@ SELECT newcnt(four) AS cnt_1000 FROM onek;
      1000
 (1 row)
 
+-- test for outer-level aggregates
+-- this should work
+select ten, sum(distinct four) from onek a
+group by ten
+having exists (select 1 from onek b where sum(distinct a.four) = b.four);
+ ten | sum 
+-----+-----
+   0 |   2
+   2 |   2
+   4 |   2
+   6 |   2
+   8 |   2
+(5 rows)
+
+-- this should fail because subquery has an agg of its own in WHERE
+select ten, sum(distinct four) from onek a
+group by ten
+having exists (select 1 from onek b
+               where sum(distinct a.four + b.four) = b.four);
+ERROR:  Aggregates not allowed in WHERE clause
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 00cc6daf9f5..38335bcf083 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -49,3 +49,16 @@ SELECT newsum(four) AS sum_1500 FROM onek;
 
 SELECT newcnt(four) AS cnt_1000 FROM onek;
 
+
+-- test for outer-level aggregates
+
+-- this should work
+select ten, sum(distinct four) from onek a
+group by ten
+having exists (select 1 from onek b where sum(distinct a.four) = b.four);
+
+-- this should fail because subquery has an agg of its own in WHERE
+select ten, sum(distinct four) from onek a
+group by ten
+having exists (select 1 from onek b
+               where sum(distinct a.four + b.four) = b.four);
-- 
GitLab