From a8ae19ec3d13452de931736126d0786a148ee643 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Dec 1999 01:27:21 +0000
Subject: [PATCH] aggregate(DISTINCT ...) works, per SQL spec. Note this forces
 initdb because of change of Aggref node in stored rules.

---
 src/backend/executor/nodeAgg.c           | 562 +++++++++++++++--------
 src/backend/nodes/copyfuncs.c            |   4 +-
 src/backend/nodes/equalfuncs.c           |   6 +-
 src/backend/nodes/outfuncs.c             |  11 +-
 src/backend/nodes/readfuncs.c            |  10 +-
 src/backend/optimizer/util/clauses.c     |  38 +-
 src/backend/parser/parse_agg.c           |  42 +-
 src/backend/utils/adt/ruleutils.c        |  12 +-
 src/backend/utils/sort/tuplesort.c       | 263 ++++++++++-
 src/include/catalog/catversion.h         |   4 +-
 src/include/nodes/primnodes.h            |   8 +-
 src/include/optimizer/clauses.h          |   3 +-
 src/include/utils/tuplesort.h            |  19 +-
 src/test/regress/expected/aggregates.out |  36 ++
 src/test/regress/expected/rules.out      |   4 +-
 src/test/regress/sql/aggregates.sql      |   6 +
 16 files changed, 765 insertions(+), 263 deletions(-)

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 0956af455f1..0a95c92347f 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3,15 +3,35 @@
  * nodeAgg.c
  *	  Routines to handle aggregate nodes.
  *
- * Copyright (c) 1994, Regents of the University of California
+ *	  ExecAgg evaluates each aggregate in the following steps: (initcond1,
+ *	  initcond2 are the initial values; sfunc1 and sfunc2 are the transition
+ *	  functions; finalfunc is the final function.)
+ *
+ *		 value1 = initcond1
+ *		 value2 = initcond2
+ *		 foreach input_value do
+ *			value1 = sfunc1(value1, input_value)
+ *			value2 = sfunc2(value2)
+ *		 value1 = finalfunc(value1, value2)
+ *
+ *	  If initcond1 is NULL then the first non-NULL input_value is
+ *	  assigned directly to value1.  sfunc1 isn't applied until value1
+ *	  is non-NULL.
+ *
+ *	  sfunc1 is never applied when the current tuple's input_value is NULL.
+ *	  sfunc2 is applied for each tuple if the aggref is marked 'usenulls',
+ *	  otherwise it is only applied when input_value is not NULL.
+ *	  (usenulls was formerly used for COUNT(*), but is no longer needed for
+ *	  that purpose; as of 10/1999 the support for usenulls is dead code.
+ *	  I have not removed it because it seems like a potentially useful
+ *	  feature for user-defined aggregates.  We'd just need to add a
+ *	  flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
  *
  *
- * NOTE
- *	  The implementation of Agg node has been reworked to handle legal
- *	  SQL aggregates. (Do not expect POSTQUEL semantics.)	 -- ay 2/95
+ * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.59 1999/10/30 02:35:14 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.60 1999/12/13 01:26:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,11 +40,15 @@
 
 #include "access/heapam.h"
 #include "catalog/pg_aggregate.h"
+#include "catalog/pg_operator.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
 #include "optimizer/clauses.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
 #include "parser/parse_type.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
 /*
  * AggStatePerAggData - per-aggregate working state for the Agg scan
@@ -36,6 +60,9 @@ typedef struct AggStatePerAggData
 	 * thereafter:
 	 */
 
+	/* Link to Aggref node this working state is for */
+	Aggref	   *aggref;
+
 	/* Oids of transfer functions */
 	Oid			xfn1_oid;
 	Oid			xfn2_oid;
@@ -47,6 +74,18 @@ typedef struct AggStatePerAggData
 	FmgrInfo	xfn1;
 	FmgrInfo	xfn2;
 	FmgrInfo	finalfn;
+	/*
+	 * Type of input data and Oid of sort operator to use for it;
+	 * only set/used when aggregate has DISTINCT flag.  (These are not
+	 * used directly by nodeAgg, but must be passed to the Tuplesort object.)
+	 */
+	Oid			inputType;
+	Oid			sortOperator;
+	/*
+	 * fmgr lookup data for input type's equality operator --- only set/used
+	 * when aggregate has DISTINCT flag.
+	 */
+	FmgrInfo	equalfn;
 	/*
 	 * initial values from pg_aggregate entry
 	 */
@@ -55,19 +94,29 @@ typedef struct AggStatePerAggData
 	bool		initValue1IsNull,
 				initValue2IsNull;
 	/*
-	 * We need the len and byval info for the agg's transition status types
-	 * in order to know how to copy/delete values.
+	 * We need the len and byval info for the agg's input and transition
+	 * data types in order to know how to copy/delete values.
 	 */
-	int			transtype1Len,
+	int			inputtypeLen,
+				transtype1Len,
 				transtype2Len;
-	bool		transtype1ByVal,
+	bool		inputtypeByVal,
+				transtype1ByVal,
 				transtype2ByVal;
 
 	/*
 	 * These values are working state that is initialized at the start
-	 * of an input tuple group and updated for each input tuple:
+	 * of an input tuple group and updated for each input tuple.
+	 *
+	 * For a simple (non DISTINCT) aggregate, we just feed the input values
+	 * straight to the transition functions.  If it's DISTINCT, we pass the
+	 * input values into a Tuplesort object; then at completion of the input
+	 * tuple group, we scan the sorted values, eliminate duplicates, and run
+	 * the transition functions on the rest.
 	 */
 
+	Tuplesortstate *sortstate;	/* sort object, if a DISTINCT agg */
+
 	Datum		value1,			/* current transfer values 1 and 2 */
 				value2;
 	bool		value1IsNull,
@@ -82,28 +131,248 @@ typedef struct AggStatePerAggData
 } AggStatePerAggData;
 
 
+static void initialize_aggregate (AggStatePerAgg peraggstate);
+static void advance_transition_functions (AggStatePerAgg peraggstate,
+										  Datum newVal, bool isNull);
+static void finalize_aggregate (AggStatePerAgg peraggstate,
+								Datum *resultVal, bool *resultIsNull);
+static Datum copyDatum(Datum val, int typLen, bool typByVal);
+
+
 /*
- * Helper routine to make a copy of a Datum.
- *
- * NB: input had better not be a NULL; might cause null-pointer dereference.
+ * Initialize one aggregate for a new set of input values.
  */
-static Datum
-copyDatum(Datum val, int typLen, bool typByVal)
+static void
+initialize_aggregate (AggStatePerAgg peraggstate)
 {
-	if (typByVal)
-		return val;
+	Aggref		   *aggref = peraggstate->aggref;
+
+	/*
+	 * Start a fresh sort operation for each DISTINCT aggregate.
+	 */
+	if (aggref->aggdistinct)
+	{
+		/* In case of rescan, maybe there could be an uncompleted
+		 * sort operation?  Clean it up if so.
+		 */
+		if (peraggstate->sortstate)
+			tuplesort_end(peraggstate->sortstate);
+
+		peraggstate->sortstate =
+			tuplesort_begin_datum(peraggstate->inputType,
+								  peraggstate->sortOperator,
+								  false);
+	}
+
+	/*
+	 * (Re)set value1 and value2 to their initial values.
+	 */
+	if (OidIsValid(peraggstate->xfn1_oid) &&
+		! peraggstate->initValue1IsNull)
+		peraggstate->value1 = copyDatum(peraggstate->initValue1, 
+										peraggstate->transtype1Len,
+										peraggstate->transtype1ByVal);
+	else
+		peraggstate->value1 = (Datum) NULL;
+	peraggstate->value1IsNull = peraggstate->initValue1IsNull;
+
+	if (OidIsValid(peraggstate->xfn2_oid) &&
+		! peraggstate->initValue2IsNull)
+		peraggstate->value2 = copyDatum(peraggstate->initValue2, 
+										peraggstate->transtype2Len,
+										peraggstate->transtype2ByVal);
 	else
+		peraggstate->value2 = (Datum) NULL;
+	peraggstate->value2IsNull = peraggstate->initValue2IsNull;
+
+	/* ------------------------------------------
+	 * If the initial value for the first transition function
+	 * doesn't exist in the pg_aggregate table then we will let
+	 * the first value returned from the outer procNode become
+	 * the initial value. (This is useful for aggregates like
+	 * max{} and min{}.)  The noInitValue flag signals that we
+	 * still need to do this.
+	 * ------------------------------------------
+	 */
+	peraggstate->noInitValue = peraggstate->initValue1IsNull;
+}
+
+/*
+ * Given a new input value, advance the transition functions of an aggregate.
+ *
+ * Note: if the agg does not have usenulls set, null inputs will be filtered
+ * out before reaching here.
+ */
+static void
+advance_transition_functions (AggStatePerAgg peraggstate,
+							  Datum newVal, bool isNull)
+{
+	Datum		args[2];
+
+	if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
 	{
-		char   *newVal;
+		if (peraggstate->noInitValue)
+		{
+			/*
+			 * value1 has not been initialized. This is the first non-NULL
+			 * input value. We use it as the initial value for value1.
+			 *
+			 * XXX We assume, without having checked, that the agg's input
+			 * type is binary-compatible with its transtype1!
+			 *
+			 * We have to copy the datum since the tuple from which it came
+			 * will be freed on the next iteration of the scan.
+			 */
+			peraggstate->value1 = copyDatum(newVal,
+											peraggstate->transtype1Len,
+											peraggstate->transtype1ByVal);
+			peraggstate->value1IsNull = false;
+			peraggstate->noInitValue = false;
+		}
+		else
+		{
+			/* apply transition function 1 */
+			args[0] = peraggstate->value1;
+			args[1] = newVal;
+			newVal = (Datum) fmgr_c(&peraggstate->xfn1,
+									(FmgrValues *) args,
+									&isNull);
+			if (! peraggstate->transtype1ByVal)
+				pfree(peraggstate->value1);
+			peraggstate->value1 = newVal;
+		}
+	}
 
-		if (typLen == -1)		/* variable length type? */
-			typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
-		newVal = (char *) palloc(typLen);
-		memcpy(newVal, DatumGetPointer(val), typLen);
-		return PointerGetDatum(newVal);
+	if (OidIsValid(peraggstate->xfn2_oid))
+	{
+		/* apply transition function 2 */
+		args[0] = peraggstate->value2;
+		isNull = false;			/* value2 cannot be null, currently */
+		newVal = (Datum) fmgr_c(&peraggstate->xfn2,
+								(FmgrValues *) args,
+								&isNull);
+		if (! peraggstate->transtype2ByVal)
+			pfree(peraggstate->value2);
+		peraggstate->value2 = newVal;
 	}
 }
 
+/*
+ * Compute the final value of one aggregate.
+ */
+static void
+finalize_aggregate (AggStatePerAgg peraggstate,
+					Datum *resultVal, bool *resultIsNull)
+{
+	Aggref	   *aggref = peraggstate->aggref;
+	char	   *args[2];
+
+	/*
+	 * If it's a DISTINCT aggregate, all we've done so far is to stuff the
+	 * input values into the sort object.  Complete the sort, then run
+	 * the transition functions on the non-duplicate values.  Note that
+	 * DISTINCT always suppresses nulls, per SQL spec, regardless of usenulls.
+	 */
+	if (aggref->aggdistinct)
+	{
+		Datum		oldVal = (Datum) 0;
+		bool		haveOldVal = false;
+		Datum		newVal;
+		bool		isNull;
+
+		tuplesort_performsort(peraggstate->sortstate);
+		while (tuplesort_getdatum(peraggstate->sortstate, true,
+								  &newVal, &isNull))
+		{
+			if (isNull)
+				continue;
+			if (haveOldVal)
+			{
+				Datum	equal;
+
+				equal = (Datum) (*fmgr_faddr(&peraggstate->equalfn)) (oldVal,
+																	  newVal);
+				if (DatumGetInt32(equal) != 0)
+				{
+					if (! peraggstate->inputtypeByVal)
+						pfree(DatumGetPointer(newVal));
+					continue;
+				}
+			}
+			advance_transition_functions(peraggstate, newVal, false);
+			if (haveOldVal && ! peraggstate->inputtypeByVal)
+				pfree(DatumGetPointer(oldVal));
+			oldVal = newVal;
+			haveOldVal = true;
+		}
+		if (haveOldVal && ! peraggstate->inputtypeByVal)
+			pfree(DatumGetPointer(oldVal));
+		tuplesort_end(peraggstate->sortstate);
+		peraggstate->sortstate = NULL;
+	}
+
+	/*
+	 * Now apply the agg's finalfn, or substitute the appropriate transition
+	 * value if there is no finalfn.
+	 *
+	 * XXX For now, only apply finalfn if we got at least one
+	 * non-null input value.  This prevents zero divide in AVG().
+	 * If we had cleaner handling of null inputs/results in functions,
+	 * we could probably take out this hack and define the result
+	 * for no inputs as whatever finalfn returns for null input.
+	 */
+	if (OidIsValid(peraggstate->finalfn_oid) &&
+		! peraggstate->noInitValue)
+	{
+		if (peraggstate->finalfn.fn_nargs > 1)
+		{
+			args[0] = (char *) peraggstate->value1;
+			args[1] = (char *) peraggstate->value2;
+		}
+		else if (OidIsValid(peraggstate->xfn1_oid))
+			args[0] = (char *) peraggstate->value1;
+		else if (OidIsValid(peraggstate->xfn2_oid))
+			args[0] = (char *) peraggstate->value2;
+		else
+			elog(ERROR, "ExecAgg: no valid transition functions??");
+		*resultIsNull = false;
+		*resultVal = (Datum) fmgr_c(&peraggstate->finalfn,
+									(FmgrValues *) args,
+									resultIsNull);
+	}
+	else if (OidIsValid(peraggstate->xfn1_oid))
+	{
+		/* Return value1 */
+		*resultVal = peraggstate->value1;
+		*resultIsNull = peraggstate->value1IsNull;
+		/* prevent pfree below */
+		peraggstate->value1IsNull = true;
+	}
+	else if (OidIsValid(peraggstate->xfn2_oid))
+	{
+		/* Return value2 */
+		*resultVal = peraggstate->value2;
+		*resultIsNull = peraggstate->value2IsNull;
+		/* prevent pfree below */
+		peraggstate->value2IsNull = true;
+	}
+	else
+		elog(ERROR, "ExecAgg: no valid transition functions??");
+
+	/*
+	 * Release any per-group working storage, unless we're passing
+	 * it back as the result of the aggregate.
+	 */
+	if (OidIsValid(peraggstate->xfn1_oid) &&
+		! peraggstate->value1IsNull &&
+		! peraggstate->transtype1ByVal)
+		pfree(peraggstate->value1);
+	
+	if (OidIsValid(peraggstate->xfn2_oid) &&
+		! peraggstate->value2IsNull &&
+		! peraggstate->transtype2ByVal)
+		pfree(peraggstate->value2);
+}
 
 /* ---------------------------------------
  *
@@ -118,30 +387,6 @@ copyDatum(Datum val, int typLen, bool typByVal)
  *	  the expression context to be used when ExecProject evaluates the
  *	  result tuple.
  *
- *	  ExecAgg evaluates each aggregate in the following steps: (initcond1,
- *	  initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
- *	  the transition functions.)
- *
- *		 value1 = initcond1
- *		 value2 = initcond2
- *		 foreach tuple do
- *			value1 = sfunc1(value1, aggregated_value)
- *			value2 = sfunc2(value2)
- *		 value1 = finalfunc(value1, value2)
- *
- *	  If initcond1 is NULL then the first non-NULL aggregated_value is
- *	  assigned directly to value1.  sfunc1 isn't applied until value1
- *	  is non-NULL.
- *
- *	  sfunc1 is never applied when the current tuple's aggregated_value
- *	  is NULL.  sfunc2 is applied for each tuple if the aggref is marked
- *	  'usenulls', otherwise it is only applied when aggregated_value is
- *	  not NULL.  (usenulls was formerly used for COUNT(*), but is no longer
- *	  needed for that purpose; as of 10/1999 the support for usenulls is
- *	  dead code.  I have not removed it because it seems like a potentially
- *	  useful feature for user-defined aggregates.  We'd just need to add a
- *	  flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
- *
  *	  If the outer subplan is a Group node, ExecAgg returns as many tuples
  *	  as there are groups.
  *
@@ -161,7 +406,6 @@ ExecAgg(Agg *node)
 	TupleTableSlot *resultSlot;
 	HeapTuple	inputTuple;
 	int			aggno;
-	List	   *alist;
 	bool		isDone;
 	bool		isNull;
 
@@ -190,42 +434,11 @@ ExecAgg(Agg *node)
 		/*
 		 * Initialize working state for a new input tuple group
 		 */
-		aggno = -1;
-		foreach(alist, aggstate->aggs)
+		for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 		{
-			AggStatePerAgg	peraggstate = &peragg[++aggno];
+			AggStatePerAgg	peraggstate = &peragg[aggno];
 
-			/*
-			 * (Re)set value1 and value2 to their initial values.
-			 */
-			if (OidIsValid(peraggstate->xfn1_oid) &&
-				! peraggstate->initValue1IsNull)
-				peraggstate->value1 = copyDatum(peraggstate->initValue1, 
-												peraggstate->transtype1Len,
-												peraggstate->transtype1ByVal);
-			else
-				peraggstate->value1 = (Datum) NULL;
-			peraggstate->value1IsNull = peraggstate->initValue1IsNull;
-
-			if (OidIsValid(peraggstate->xfn2_oid) &&
-				! peraggstate->initValue2IsNull)
-				peraggstate->value2 = copyDatum(peraggstate->initValue2, 
-												peraggstate->transtype2Len,
-												peraggstate->transtype2ByVal);
-			else
-				peraggstate->value2 = (Datum) NULL;
-			peraggstate->value2IsNull = peraggstate->initValue2IsNull;
-
-			/* ------------------------------------------
-			 * If the initial value for the first transition function
-			 * doesn't exist in the pg_aggregate table then we will let
-			 * the first value returned from the outer procNode become
-			 * the initial value. (This is useful for aggregates like
-			 * max{} and min{}.)  The noInitValue flag signals that we
-			 * still need to do this.
-			 * ------------------------------------------
-			 */
-			peraggstate->noInitValue = peraggstate->initValue1IsNull;
+			initialize_aggregate(peraggstate);
 		}
 
 		inputTuple = NULL;		/* no saved input tuple yet */
@@ -243,13 +456,11 @@ ExecAgg(Agg *node)
 				break;
 			econtext->ecxt_scantuple = outerslot;
 
-			aggno = -1;
-			foreach(alist, aggstate->aggs)
+			for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 			{
-				Aggref		   *aggref = (Aggref *) lfirst(alist);
-				AggStatePerAgg	peraggstate = &peragg[++aggno];
+				AggStatePerAgg	peraggstate = &peragg[aggno];
+				Aggref		   *aggref = peraggstate->aggref;
 				Datum			newVal;
-				Datum			args[2];
 
 				newVal = ExecEvalExpr(aggref->target, econtext,
 									  &isNull, &isDone);
@@ -257,53 +468,12 @@ ExecAgg(Agg *node)
 				if (isNull && !aggref->usenulls)
 					continue;	/* ignore this tuple for this agg */
 
-				if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
-				{
-					if (peraggstate->noInitValue)
-					{
-						/*
-						 * value1 has not been initialized. This is the
-						 * first non-NULL input value. We use it as the
-						 * initial value for value1.  XXX We assume,
-						 * without having checked, that the agg's input type
-						 * is binary-compatible with its transtype1!
-						 *
-						 * We have to copy the datum since the tuple from
-						 * which it came will be freed on the next iteration
-						 * of the scan.  
-						 */
-						peraggstate->value1 = copyDatum(newVal,
-												peraggstate->transtype1Len,
-												peraggstate->transtype1ByVal);
-						peraggstate->value1IsNull = false;
-						peraggstate->noInitValue = false;
-					}
-					else
-					{
-						/* apply transition function 1 */
-						args[0] = peraggstate->value1;
-						args[1] = newVal;
-						newVal = (Datum) fmgr_c(&peraggstate->xfn1,
-												(FmgrValues *) args,
-												&isNull);
-						if (! peraggstate->transtype1ByVal)
-							pfree(peraggstate->value1);
-						peraggstate->value1 = newVal;
-					}
-				}
-
-				if (OidIsValid(peraggstate->xfn2_oid))
-				{
-					/* apply transition function 2 */
-					args[0] = peraggstate->value2;
-					isNull = false;	/* value2 cannot be null, currently */
-					newVal = (Datum) fmgr_c(&peraggstate->xfn2,
-											(FmgrValues *) args,
-											&isNull);
-					if (! peraggstate->transtype2ByVal)
-						pfree(peraggstate->value2);
-					peraggstate->value2 = newVal;
-				}
+				if (aggref->aggdistinct)
+					tuplesort_putdatum(peraggstate->sortstate,
+									   newVal, isNull);
+				else
+					advance_transition_functions(peraggstate,
+												 newVal, isNull);
 			}
 
 			/*
@@ -320,70 +490,12 @@ ExecAgg(Agg *node)
 		 * Done scanning input tuple group.
 		 * Finalize each aggregate calculation.
 		 */
-		aggno = -1;
-		foreach(alist, aggstate->aggs)
+		for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 		{
-			AggStatePerAgg	peraggstate = &peragg[++aggno];
-			char		   *args[2];
-
-			/*
-			 * XXX For now, only apply finalfn if we got at least one
-			 * non-null input value.  This prevents zero divide in AVG().
-			 * If we had cleaner handling of null inputs/results in functions,
-			 * we could probably take out this hack and define the result
-			 * for no inputs as whatever finalfn returns for null input.
-			 */
-			if (OidIsValid(peraggstate->finalfn_oid) &&
-				! peraggstate->noInitValue)
-			{
-				if (peraggstate->finalfn.fn_nargs > 1)
-				{
-					args[0] = (char *) peraggstate->value1;
-					args[1] = (char *) peraggstate->value2;
-				}
-				else if (OidIsValid(peraggstate->xfn1_oid))
-					args[0] = (char *) peraggstate->value1;
-				else if (OidIsValid(peraggstate->xfn2_oid))
-					args[0] = (char *) peraggstate->value2;
-				else
-					elog(ERROR, "ExecAgg: no valid transition functions??");
-				aggnulls[aggno] = false;
-				aggvalues[aggno] = (Datum) fmgr_c(&peraggstate->finalfn,
-												  (FmgrValues *) args,
-												  &(aggnulls[aggno]));
-			}
-			else if (OidIsValid(peraggstate->xfn1_oid))
-			{
-				/* Return value1 */
-				aggvalues[aggno] = peraggstate->value1;
-				aggnulls[aggno] = peraggstate->value1IsNull;
-				/* prevent pfree below */
-				peraggstate->value1IsNull = true;
-			}
-			else if (OidIsValid(peraggstate->xfn2_oid))
-			{
-				/* Return value2 */
-				aggvalues[aggno] = peraggstate->value2;
-				aggnulls[aggno] = peraggstate->value2IsNull;
-				/* prevent pfree below */
-				peraggstate->value2IsNull = true;
-			}
-			else
-				elog(ERROR, "ExecAgg: no valid transition functions??");
-
-			/*
-			 * Release any per-group working storage, unless we're passing
-			 * it back as the result of the aggregate.
-			 */
-			if (OidIsValid(peraggstate->xfn1_oid) &&
-				! peraggstate->value1IsNull &&
-				! peraggstate->transtype1ByVal)
-				pfree(peraggstate->value1);
+			AggStatePerAgg	peraggstate = &peragg[aggno];
 
-			if (OidIsValid(peraggstate->xfn2_oid) &&
-				! peraggstate->value2IsNull &&
-				! peraggstate->transtype2ByVal)
-				pfree(peraggstate->value2);
+			finalize_aggregate(peraggstate,
+							   & aggvalues[aggno], & aggnulls[aggno]);
 		}
 
 		/*
@@ -458,14 +570,14 @@ ExecAgg(Agg *node)
 
 		/*
 		 * Form a projection tuple using the aggregate results and the
-		 * representative input tuple.  Store it in the result tuple slot,
-		 * and return it if it meets my qual condition.
+		 * representative input tuple.  Store it in the result tuple slot.
 		 */
 		resultSlot = ExecProject(projInfo, &isDone);
 
 		/*
 		 * If the completed tuple does not match the qualifications,
 		 * it is ignored and we loop back to try to process another group.
+		 * Otherwise, return the tuple.
 		 */
 	}
 	while (! ExecQual(node->plan.qual, econtext));
@@ -505,6 +617,11 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
 
 	/*
 	 * find aggregates in targetlist and quals
+	 *
+	 * Note: pull_agg_clause also checks that no aggs contain other agg
+	 * calls in their arguments.  This would make no sense under SQL semantics
+	 * anyway (and it's forbidden by the spec).  Because that is true, we
+	 * don't need to worry about evaluating the aggs in any particular order.
 	 */
 	aggstate->aggs = nconc(pull_agg_clause((Node *) node->plan.targetlist),
 						   pull_agg_clause((Node *) node->plan.qual));
@@ -588,6 +705,9 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
 		/* Mark Aggref node with its associated index in the result array */
 		aggref->aggno = aggno;
 
+		/* Fill in the peraggstate data */
+		peraggstate->aggref = aggref;
+
 		aggTuple = SearchSysCacheTuple(AGGNAME,
 									   PointerGetDatum(aggname),
 									   ObjectIdGetDatum(aggref->basetype),
@@ -644,6 +764,29 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
 		{
 			fmgr_info(finalfn_oid, &peraggstate->finalfn);
 		}
+
+		if (aggref->aggdistinct)
+		{
+			Oid			inputType = exprType(aggref->target);
+			Operator	eq_operator;
+			Form_pg_operator pgopform;
+
+			peraggstate->inputType = inputType;
+			typeInfo = typeidType(inputType);
+			peraggstate->inputtypeLen = typeLen(typeInfo);
+			peraggstate->inputtypeByVal = typeByVal(typeInfo);
+
+			eq_operator = oper("=", inputType, inputType, true);
+			if (!HeapTupleIsValid(eq_operator))
+			{
+				elog(ERROR, "Unable to identify an equality operator for type '%s'",
+					 typeidTypeName(inputType));
+			}
+			pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
+			fmgr_info(pgopform->oprcode, &(peraggstate->equalfn));
+			peraggstate->sortOperator = any_ordering_op(inputType);
+			peraggstate->sortstate = NULL;
+		}
 	}
 
 	return TRUE;
@@ -690,3 +833,26 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
 		ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node);
 
 }
+
+
+/*
+ * Helper routine to make a copy of a Datum.
+ *
+ * NB: input had better not be a NULL; might cause null-pointer dereference.
+ */
+static Datum
+copyDatum(Datum val, int typLen, bool typByVal)
+{
+	if (typByVal)
+		return val;
+	else
+	{
+		char   *newVal;
+
+		if (typLen == -1)		/* variable length type? */
+			typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
+		newVal = (char *) palloc(typLen);
+		memcpy(newVal, DatumGetPointer(val), typLen);
+		return PointerGetDatum(newVal);
+	}
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 1b2726f8226..884926b9b62 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.97 1999/11/23 20:06:52 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.98 1999/12/13 01:26:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -832,6 +832,8 @@ _copyAggref(Aggref *from)
 	newnode->aggtype = from->aggtype;
 	Node_Copy(from, newnode, target);
 	newnode->usenulls = from->usenulls;
+	newnode->aggstar = from->aggstar;
+	newnode->aggdistinct = from->aggdistinct;
 	newnode->aggno = from->aggno; /* probably not needed */
 
 	return newnode;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index b35b2712754..f70fe508ae5 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.52 1999/11/23 20:06:52 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.53 1999/12/13 01:26:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -219,6 +219,10 @@ _equalAggref(Aggref *a, Aggref *b)
 		return false;
 	if (a->usenulls != b->usenulls)
 		return false;
+	if (a->aggstar != b->aggstar)
+		return false;
+	if (a->aggdistinct != b->aggdistinct)
+		return false;
 	/* ignore aggno, which is only a private field for the executor */
 	return true;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 78bda61b30f..7907f1b62ef 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- *	$Id: outfuncs.c,v 1.99 1999/12/10 07:37:31 tgl Exp $
+ *	$Id: outfuncs.c,v 1.100 1999/12/13 01:26:53 tgl Exp $
  *
  * NOTES
  *	  Every (plan) node in POSTGRES has an associated "out" routine which
@@ -680,14 +680,17 @@ static void
 _outAggref(StringInfo str, Aggref *node)
 {
 	appendStringInfo(str,
-				 " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
+					 " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
 					 stringStringInfo(node->aggname),
 					 node->basetype,
 					 node->aggtype);
 	_outNode(str, node->target);
 
-	appendStringInfo(str, " :usenulls %s ",
-					 node->usenulls ? "true" : "false");
+	appendStringInfo(str, " :usenulls %s :aggstar %s :aggdistinct %s ",
+					 node->usenulls ? "true" : "false",
+					 node->aggstar ? "true" : "false",
+					 node->aggdistinct ? "true" : "false");
+	/* aggno is not dumped */
 }
 
 /*
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 99be5199fa9..83683ff3b10 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.75 1999/11/23 20:06:53 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.76 1999/12/13 01:26:54 tgl Exp $
  *
  * NOTES
  *	  Most of the read functions for plan nodes are tested. (In fact, they
@@ -1190,6 +1190,14 @@ _readAggref()
 	token = lsptok(NULL, &length);		/* get usenulls */
 	local_node->usenulls = (token[0] == 't') ? true : false;
 
+	token = lsptok(NULL, &length);		/* eat :aggstar */
+	token = lsptok(NULL, &length);		/* get aggstar */
+	local_node->aggstar = (token[0] == 't') ? true : false;
+
+	token = lsptok(NULL, &length);		/* eat :aggdistinct */
+	token = lsptok(NULL, &length);		/* get aggdistinct */
+	local_node->aggdistinct = (token[0] == 't') ? true : false;
+
 	return local_node;
 }
 
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 63b3ff87d9e..63eebae0603 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.56 1999/12/09 05:58:53 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.57 1999/12/13 01:26:55 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -45,6 +45,7 @@ typedef struct {
 	List	   *targetList;
 } check_subplans_for_ungrouped_vars_context;
 
+static bool contain_agg_clause_walker(Node *node, void *context);
 static bool pull_agg_clause_walker(Node *node, List **listptr);
 static bool check_subplans_for_ungrouped_vars_walker(Node *node,
 					check_subplans_for_ungrouped_vars_context *context);
@@ -393,12 +394,36 @@ pull_constant_clauses(List *quals, List **constantQual)
 	return restqual;
 }
 
+/*
+ * contain_agg_clause
+ *	  Recursively search for Aggref nodes within a clause.
+ *
+ *	  Returns true if any aggregate found.
+ */
+bool
+contain_agg_clause(Node *clause)
+{
+	return contain_agg_clause_walker(clause, NULL);
+}
+
+static bool
+contain_agg_clause_walker(Node *node, void *context)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Aggref))
+		return true;			/* abort the tree traversal and return true */
+	return expression_tree_walker(node, contain_agg_clause_walker, context);
+}
+
 /*
  * pull_agg_clause
  *	  Recursively pulls all Aggref nodes from an expression tree.
  *
  *	  Returns list of Aggref nodes found.  Note the nodes themselves are not
  *	  copied, only referenced.
+ *
+ *	  Note: this also checks for nested aggregates, which are an error.
  */
 List *
 pull_agg_clause(Node *clause)
@@ -417,9 +442,16 @@ pull_agg_clause_walker(Node *node, List **listptr)
 	if (IsA(node, Aggref))
 	{
 		*listptr = lappend(*listptr, node);
-		/* continue, to iterate over agg's arg as well (do nested aggregates
-		 * actually work?)
+		/*
+		 * Complain if the aggregate's argument contains any aggregates;
+		 * nested agg functions are semantically nonsensical.
+		 */
+		if (contain_agg_clause(((Aggref *) node)->target))
+			elog(ERROR, "Aggregate function calls may not be nested");
+		/*
+		 * Having checked that, we need not recurse into the argument.
 		 */
+		return false;
 	}
 	return expression_tree_walker(node, pull_agg_clause_walker,
 								  (void *) listptr);
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 68280f7f4a0..21f8efe7f67 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.31 1999/12/10 07:37:35 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.32 1999/12/13 01:26:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,38 +28,11 @@ typedef struct {
 	List	   *groupClauses;
 } check_ungrouped_columns_context;
 
-static bool contain_agg_clause(Node *clause);
-static bool contain_agg_clause_walker(Node *node, void *context);
 static void check_ungrouped_columns(Node *node, ParseState *pstate,
 									List *groupClauses);
 static bool check_ungrouped_columns_walker(Node *node,
 										   check_ungrouped_columns_context *context);
 
-/*
- * contain_agg_clause
- *	  Recursively find aggref nodes within a clause.
- *
- *	  Returns true if any aggregate found.
- *
- * NOTE: we assume that the given clause has been transformed suitably for
- * parser output.  This means we can use the planner's expression_tree_walker.
- */
-static bool
-contain_agg_clause(Node *clause)
-{
-	return contain_agg_clause_walker(clause, NULL);
-}
-
-static bool
-contain_agg_clause_walker(Node *node, void *context)
-{
-	if (node == NULL)
-		return false;
-	if (IsA(node, Aggref))
-		return true;			/* abort the tree traversal and return true */
-	return expression_tree_walker(node, contain_agg_clause_walker, context);
-}
-
 /*
  * check_ungrouped_columns -
  *	  Scan the given expression tree for ungrouped variables (variables
@@ -232,7 +205,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
 	 * Since "1" never evaluates as null, we currently have no need of
 	 * the "usenulls" flag, but it should be kept around; in fact, we should
 	 * extend the pg_aggregate table to let usenulls be specified as an
-	 * attribute of user-defined aggregates.
+	 * attribute of user-defined aggregates.  In the meantime, usenulls
+	 * is just always set to "false".
 	 */
 
 	aggform = (Form_pg_aggregate) GETSTRUCT(theAggTuple);
@@ -264,14 +238,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
 	aggref->aggtype = fintype;
 	aggref->target = lfirst(args);
 	aggref->usenulls = usenulls;
-
-	/*
-	 * We should store agg_star and agg_distinct into the Aggref node,
-	 * and let downstream processing deal with them.  Currently, agg_star
-	 * is ignored and agg_distinct is not implemented...
-	 */
-	if (agg_distinct)
-		elog(ERROR, "aggregate(DISTINCT ...) is not implemented yet");
+	aggref->aggstar = agg_star;
+	aggref->aggdistinct = agg_distinct;
 
 	pstate->p_hasAggs = true;
 
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index b62559ccdde..47fd957c994 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -3,7 +3,7 @@
  *			  out of it's tuple
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.34 1999/12/06 02:37:17 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.35 1999/12/13 01:27:01 tgl Exp $
  *
  *	  This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -1352,9 +1352,13 @@ get_rule_expr(Node *node, deparse_context *context)
 			{
 				Aggref	   *aggref = (Aggref *) node;
 
-				appendStringInfo(buf, "%s(",
-								 quote_identifier(aggref->aggname));
-				get_rule_expr(aggref->target, context);
+				appendStringInfo(buf, "%s(%s",
+								 quote_identifier(aggref->aggname),
+								 aggref->aggdistinct ? "DISTINCT " : "");
+				if (aggref->aggstar)
+					appendStringInfo(buf, "*");
+				else
+					get_rule_expr(aggref->target, context);
 				appendStringInfo(buf, ")");
 			}
 			break;
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 5297fde36dc..6e9a23f1cd1 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -3,8 +3,8 @@
  * tuplesort.c
  *	  Generalized tuple sorting routines.
  *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
  * if necessary).  It works efficiently for both small and large amounts
  * of data.  Small amounts are sorted in-memory using qsort().  Large
  * amounts are sorted using temporary files and a standard external sort
@@ -77,7 +77,7 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.2 1999/10/30 17:27:15 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.3 1999/12/13 01:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,9 @@
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "miscadmin.h"
+#include "parser/parse_type.h"
 #include "utils/logtape.h"
+#include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
 
 /*
@@ -251,6 +253,17 @@ struct Tuplesortstate
 	 */
 	Relation	indexRel;
 	bool		enforceUnique;	/* complain if we find duplicate tuples */
+
+	/*
+	 * These variables are specific to the Datum case; they are set
+	 * by tuplesort_begin_datum and used only by the DatumTuple routines.
+	 */
+	Oid			datumType;
+	Oid			sortOperator;
+	FmgrInfo	sortOpFn;		/* cached lookup data for sortOperator */
+	/* we need typelen and byval in order to know how to copy the Datums. */
+	int			datumTypeLen;
+	bool		datumTypeByVal;
 };
 
 #define COMPARETUP(state,a,b)	((*(state)->comparetup) (state, a, b))
@@ -321,7 +334,22 @@ struct Tuplesortstate
  *--------------------
  */
 
+/*
+ * For sorting single Datums, we build "pseudo tuples" that just carry
+ * the datum's value and null flag.  For pass-by-reference data types,
+ * the actual data value appears after the DatumTuple struct (MAXALIGNed,
+ * of course), and the val field in the struct is just a pointer to it.
+ */
+
+typedef struct
+{
+	Datum		val;			/* the value itself, or a pointer to it */
+	bool		isNull;			/* the datum's null flag */
+} DatumTuple;
+
+
 static Tuplesortstate *tuplesort_begin_common(bool randomAccess);
+static void puttuple_common(Tuplesortstate *state, void *tuple);
 static void inittapes(Tuplesortstate *state);
 static void selectnewtape(Tuplesortstate *state);
 static void mergeruns(Tuplesortstate *state);
@@ -349,6 +377,13 @@ static void writetup_index(Tuplesortstate *state, int tapenum, void *tup);
 static void *readtup_index(Tuplesortstate *state, int tapenum,
 						   unsigned int len);
 static unsigned int tuplesize_index(Tuplesortstate *state, void *tup);
+static int comparetup_datum(Tuplesortstate *state,
+							const void *a, const void *b);
+static void *copytup_datum(Tuplesortstate *state, void *tup);
+static void writetup_datum(Tuplesortstate *state, int tapenum, void *tup);
+static void *readtup_datum(Tuplesortstate *state, int tapenum,
+						   unsigned int len);
+static unsigned int tuplesize_datum(Tuplesortstate *state, void *tup);
 
 /*
  * Since qsort(3) will not pass any context info to qsort_comparetup(),
@@ -369,6 +404,7 @@ static Tuplesortstate *qsort_tuplesortstate;
  * have been supplied.  After performsort, retrieve the tuples in sorted
  * order by calling tuplesort_gettuple until it returns NULL.  (If random
  * access was requested, rescan, markpos, and restorepos can also be called.)
+ * For Datum sorts, putdatum/getdatum are used instead of puttuple/gettuple.
  * Call tuplesort_end to terminate the operation and release memory/disk space.
  */
 
@@ -444,6 +480,32 @@ tuplesort_begin_index(Relation indexRel,
 	return state;
 }
 
+Tuplesortstate *
+tuplesort_begin_datum(Oid datumType,
+					  Oid sortOperator,
+					  bool randomAccess)
+{
+	Tuplesortstate *sortstate = tuplesort_begin_common(randomAccess);
+	Type			typ;
+
+	/* remember what is being sorted, and by which operator */
+	sortstate->datumType = datumType;
+	sortstate->sortOperator = sortOperator;
+	/* install the Datum-specific support routines */
+	sortstate->comparetup = comparetup_datum;
+	sortstate->copytup = copytup_datum;
+	sortstate->writetup = writetup_datum;
+	sortstate->readtup = readtup_datum;
+	sortstate->tuplesize = tuplesize_datum;
+
+	/* look up the operator's function and the type's length/byval */
+	fmgr_info(get_opcode(sortOperator), &sortstate->sortOpFn);
+	typ = typeidType(datumType);
+	sortstate->datumTypeLen = typeLen(typ);
+	sortstate->datumTypeByVal = typeByVal(typ);
+	return sortstate;
+}
+
 /*
  * tuplesort_end
  *
@@ -476,9 +538,60 @@ tuplesort_puttuple(Tuplesortstate *state, void *tuple)
 {
 	/*
 	 * Copy the given tuple into memory we control, and decrease availMem.
+	 * Then call the code shared with the Datum case.
 	 */
 	tuple = COPYTUP(state, tuple);
 
+	puttuple_common(state, tuple);
+}
+
+/*
+ * Add a single Datum to the set of values to be sorted.
+ *
+ * A non-null value of a pass-by-ref type is copied, not merely pointed to.
+ */
+void
+tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
+{
+	DatumTuple	   *dtup;
+
+	/*
+	 * Wrap the datum in a pseudo-tuple, charging its size against availMem.
+	 */
+	if (!isNull && !state->datumTypeByVal)
+	{
+		int		hdrlen = MAXALIGN(sizeof(DatumTuple));
+		int		vallen = state->datumTypeLen;
+		int		totlen;
+		char   *valcopy;
+
+		if (vallen == -1)		/* varlena: length is stored in the value */
+			vallen = VARSIZE((struct varlena *) DatumGetPointer(val));
+		totlen = vallen + hdrlen;
+		USEMEM(state, totlen);
+		dtup = (DatumTuple *) palloc(totlen);
+		valcopy = ((char *) dtup) + hdrlen;
+		memcpy(valcopy, DatumGetPointer(val), vallen);
+		dtup->val = PointerGetDatum(valcopy);
+		dtup->isNull = false;
+	}
+	else						/* NULL or by-value: the struct suffices */
+	{
+		USEMEM(state, sizeof(DatumTuple));
+		dtup = (DatumTuple *) palloc(sizeof(DatumTuple));
+		dtup->val = val;
+		dtup->isNull = isNull;
+	}
+
+	puttuple_common(state, (void *) dtup);
+}
+
+/*
+ * Shared code for tuple and datum cases.
+ */
+static void
+puttuple_common(Tuplesortstate *state, void *tuple)
+{
 	switch (state->status)
 	{
 		case TSS_INITIAL:
@@ -753,6 +866,50 @@ tuplesort_gettuple(Tuplesortstate *state, bool forward,
 	}
 }
 
+/*
+ * Retrieve the next Datum, scanning forward or backward as requested.
+ * The result is FALSE once the datums are exhausted.
+ *
+ * A non-null value of a pass-by-ref type is handed back as a fresh palloc'd
+ * copy, which the caller owns from then on.
+ */
+bool
+tuplesort_getdatum(Tuplesortstate *state, bool forward,
+				   Datum *val, bool *isNull)
+{
+	bool			freeit;
+	DatumTuple	   *dtup;
+
+	dtup = (DatumTuple *) tuplesort_gettuple(state, forward, &freeit);
+
+	if (dtup == NULL)
+		return false;
+
+	if (!dtup->isNull && !state->datumTypeByVal)
+	{
+		int		vallen = state->datumTypeLen;
+		char   *copy;
+
+		if (vallen == -1)		/* varlena: length is stored in the value */
+			vallen = VARSIZE((struct varlena *) DatumGetPointer(dtup->val));
+		copy = (char *) palloc(vallen);
+		memcpy(copy, DatumGetPointer(dtup->val), vallen);
+		*val = PointerGetDatum(copy);
+		*isNull = false;
+	}
+	else
+	{
+		*val = dtup->val;
+		*isNull = dtup->isNull;
+	}
+
+	if (freeit)
+		pfree(dtup);
+
+	return true;
+}
+
+
 /*
  * inittapes - initialize for tape sorting.
  *
@@ -1695,3 +1852,103 @@ tuplesize_index(Tuplesortstate *state, void *tup)
 
 	return tuplen;
 }
+
+
+/*
+ * Routines specialized for DatumTuple case
+ */
+
+static int
+comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
+{
+	DatumTuple *lhs = (DatumTuple *) a;
+	DatumTuple *rhs = (DatumTuple *) b;
+	int			cmp;
+
+	/* NULLs sort after all non-NULL values and compare equal to each other */
+	if (lhs->isNull)
+		return rhs->isNull ? 0 : 1;
+	if (rhs->isNull)
+		return -1;
+
+	/*
+	 * Both values are non-NULL, so consult the sort operator's function.
+	 * The negated result of op(a, b) is returned when it is nonzero;
+	 * otherwise op(b, a) is evaluated and its result is returned as-is.
+	 */
+	cmp = - (int) (*fmgr_faddr(&state->sortOpFn)) (lhs->val, rhs->val);
+	if (cmp == 0)
+		cmp = (int) (*fmgr_faddr(&state->sortOpFn)) (rhs->val,
+													 lhs->val);
+
+	return cmp;
+}
+
+static void *
+copytup_datum(Tuplesortstate *state, void *tup)
+{
+	/* Not currently needed: tuplesort_putdatum builds its own pseudo-tuple */
+	elog(ERROR, "copytup_datum() should not be called");
+	return NULL;				/* presumably just keeps compilers quiet */
+}
+
+static void
+writetup_datum(Tuplesortstate *state, int tapenum, void *tup)
+{
+	unsigned int	datalen = tuplesize_datum(state, tup);
+	unsigned int	lenword = datalen + sizeof(unsigned int);
+
+	/* on-tape layout: length word, tuple image, optional trailing length */
+	LogicalTapeWrite(state->tapeset, tapenum,
+					 (void*) &lenword, sizeof(lenword));
+	LogicalTapeWrite(state->tapeset, tapenum, tup, datalen);
+	if (state->randomAccess)	/* need trailing length word? */
+		LogicalTapeWrite(state->tapeset, tapenum,
+						 (void*) &lenword, sizeof(lenword));
+
+	/* the in-memory pseudo-tuple is released once it has been written */
+	FREEMEM(state, datalen);
+	pfree(tup);
+}
+
+static void *
+readtup_datum(Tuplesortstate *state, int tapenum, unsigned int len)
+{
+	unsigned int	bodylen = len - sizeof(unsigned int);
+	DatumTuple	   *dtup = (DatumTuple *) palloc(bodylen);
+
+	USEMEM(state, bodylen);
+	if (LogicalTapeRead(state->tapeset, tapenum, (void *) dtup,
+						bodylen) != bodylen)
+		elog(ERROR, "tuplesort: unexpected end of data");
+	if (state->randomAccess &&	/* also consume the trailing length word */
+		LogicalTapeRead(state->tapeset, tapenum, (void *) &bodylen,
+						sizeof(bodylen)) != sizeof(bodylen))
+		elog(ERROR, "tuplesort: unexpected end of data");
+	/* a by-reference value sits just past the struct; re-aim val at it */
+	if (!dtup->isNull && !state->datumTypeByVal)
+		dtup->val = PointerGetDatum(((char *) dtup) +
+									MAXALIGN(sizeof(DatumTuple)));
+	return (void *) dtup;
+}
+
+static unsigned int
+tuplesize_datum(Tuplesortstate *state, void *tup)
+{
+	DatumTuple	   *dtup = (DatumTuple *) tup;
+	int				vallen;
+	int				total;
+
+	/* NULL and pass-by-value datums occupy nothing beyond the bare struct */
+	if (dtup->isNull || state->datumTypeByVal)
+		return (unsigned int) sizeof(DatumTuple);
+
+	/*
+	 * Otherwise the value follows the MAXALIGNed struct; count both.
+	 */
+	vallen = state->datumTypeLen;
+	if (vallen == -1)			/* varlena: length is stored in the value */
+		vallen = VARSIZE((struct varlena *) DatumGetPointer(dtup->val));
+	total = vallen + MAXALIGN(sizeof(DatumTuple));
+	return (unsigned int) total;
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 728c62b1200..62244f88a47 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -36,7 +36,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.4 1999/11/24 16:52:48 momjian Exp $
+ * $Id: catversion.h,v 1.5 1999/12/13 01:27:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,6 +51,6 @@
  * catalog changes on the same day...)
  */
 
-#define CATALOG_VERSION_NO	199911241
+#define CATALOG_VERSION_NO	199912121
 
 #endif
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 2d585bdcc5a..d3fb8f732a5 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.37 1999/11/15 02:00:15 tgl Exp $
+ * $Id: primnodes.h,v 1.38 1999/12/13 01:27:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -297,10 +297,12 @@ typedef struct Iter
 /* ----------------
  * Aggref
  *		aggname			- name of the aggregate
- *		basetype		- base type Oid of the aggregate
+ *		basetype		- base type Oid of the aggregate (ie, input type)
  *		aggtype			- type Oid of final result of the aggregate
  *		target			- attribute or expression we are aggregating on
  *		usenulls		- TRUE to accept null values as inputs
+ *		aggstar			- TRUE if argument was really '*'
+ *		aggdistinct		- TRUE if arguments were labeled DISTINCT
  *		aggno			- workspace for nodeAgg.c executor
  * ----------------
  */
@@ -312,6 +314,8 @@ typedef struct Aggref
 	Oid			aggtype;
 	Node	   *target;
 	bool		usenulls;
+	bool		aggstar;
+	bool		aggdistinct;
 	int			aggno;
 } Aggref;
 
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 829bf434e78..4cd2e486aa4 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: clauses.h,v 1.31 1999/12/09 05:58:55 tgl Exp $
+ * $Id: clauses.h,v 1.32 1999/12/13 01:27:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,7 @@ extern Expr *make_ands_explicit(List *andclauses);
 extern List *make_ands_implicit(Expr *clause);
 
 extern List *pull_constant_clauses(List *quals, List **constantQual);
+extern bool contain_agg_clause(Node *clause);
 extern List *pull_agg_clause(Node *clause);
 extern void check_subplans_for_ungrouped_vars(Node *clause,
 											  Query *query,
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 7c5a3209897..4f775f74a55 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -3,8 +3,8 @@
  * tuplesort.h
  *	  Generalized tuple sorting routines.
  *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
  * if necessary).  It works efficiently for both small and large amounts
  * of data.  Small amounts are sorted in-memory using qsort().  Large
  * amounts are sorted using temporary files and a standard external sort
@@ -12,7 +12,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.1 1999/10/17 22:15:09 tgl Exp $
+ * $Id: tuplesort.h,v 1.2 1999/12/13 01:27:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,6 +34,7 @@ typedef struct Tuplesortstate Tuplesortstate;
  * code: one for sorting HeapTuples and one for sorting IndexTuples.
  * They differ primarily in the way that the sort key information is
  * supplied.
+ * Yet a third slightly different interface supports sorting bare Datums.
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
@@ -42,9 +43,15 @@ extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
 											 bool enforceUnique,
 											 bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
+											 Oid sortOperator,
+											 bool randomAccess);
 
 extern void tuplesort_puttuple(Tuplesortstate *state, void *tuple);
 
+extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
+							   bool isNull);
+
 extern void tuplesort_performsort(Tuplesortstate *state);
 
 extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
@@ -54,11 +61,15 @@ extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
 #define tuplesort_getindextuple(state, forward, should_free) \
 	((IndexTuple) tuplesort_gettuple(state, forward, should_free))
 
+extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
+							   Datum *val, bool *isNull);
+
 extern void tuplesort_end(Tuplesortstate *state);
 
 /*
  * These routines may only be called if randomAccess was specified 'true'.
- * Backwards scan in gettuple is likewise only allowed if randomAccess.
+ * Likewise, backwards scan in gettuple/getdatum is only allowed if
+ * randomAccess was specified.
  */
 
 extern void tuplesort_rescan(Tuplesortstate *state);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 84958f66937..5dac6162b59 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -76,6 +76,42 @@ cnt_1000
     1000
 (1 row)
 
+QUERY: SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+cnt_4
+-----
+    4
+(1 row)
+
+QUERY: select ten, count(*), sum(four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|100
+  1|  100|200
+  2|  100|100
+  3|  100|200
+  4|  100|100
+  5|  100|200
+  6|  100|100
+  7|  100|200
+  8|  100|100
+  9|  100|200
+(10 rows)
+
+QUERY: select ten, count(four), sum(DISTINCT four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|  2
+  1|  100|  4
+  2|  100|  2
+  3|  100|  4
+  4|  100|  2
+  5|  100|  4
+  6|  100|  2
+  7|  100|  4
+  8|  100|  2
+  9|  100|  4
+(10 rows)
+
 QUERY: SELECT newavg(four) AS avg_1 FROM onek;
 avg_1
 -----
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 474ed0bb7f6..5938458a88e 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1075,9 +1075,9 @@ pg_user           |SELECT pg_shadow.usename, pg_shadow.usesysid, pg_shadow.usecr
 pg_views          |SELECT c.relname AS viewname, pg_get_userbyid(c.relowner) AS viewowner, pg_get_viewdef(c.relname) AS definition FROM pg_class c WHERE (c.relhasrules AND (EXISTS (SELECT r.rulename FROM pg_rewrite r WHERE ((r.ev_class = c.oid) AND (r.ev_type = '1'::"char")))));                                                                                                                               
 rtest_v1          |SELECT rtest_t1.a, rtest_t1.b FROM rtest_t1;                                                                                                                                                                                                                                                                                                                                                       
 rtest_vcomp       |SELECT x.part, (x.size * y.factor) AS size_in_cm FROM rtest_comp x, rtest_unitfact y WHERE (x.unit = y.unit);                                                                                                                                                                                                                                                                                      
-rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
+rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
 rtest_vview2      |SELECT rtest_view1.a, rtest_view1.b FROM rtest_view1 WHERE rtest_view1.v;                                                                                                                                                                                                                                                                                                                          
-rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
+rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
 rtest_vview4      |SELECT x.a, x.b, count(y.a) AS refcount FROM rtest_view1 x, rtest_view2 y WHERE (x.a = y.a) GROUP BY x.a, x.b;                                                                                                                                                                                                                                                                                     
 rtest_vview5      |SELECT rtest_view1.a, rtest_view1.b, rtest_viewfunc1(rtest_view1.a) AS refcount FROM rtest_view1;                                                                                                                                                                                                                                                                                                  
 shoe              |SELECT sh.shoename, sh.sh_avail, sh.slcolor, sh.slminlen, (sh.slminlen * un.un_fact) AS slminlen_cm, sh.slmaxlen, (sh.slmaxlen * un.un_fact) AS slmaxlen_cm, sh.slunit FROM shoe_data sh, unit un WHERE (sh.slunit = un.un_name);                                                                                                                                                                  
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1fc09968072..03ea7de2bbc 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -30,6 +30,12 @@ SELECT max(student.gpa) AS max_3_7 FROM student;
 
 SELECT count(four) AS cnt_1000 FROM onek;
 
+SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+
+select ten, count(*), sum(four) from onek group by ten;
+
+select ten, count(four), sum(DISTINCT four) from onek group by ten;
+
 
 SELECT newavg(four) AS avg_1 FROM onek;
 
-- 
GitLab