From 3379fae6de5994b242cedfa48cf613ecfee3db24 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Thu, 13 Nov 2008 15:59:51 +0000
Subject: [PATCH] array_agg aggregate function, as per SQL:2008, but without
 ORDER BY clause

Rearrange the documentation a bit now that array_agg and xmlagg have similar
semantics and issues.

best of Robert Haas, Jeff Davis, Peter Eisentraut
---
 doc/src/sgml/func.sgml                  | 85 ++++++++++++++++++++-----
 src/backend/catalog/sql_features.txt    |  2 +-
 src/backend/utils/adt/array_userfuncs.c | 34 +++++++++-
 src/include/catalog/catversion.h        |  4 +-
 src/include/catalog/pg_aggregate.h      |  5 +-
 src/include/catalog/pg_proc.h           |  9 ++-
 src/include/utils/array.h               |  5 +-
 src/test/regress/expected/arrays.out    | 36 +++++++++++
 src/test/regress/sql/arrays.sql         |  7 ++
 9 files changed, 164 insertions(+), 23 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 85403e2c9f7..20443f2b288 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.457 2008/11/12 13:09:27 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.458 2008/11/13 15:59:50 petere Exp $ -->
 
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -8353,7 +8353,7 @@ SELECT xmlroot(xmlparse(document '<?xml version="1.1"?><content>abc</content>'),
     </para>
    </sect3>
 
-   <sect3>
+   <sect3 id="functions-xml-xmlagg">
     <title><literal>xmlagg</literal></title>
 
     <indexterm>
@@ -8366,10 +8366,10 @@ SELECT xmlroot(xmlparse(document '<?xml version="1.1"?><content>abc</content>'),
 
     <para>
      The function <function>xmlagg</function> is, unlike the other
-     functions below, an aggregate function.  It concatenates the
+     functions described here, an aggregate function.  It concatenates the
      input values to the aggregate function call,
      like <function>xmlconcat</function> does.
-     See <xref linkend="functions-aggregate"> for general information
+     See <xref linkend="functions-aggregate"> for additional information
      about aggregate functions.
     </para>
 
@@ -8387,11 +8387,9 @@ SELECT xmlagg(x) FROM test;
     </para>
 
     <para>
-     Note that in the current implementation, the order of the
-     concatenation is in principle undefined.  Making the input values
-     to be sorted in some other way will usually work, however.  For
-     instance, in the above example, one could influence the order
-     like so:
+     The influence the order of the concatenation, something like the
+     following approach to sort the input values can be used:
+
 <screen><![CDATA[
 SELECT xmlagg(x) FROM (SELECT * FROM test ORDER BY y DESC) AS tab;
         xmlagg
@@ -8399,11 +8397,8 @@ SELECT xmlagg(x) FROM (SELECT * FROM test ORDER BY y DESC) AS tab;
  <bar/><foo>abc</foo>
 ]]></screen>
 
-     But this approach is not guaranteed to work in all situations and
-     in all versions of PostgreSQL.  A future version of PostgreSQL
-     will probably provide an additional feature to control the order
-     in a proper way (<literal>xmlagg(expr ORDER BY expr, expr,
-     ...</literal>).
+     Again, see <xref linkend="functions-aggregate"> for additional
+     information.
     </para>
    </sect3>
 
@@ -9490,6 +9485,11 @@ SELECT NULLIF(value, '(none)') ...
       </tbody>
      </tgroup>
     </table>
+
+   <para>
+    See also <xref linkend="functions-aggregate"> about the aggregate
+    function <function>array_agg</function> for use with arrays.
+   </para>
   </sect1>
 
  <sect1 id="functions-aggregate">
@@ -9526,6 +9526,22 @@ SELECT NULLIF(value, '(none)') ...
     </thead>
 
     <tbody>
+     <row>
+      <entry>
+       <indexterm>
+        <primary>array_agg</primary>
+       </indexterm>
+       <function>array_agg(<replaceable class="parameter">expression</replaceable>)</function>
+      </entry>
+      <entry>
+       any
+      </entry>
+      <entry>
+       array of the argument type
+      </entry>
+      <entry>input values concatenated into an array</entry>
+     </row>
+
      <row>
       <entry>
        <indexterm>
@@ -9684,6 +9700,22 @@ SELECT NULLIF(value, '(none)') ...
       </entry>
       <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
      </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>xmlagg</primary>
+       </indexterm>
+       <function>xmlagg(<replaceable class="parameter">expression</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>xml</type>
+      </entry>
+      <entry>
+       <type>xml</type>
+      </entry>
+      <entry>concatenation of XML values (see also <xref linkend="functions-xml-xmlagg">)</entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -9692,8 +9724,10 @@ SELECT NULLIF(value, '(none)') ...
    It should be noted that except for <function>count</function>,
    these functions return a null value when no rows are selected.  In
    particular, <function>sum</function> of no rows returns null, not
-   zero as one might expect.  The <function>coalesce</function> function can be
-   used to substitute zero for null when necessary.
+   zero as one might expect, and <function>array_agg</function>
+   returns null rather than an empty array when there are no input
+   rows.  The <function>coalesce</function> function can be used to
+   substitute zero or an empty array for null when necessary.
   </para>
 
   <note>
@@ -9733,6 +9767,25 @@ SELECT count(*) FROM sometable;
    </para>
   </note>
 
+  <para>
+   The aggregate functions <function>array_agg</function>
+   and <function>xmlagg</function>, as well as similar user-defined
+   aggregate functions, produce meaningfully different result values
+   depending on the order of the input values.  In the current
+   implementation, the order of the concatenation is in principle
+   undefined.  Making the input values to be sorted in some other way
+   will usually work, however.  For example:
+
+<screen><![CDATA[
+SELECT xmlagg(x) FROM (SELECT * FROM test ORDER BY y DESC) AS tab;
+]]></screen>
+
+   But this approach is not guaranteed to work in all situations, and
+   it is not strictly SQL-conforming.  A future version of PostgreSQL
+   might provide an additional feature to control the order in a
+   better-defined way (<literal>xmlagg(expr ORDER BY expr, expr,
+   ...</literal>).
+  </para>
 
   <para>
    <xref linkend="functions-aggregate-statistics-table"> shows
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index e28066dc687..354fd98811b 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -350,7 +350,7 @@ S094	Arrays of reference types			NO
 S095	Array constructors by query			YES	
 S096	Optional array bounds			YES	
 S097	Array element assignment			NO	
-S098	ARRAY_AGG			NO	
+S098	ARRAY_AGG			NO	ORDER BY clause not supported
 S111	ONLY in query expressions			YES	
 S151	Type predicate			NO	
 S161	Subtype treatment			NO	
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index be2068ac5dc..4eeb64dbd81 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -6,12 +6,13 @@
  * Copyright (c) 2003-2008, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/array_userfuncs.c,v 1.23 2008/01/01 19:45:52 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/array_userfuncs.c,v 1.24 2008/11/13 15:59:50 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include "nodes/execnodes.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -465,3 +466,34 @@ create_singleton_array(FunctionCallInfo fcinfo,
 	return construct_md_array(dvalues, NULL, ndims, dims, lbs, element_type,
 							  typlen, typbyval, typalign);
 }
+
+Datum
+array_agg_transfn(PG_FUNCTION_ARGS)
+{
+	Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+
+	if (arg1_typeid == InvalidOid)
+		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						errmsg("could not determine input data type")));
+
+	/* cannot be called directly because of internal-type argument */
+	Assert(fcinfo->context && IsA(fcinfo->context, AggState));
+
+	PG_RETURN_POINTER(accumArrayResult(PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0),
+									   PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1),
+									   PG_ARGISNULL(1),
+									   arg1_typeid,
+									   ((AggState *) fcinfo->context)->aggcontext));
+}
+
+Datum
+array_agg_finalfn(PG_FUNCTION_ARGS)
+{
+	/* cannot be called directly because of internal-type argument */
+	Assert(fcinfo->context && IsA(fcinfo->context, AggState));
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();   /* returns null iff no input values */
+
+	PG_RETURN_ARRAYTYPE_P(makeArrayResult((ArrayBuildState *) PG_GETARG_POINTER(0), CurrentMemoryContext));
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 3e7e52b57f0..1f3d9c6485c 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.504 2008/11/12 13:09:27 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.505 2008/11/13 15:59:50 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200811121
+#define CATALOG_VERSION_NO	200811131
 
 #endif
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index a8b4ac2f937..350f4968fcc 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.66 2008/03/27 03:57:34 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.67 2008/11/13 15:59:50 petere Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -220,6 +220,9 @@ DATA(insert ( 2243 bitor		  -					0	1560	_null_ ));
 /* xml */
 DATA(insert ( 2901 xmlconcat2	  -					0	142		_null_ ));
 
+/* array */
+DATA(insert ( 2335	array_agg_transfn	array_agg_finalfn		0	2281	_null_ ));
+
 /*
  * prototypes for functions in pg_aggregate.c
  */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0e83606f926..5c01d1b3701 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.526 2008/11/12 13:38:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.527 2008/11/13 15:59:50 petere Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -1022,6 +1022,13 @@ DATA(insert OID = 1193 (  array_fill PGNSP PGUID 12 1 0 0 f f f f i 2 2277 "2283
 DESCR("array constructor with value");
 DATA(insert OID = 1286 (  array_fill PGNSP PGUID 12 1 0 0 f f f f i 3 2277 "2283 1007 1007" _null_ _null_ _null_ array_fill_with_lower_bounds _null_ _null_ _null_ ));
 DESCR("array constructor with value");
+DATA(insert OID = 2333 (  array_agg_transfn   PGNSP PGUID 12 1 0 0 f f f f i 2 2281 "2281 2283" _null_ _null_ _null_ array_agg_transfn _null_ _null_ _null_ ));
+DESCR("array_agg transition function");
+DATA(insert OID = 2334 (  array_agg_finalfn   PGNSP PGUID 12 1 0 0 f f f f i 1 2277 "2281" _null_ _null_ _null_ array_agg_finalfn _null_ _null_ _null_ ));
+DESCR("array_agg final function");
+DATA(insert OID = 2335 (  array_agg        PGNSP PGUID 12 1 0 0 t f f f i 1 2277 "2283" _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+DESCR("concatenate aggregate input into an array");
+
 DATA(insert OID = 760 (  smgrin			   PGNSP PGUID 12 1 0 0 f f t f s 1 210 "2275" _null_ _null_ _null_  smgrin _null_ _null_ _null_ ));
 DESCR("I/O");
 DATA(insert OID = 761 (  smgrout		   PGNSP PGUID 12 1 0 0 f f t f s 1 2275 "210" _null_ _null_ _null_  smgrout _null_ _null_ _null_ ));
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 33d9ad3207e..8b6ef08b276 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/array.h,v 1.70 2008/11/12 13:09:28 petere Exp $
+ * $PostgreSQL: pgsql/src/include/utils/array.h,v 1.71 2008/11/13 15:59:50 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -275,4 +275,7 @@ extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
 					   Datum element,
 					   int ndims);
 
+extern Datum array_agg_transfn(PG_FUNCTION_ARGS);
+extern Datum array_agg_finalfn(PG_FUNCTION_ARGS);
+
 #endif   /* ARRAY_H */
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 804d52b7982..1e990aff732 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1125,3 +1125,39 @@ select c, cardinality(c), d, cardinality(d) from arrtest;
  {foo,new_word}    |           2 | {bar,foo}     |           2
 (3 rows)
 
+select array_agg(unique1) from tenk1 where unique1 < 15;
+              array_agg               
+--------------------------------------
+ {4,2,1,6,14,9,8,5,3,13,12,11,7,10,0}
+(1 row)
+
+select array_agg(ten) from tenk1 where unique1 < 15;
+            array_agg            
+---------------------------------
+ {4,2,1,6,4,9,8,5,3,3,2,1,7,0,0}
+(1 row)
+
+select array_agg(nullif(ten, 4)) from tenk1 where unique1 < 15;
+               array_agg               
+---------------------------------------
+ {NULL,2,1,6,NULL,9,8,5,3,3,2,1,7,0,0}
+(1 row)
+
+select cardinality(array_agg(unique1)) from tenk1 where unique1 < 15;
+ cardinality 
+-------------
+          15
+(1 row)
+
+select array_agg(unique1) from (select * from tenk1 order by unique1 asc) as tab where unique1 < 15;
+              array_agg               
+--------------------------------------
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}
+(1 row)
+
+select array_agg(unique1) from tenk1 where unique1 < -15;
+ array_agg 
+-----------
+ 
+(1 row)
+
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 04b19a4acea..586f65c2dd6 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -395,3 +395,10 @@ select array_length(array[[1,2,3], [4,5,6]], 3);
 select cardinality(array[1,2,3]);
 select cardinality(array[[1,2,3], [4,5,6]]);
 select c, cardinality(c), d, cardinality(d) from arrtest;
+
+select array_agg(unique1) from tenk1 where unique1 < 15;
+select array_agg(ten) from tenk1 where unique1 < 15;
+select array_agg(nullif(ten, 4)) from tenk1 where unique1 < 15;
+select cardinality(array_agg(unique1)) from tenk1 where unique1 < 15;
+select array_agg(unique1) from (select * from tenk1 order by unique1 asc) as tab where unique1 < 15;
+select array_agg(unique1) from tenk1 where unique1 < -15;
-- 
GitLab