From 9d5c0af5862cc0141ea16ca90a9e01250716af37 Mon Sep 17 00:00:00 2001
From: "Marc G. Fournier" <scrappy@hub.org>
Date: Thu, 3 Apr 1997 19:56:47 +0000
Subject: [PATCH] From: Thomas Lockhart <Thomas.G.Lockhart@jpl.nasa.gov>
 Subject: [HACKERS] Aggregate function patches

Here are the aggregate function patches I originally sent in last December.
They fix sum() and avg() behavior for ints and floats when NULL values are
involved.

I was waiting to resubmit these until I had a chance to write a v6.0->v6.1
database upgrade script to ensure that existing v6.0 databases which have
not been reloaded for v6.1 do no break with the new aggregate behavior.
These scripts are included below. It's OK with me if someone wants to do
something different with the upgrade strategy, but something like this
was discussed a few weeks ago.

Also, there were a couple of small items which cropped up in doing a clean
install of 970403 (actually 970402 + 970403 changes since the full 970403
tar file appears to be damaged or at least suspect). They are the first
two patches below and can be omitted if desired (although I think they
aren't dangerous :).
---
 src/backend/executor/nodeAgg.c         |  7 +++-
 src/backend/optimizer/geqo/geqo_eval.c |  6 ++-
 src/backend/tcop/aclchk.c              |  4 +-
 src/include/catalog/pg_aggregate.h     | 28 ++++++-------
 src/update6_0-6_1.sh                   | 56 ++++++++++++++++++++++++++
 src/update6_0-6_1.sql                  | 47 +++++++++++++++++++++
 6 files changed, 127 insertions(+), 21 deletions(-)
 create mode 100644 src/update6_0-6_1.sh
 create mode 100644 src/update6_0-6_1.sql

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 4f1a5902f0a..c6e5b269cde 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -368,7 +368,12 @@ ExecAgg(Agg *node)
 	char *args[2];
 	AggFuncInfo *aggfns = &aggFuncInfo[i];
 	
- 	if (aggfns->finalfn && nTuplesAgged > 0) { 
+	if (noInitValue[i]) {
+	    /*
+	     * No values found for this agg; return current state.
+	     * This seems to fix behavior for avg() aggregate. -tgl 12/96
+	     */
+ 	} else if (aggfns->finalfn && nTuplesAgged > 0) { 
 	    if (aggfns->finalfn_nargs > 1) {
 		args[0] = (char*)value1[i];
 		args[1] = (char*)value2[i];
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 72a10ee9dfc..cec12ae0ea5 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo_eval.c,v 1.6 1997/03/03 23:26:45 scrappy Exp $
+ * $Id: geqo_eval.c,v 1.7 1997/04/03 19:55:35 scrappy Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,7 +23,9 @@
 #include <math.h>
 #ifdef HAVE_LIMITS_H
 # include <limits.h>
-# define MAXINT	INT_MAX
+# ifndef MAXINT
+#  define MAXINT INT_MAX
+# endif
 #else
 # include <values.h>
 #endif 
diff --git a/src/backend/tcop/aclchk.c b/src/backend/tcop/aclchk.c
index 20748e16c6a..7ca6bdd33b8 100644
--- a/src/backend/tcop/aclchk.c
+++ b/src/backend/tcop/aclchk.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.7 1997/03/12 20:48:17 scrappy Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.8 1997/04/03 19:55:12 scrappy Exp $
  *
  * NOTES
  *    See acl.h.
@@ -277,7 +277,7 @@ aclcheck(Acl *acl, AclId id, AclIdType idtype, AclMode mode)
      * the system never creates an empty ACL.
      */
     if (num < 1) {
-#ifdef ACLDEBUG_TRACE || 1
+#if ACLDEBUG_TRACE || 1
 	elog(DEBUG, "aclcheck: zero-length ACL, returning 1");
 #endif
 	return ACLCHECK_OK;
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index b2d9b4afb99..dcf7a79df63 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_aggregate.h,v 1.4 1997/04/02 18:36:09 scrappy Exp $
+ * $Id: pg_aggregate.h,v 1.5 1997/04/03 19:56:19 scrappy Exp $
  *
  * NOTES
  *    the genbki.sh script reads this file and generates .bki
@@ -19,7 +19,7 @@
 #define PG_AGGREGATE_H
 
 /* ----------------
- *	postgres.h contains the system type definintions and the
+ *	postgres.h contains the system type definitions and the
  *	CATALOG(), BOOTSTRAP and DATA() sugar words so this file
  *	can be read by both genbki.sh and the C compiler.
  * ----------------
@@ -87,15 +87,15 @@ typedef FormData_pg_aggregate	*Form_pg_aggregate;
  * ---------------
  */
 
-DATA(insert OID = 0 ( avg   PGUID int4pl   int4inc   int4div  23  23  23  23 0  0 ));
-DATA(insert OID = 0 ( avg   PGUID int2pl   int2inc   int2div  21  21  21  21  0  0 ));
-DATA(insert OID = 0 ( avg   PGUID float4pl float4inc float4div  700  700  700  700 0.0 0.0 ));
-DATA(insert OID = 0 ( avg   PGUID float8pl float8inc float8div  701  701  701  701 0.0 0.0 ));
+DATA(insert OID = 0 ( avg   PGUID int4pl   int4inc   int4div     23   23   23   23 _null_ 0 ));
+DATA(insert OID = 0 ( avg   PGUID int2pl   int2inc   int2div     21   21   21   21 _null_ 0 ));
+DATA(insert OID = 0 ( avg   PGUID float4pl float4inc float4div  700  700  700  700 _null_ 0.0 ));
+DATA(insert OID = 0 ( avg   PGUID float8pl float8inc float8div  701  701  701  701 _null_ 0.0 ));
 
-DATA(insert OID = 0 ( sum   PGUID int4pl   - -   23  23  0  23  0   _null_ ));
-DATA(insert OID = 0 ( sum   PGUID int2pl   - -   21  21  0  21  0   _null_ ));
-DATA(insert OID = 0 ( sum   PGUID float4pl - -  700  700 0  700 0.0 _null_ ));
-DATA(insert OID = 0 ( sum   PGUID float8pl - -  701  701 0  701 0.0 _null_ ));
+DATA(insert OID = 0 ( sum   PGUID int4pl        - -   23   23 0   23 _null_ _null_ ));
+DATA(insert OID = 0 ( sum   PGUID int2pl        - -   21   21 0   21 _null_ _null_ ));
+DATA(insert OID = 0 ( sum   PGUID float4pl      - -  700  700 0  700 _null_ _null_ ));
+DATA(insert OID = 0 ( sum   PGUID float8pl      - -  701  701 0  701 _null_ _null_ ));
 
 DATA(insert OID = 0 ( max   PGUID int4larger    - -   23   23 0   23 _null_ _null_ ));
 DATA(insert OID = 0 ( max   PGUID int2larger    - -   21   21 0   21 _null_ _null_ ));
@@ -113,10 +113,10 @@ DATA(insert OID = 0 ( min   PGUID int4smaller   - -  702  702 0  702 _null_ _nul
 DATA(insert OID = 0 ( min   PGUID date_smaller  - - 1082 1082 0 1082 _null_ _null_ ));
 DATA(insert OID = 0 ( min   PGUID float8smaller - - 1084 1084 0 1084 _null_ _null_ ));
 
-DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23  _null_ 0 ));
+DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 ));
 
 /*
- * prototypes for fucnctions in pg_aggregate.c
+ * prototypes for functions in pg_aggregate.c
  */
 extern void AggregateCreate(char *aggName, 
 			    char *aggtransfn1Name,
@@ -131,7 +131,3 @@ extern char *AggNameGetInitVal(char *aggName, Oid basetype,
 			       int xfuncno, bool *isNull);
 
 #endif /* PG_AGGREGATE_H */
-
-
-
-
diff --git a/src/update6_0-6_1.sh b/src/update6_0-6_1.sh
new file mode 100644
index 00000000000..44b3c9aa7b9
--- /dev/null
+++ b/src/update6_0-6_1.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# update
+# Script to apply patches to existing databases
+#  to upgrade from Postgres v6.0 to v6.1.
+echo ""
+echo "This utility does a minimal upgrade for existing v6.0 databases."
+echo "Note that several new features and functions in Postgres"
+echo " will not be available unless the databases are reloaded"
+echo " from a clean install of v6.1."
+echo ""
+echo "This update script is not necessary for new or reloaded databases,"
+echo " but will not damage them. You should update existing v6.1beta"
+echo " databases created on or before 1997-04-04 (when the patches for"
+echo " aggregate functions were applied to the v6.1beta source tree)."
+echo ""
+echo "Features present with this simple update include:"
+echo " - aggregate operators sum() and avg() behave correctly with NULLs"
+echo " - the point distance operator '<===>' returns float8 rather than int4"
+echo " - some duplicate function OIDs are renumbered to eliminate conflicts"
+echo ""
+echo "Features unavailable with only this simple update include:"
+echo " - new string handling functions a la Oracle/Ingres"
+echo " - new date and time data types and expanded functionality"
+echo " - some new function overloading to simplify function names"
+echo ""
+echo "Note that if v6.0 databases are not reloaded from a clean install of v6.1"
+echo " or if this update is not applied to existing v6.0 databases:"
+echo " - aggregate functions avg() and sum() may divide-by-zero for int4 data types"
+#
+srcdir=`pwd`
+srcsql="update6_0-6_1.sql"
+CMDSQL="psql"
+SRCSQL="$srcdir/$srcsql"
+#
+if [ -z $SRCSQL ]; then
+	echo "unable to locate $SRCSQL"
+	exit 1
+fi
+#
+echo ""
+echo "updating databases found in $PGDATA/base"
+echo ""
+#
+cd $PGDATA/base
+for d in *
+do
+	echo "updating $d at `date` ..."
+	echo "try $CMDSQL $d < $SRCSQL"
+	$CMDSQL $d < $SRCSQL
+	echo "completed updating $d at `date`"
+done
+#
+echo ""
+echo "completed all updates at `date`"
+echo ""
+exit
diff --git a/src/update6_0-6_1.sql b/src/update6_0-6_1.sql
new file mode 100644
index 00000000000..e60e8d93cbe
--- /dev/null
+++ b/src/update6_0-6_1.sql
@@ -0,0 +1,47 @@
+-- Aggregate functions
+-- Thomas Lockhart
+-- This fixes the behavior of built-in aggregate functions avg() and sum().
+-- Code tested on postgres95 v1.0.9, postgres v6.0, and postgres v6.1b-970315.
+-- Original definitions return zero rather than null for empty set attributes.
+-- Postgres source code says that null behavior for aggregates is not correct,
+--  but does describe the correct behavior for pass-by-reference data types
+--  if it is given null initial values (from pg_aggregate).
+-- Note that pass-by-value data types (e.g. int4) require a simple source code
+--  change in backend/executor/nodeAgg.c to avoid divide-by-zero results.
+-- If this SQL update is applied without making the corresponding source code
+--  patch, then floating point types will work correctly but integer types will
+--  divide-by-zero.
+-- If the source code patch is applied but this SQL update is not, then there
+--  will be divide-by-zero results for floating point types.
+
+-- For aggregate attributes, the correct behavior is as follows:
+-- count(*) should return a count of all tuples, null or otherwise
+-- count(col) should return a count of all non-null values, zero if none
+-- avg(col), sum(col), etc should ignore null fields and return null if there
+--  are no non-null inputs
+-- Ref: the original Date book
+
+update pg_aggregate set agginitval1=null
+ where aggname = 'avg' or aggname = 'sum';
+
+-- Geometric functions
+-- Thomas Lockhart
+-- This replaces the distance operator with one returning a floating point number.
+-- The original operator 'pointdist' returned an integer.
+-- There is no corresponding source code change required for this patch.
+
+update pg_operator set oprresult = 701, oprcode = 'point_distance'::regproc
+ where oprname = '<===>' and oprresult = 23;
+
+-- Date functions
+-- Thomas Lockhart
+-- This fixes conflicting OIDs within the date and time declarations.
+
+update pg_proc set oid = 1138::oid where proname = 'date_larger';
+update pg_proc set oid = 1139::oid where proname = 'date_smaller';
+update pg_proc set oid = 1140::oid where proname = 'date_mi';
+update pg_proc set oid = 1141::oid where proname = 'date_pli';
+update pg_proc set oid = 1142::oid where proname = 'date_mii';
+update pg_proc set oid = 1143::oid where proname = 'timein';
+update pg_proc set oid = 1144::oid where proname = 'timeout';
+update pg_proc set oid = 1145::oid where proname = 'time_eq';
-- 
GitLab