From 7af3a6fc6f9e2486df37eeac0d61addba09a3551 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 6 Jul 2007 04:16:00 +0000
Subject: [PATCH] Fix up hash functions for datetime datatypes so that they
 don't take unwarranted liberties with int8 vs float8 values for these types.
 Specifically, be sure to apply either hashint8 or hashfloat8 depending on
 HAVE_INT64_TIMESTAMP.  Per my gripe of even date.

---
 src/backend/utils/adt/date.c             | 34 +++++++++++++++-----
 src/backend/utils/adt/timestamp.c        | 40 ++++++++++++++++++------
 src/include/catalog/catversion.h         |  4 +--
 src/include/catalog/pg_amproc.h          |  8 ++---
 src/include/catalog/pg_proc.h            |  6 +++-
 src/include/utils/date.h                 |  3 +-
 src/include/utils/timestamp.h            |  3 +-
 src/test/regress/expected/opr_sanity.out | 26 +++++++++------
 src/test/regress/sql/opr_sanity.sql      | 12 +++----
 9 files changed, 95 insertions(+), 41 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index a66680f4772..efc965ae4c3 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/date.c,v 1.133 2007/06/15 20:56:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/date.c,v 1.134 2007/07/06 04:15:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1197,6 +1197,17 @@ time_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(0);
 }
 
+Datum
+time_hash(PG_FUNCTION_ARGS)
+{
+	/* We can use either hashint8 or hashfloat8 directly */
+#ifdef HAVE_INT64_TIMESTAMP
+	return hashint8(fcinfo);
+#else
+	return hashfloat8(fcinfo);
+#endif
+}
+
 Datum
 time_larger(PG_FUNCTION_ARGS)
 {
@@ -1960,20 +1971,27 @@ timetz_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(timetz_cmp_internal(time1, time2));
 }
 
-/*
- * timetz, being an unusual size, needs a specialized hash function.
- */
 Datum
 timetz_hash(PG_FUNCTION_ARGS)
 {
 	TimeTzADT  *key = PG_GETARG_TIMETZADT_P(0);
+	uint32		thash;
 
 	/*
-	 * Specify hash length as sizeof(double) + sizeof(int4), not as
-	 * sizeof(TimeTzADT), so that any garbage pad bytes in the structure won't
-	 * be included in the hash!
+	 * To avoid any problems with padding bytes in the struct,
+	 * we figure the field hashes separately and XOR them.  This also
+	 * provides a convenient framework for dealing with the fact that
+	 * the time field might be either double or int64.
 	 */
-	return hash_any((unsigned char *) key, sizeof(key->time) + sizeof(key->zone));
+#ifdef HAVE_INT64_TIMESTAMP
+	thash = DatumGetUInt32(DirectFunctionCall1(hashint8,
+											   Int64GetDatumFast(key->time)));
+#else
+	thash = DatumGetUInt32(DirectFunctionCall1(hashfloat8,
+											   Float8GetDatumFast(key->time)));
+#endif
+	thash ^= DatumGetUInt32(hash_uint32(key->zone));
+	PG_RETURN_UINT32(thash);
 }
 
 Datum
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index d2019351168..04eaa81140d 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/timestamp.c,v 1.178 2007/06/15 20:56:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/timestamp.c,v 1.179 2007/07/06 04:15:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1839,6 +1839,17 @@ timestamp_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2));
 }
 
+Datum
+timestamp_hash(PG_FUNCTION_ARGS)
+{
+	/* We can use either hashint8 or hashfloat8 directly */
+#ifdef HAVE_INT64_TIMESTAMP
+	return hashint8(fcinfo);
+#else
+	return hashfloat8(fcinfo);
+#endif
+}
+
 
 /*
  * Crosstype comparison functions for timestamp vs timestamptz
@@ -2110,21 +2121,32 @@ interval_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(interval_cmp_internal(interval1, interval2));
 }
 
-/*
- * interval, being an unusual size, needs a specialized hash function.
- */
 Datum
 interval_hash(PG_FUNCTION_ARGS)
 {
 	Interval   *key = PG_GETARG_INTERVAL_P(0);
+	uint32		thash;
+	uint32		mhash;
 
 	/*
-	 * Specify hash length as sizeof(double) + sizeof(int4), not as
-	 * sizeof(Interval), so that any garbage pad bytes in the structure won't
-	 * be included in the hash!
+	 * To avoid any problems with padding bytes in the struct,
+	 * we figure the field hashes separately and XOR them.  This also
+	 * provides a convenient framework for dealing with the fact that
+	 * the time field might be either double or int64.
 	 */
-	return hash_any((unsigned char *) key,
-				  sizeof(key->time) + sizeof(key->day) + sizeof(key->month));
+#ifdef HAVE_INT64_TIMESTAMP
+	thash = DatumGetUInt32(DirectFunctionCall1(hashint8,
+											   Int64GetDatumFast(key->time)));
+#else
+	thash = DatumGetUInt32(DirectFunctionCall1(hashfloat8,
+											   Float8GetDatumFast(key->time)));
+#endif
+	thash ^= DatumGetUInt32(hash_uint32(key->day));
+	/* Shift so "k days" and "k months" don't hash to the same thing */
+	mhash = DatumGetUInt32(hash_uint32(key->month));
+	thash ^= mhash << 24;
+	thash ^= mhash >> 8;
+	PG_RETURN_UINT32(thash);
 }
 
 /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 2975721590a..e85988be2ac 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.413 2007/06/28 00:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.414 2007/07/06 04:15:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200706271
+#define CATALOG_VERSION_NO	200707051
 
 #endif
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 2645e89d663..c31a09f2243 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.65 2007/05/08 18:56:47 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.66 2007/07/06 04:15:59 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -147,11 +147,11 @@ DATA(insert (	1987   19 19 1 455 ));
 DATA(insert (	1990   26 26 1 453 ));
 DATA(insert (	1992   30 30 1 457 ));
 DATA(insert (	1995   25 25 1 400 ));
-DATA(insert (	1997   1083 1083 1 452 ));
+DATA(insert (	1997   1083 1083 1 1688 ));
 DATA(insert (	1998   1700 1700 1 432 ));
-DATA(insert (	1999   1184 1184 1 452 ));
+DATA(insert (	1999   1184 1184 1 2039 ));
 DATA(insert (	2001   1266 1266 1 1696 ));
-DATA(insert (	2040   1114 1114 1 452 ));
+DATA(insert (	2040   1114 1114 1 2039 ));
 DATA(insert (	2222   16 16 1 454 ));
 DATA(insert (	2223   17 17 1 456 ));
 DATA(insert (	2224   22 22 1 398 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ecd1f118c65..024be90487b 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.461 2007/06/28 00:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.462 2007/07/06 04:15:59 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2474,6 +2474,8 @@ DESCR("greater-than-or-equal");
 DATA(insert OID = 1693 (  btboolcmp			PGNSP PGUID 12 1 0 f f t f i 2 23 "16 16" _null_ _null_ _null_	btboolcmp - _null_ ));
 DESCR("btree less-equal-greater");
 
+DATA(insert OID = 1688 (  time_hash			PGNSP PGUID 12 1 0 f f t f i 1 23 "1083" _null_ _null_ _null_ time_hash - _null_ ));
+DESCR("hash");
 DATA(insert OID = 1696 (  timetz_hash		PGNSP PGUID 12 1 0 f f t f i 1 23 "1266" _null_ _null_ _null_ timetz_hash - _null_ ));
 DESCR("hash");
 DATA(insert OID = 1697 (  interval_hash		PGNSP PGUID 12 1 0 f f t f i 1 23 "1186" _null_ _null_ _null_ interval_hash - _null_ ));
@@ -3043,6 +3045,8 @@ DATA(insert OID = 2037 (  timezone			PGNSP PGUID 12 1 0 f f t f v 2 1266 "25 126
 DESCR("adjust time with time zone to new zone");
 DATA(insert OID = 2038 (  timezone			PGNSP PGUID 12 1 0 f f t f i 2 1266 "1186 1266" _null_ _null_ _null_	timetz_izone - _null_ ));
 DESCR("adjust time with time zone to new zone");
+DATA(insert OID = 2039 (  timestamp_hash	PGNSP PGUID 12 1 0 f f t f i 1	23 "1114" _null_ _null_ _null_ timestamp_hash - _null_ ));
+DESCR("hash");
 DATA(insert OID = 2041 ( overlaps			PGNSP PGUID 12 1 0 f f f f i 4 16 "1114 1114 1114 1114" _null_ _null_ _null_	overlaps_timestamp - _null_ ));
 DESCR("SQL92 interval comparison");
 DATA(insert OID = 2042 ( overlaps			PGNSP PGUID 14 1 0 f f f f i 4 16 "1114 1186 1114 1186" _null_ _null_ _null_	"select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - _null_ ));
diff --git a/src/include/utils/date.h b/src/include/utils/date.h
index c7f9f732392..fd91f79f3e9 100644
--- a/src/include/utils/date.h
+++ b/src/include/utils/date.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/date.h,v 1.37 2007/06/05 21:31:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/date.h,v 1.38 2007/07/06 04:16:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -148,6 +148,7 @@ extern Datum time_le(PG_FUNCTION_ARGS);
 extern Datum time_gt(PG_FUNCTION_ARGS);
 extern Datum time_ge(PG_FUNCTION_ARGS);
 extern Datum time_cmp(PG_FUNCTION_ARGS);
+extern Datum time_hash(PG_FUNCTION_ARGS);
 extern Datum overlaps_time(PG_FUNCTION_ARGS);
 extern Datum time_larger(PG_FUNCTION_ARGS);
 extern Datum time_smaller(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h
index 629aa365642..5ba8ccd5ac1 100644
--- a/src/include/utils/timestamp.h
+++ b/src/include/utils/timestamp.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/timestamp.h,v 1.70 2007/06/05 21:31:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/timestamp.h,v 1.71 2007/07/06 04:16:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -211,6 +211,7 @@ extern Datum timestamp_ge(PG_FUNCTION_ARGS);
 extern Datum timestamp_gt(PG_FUNCTION_ARGS);
 extern Datum timestamp_finite(PG_FUNCTION_ARGS);
 extern Datum timestamp_cmp(PG_FUNCTION_ARGS);
+extern Datum timestamp_hash(PG_FUNCTION_ARGS);
 extern Datum timestamp_smaller(PG_FUNCTION_ARGS);
 extern Datum timestamp_larger(PG_FUNCTION_ARGS);
 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 8b5f64473d5..247c8c95837 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -980,10 +980,9 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'btree')
 -- For hash we can also do a little better: the support routines must be
 -- of the form hash(lefttype) returns int4.  There are several cases where
 -- we cheat and use a hash function that is physically compatible with the
--- datatype even though there's no cast, so for now we can't check that.
-SELECT p1.amprocfamily, p1.amprocnum,
-	p2.oid, p2.proname,
-	p3.opfname
+-- datatype even though there's no cast, so this check does find a small
+-- number of entries.
+SELECT p1.amprocfamily, p1.amprocnum, p2.proname, p3.opfname
 FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3
 WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
     AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND
@@ -991,11 +990,20 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
      OR proretset
      OR prorettype != 'int4'::regtype
      OR pronargs != 1
---   OR NOT physically_coercible(amproclefttype, proargtypes[0])
-     OR amproclefttype != amprocrighttype);
- amprocfamily | amprocnum | oid | proname | opfname 
---------------+-----------+-----+---------+---------
-(0 rows)
+     OR NOT physically_coercible(amproclefttype, proargtypes[0])
+     OR amproclefttype != amprocrighttype)
+ORDER BY 1;
+ amprocfamily | amprocnum |    proname     |      opfname       
+--------------+-----------+----------------+--------------------
+          435 |         1 | hashint4       | date_ops
+         1999 |         1 | timestamp_hash | timestamptz_ops
+         2222 |         1 | hashchar       | bool_ops
+         2223 |         1 | hashvarlena    | bytea_ops
+         2225 |         1 | hashint4       | xid_ops
+         2226 |         1 | hashint4       | cid_ops
+         2229 |         1 | hashvarlena    | text_pattern_ops
+         2231 |         1 | hashvarlena    | bpchar_pattern_ops
+(8 rows)
 
 -- Support routines that are primary members of opfamilies must be immutable
 -- (else it suggests that the index ordering isn't fixed).  But cross-type
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index f93a71ddf9c..54c60c1a6d9 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -790,11 +790,10 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'btree')
 -- For hash we can also do a little better: the support routines must be
 -- of the form hash(lefttype) returns int4.  There are several cases where
 -- we cheat and use a hash function that is physically compatible with the
--- datatype even though there's no cast, so for now we can't check that.
+-- datatype even though there's no cast, so this check does find a small
+-- number of entries.
 
-SELECT p1.amprocfamily, p1.amprocnum,
-	p2.oid, p2.proname,
-	p3.opfname
+SELECT p1.amprocfamily, p1.amprocnum, p2.proname, p3.opfname
 FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3
 WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
     AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND
@@ -802,8 +801,9 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
      OR proretset
      OR prorettype != 'int4'::regtype
      OR pronargs != 1
---   OR NOT physically_coercible(amproclefttype, proargtypes[0])
-     OR amproclefttype != amprocrighttype);
+     OR NOT physically_coercible(amproclefttype, proargtypes[0])
+     OR amproclefttype != amprocrighttype)
+ORDER BY 1;
 
 -- Support routines that are primary members of opfamilies must be immutable
 -- (else it suggests that the index ordering isn't fixed).  But cross-type
-- 
GitLab