From a0fa0117a5ad728b6f85a39cc52006736f54f90e Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 30 Dec 2002 15:21:23 +0000
Subject: [PATCH] Better solution to integer overflow problem in hash
 batch-number computation: reduce the bucket number mod nbatch.  This changes
 the association between original bucket numbers and batches, but that doesn't
 matter.  Other minor cleanups in the hashjoin code to help centralize decisions.

---
 src/backend/executor/nodeHash.c       | 72 ++++++++++++---------------
 src/backend/executor/nodeHashjoin.c   | 33 ++----------
 src/backend/optimizer/path/costsize.c | 43 ++++++++--------
 src/include/executor/nodeHash.h       |  3 +-
 4 files changed, 60 insertions(+), 91 deletions(-)
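
Notes (text between the diffstat and the diff is ignored by git am):

A minimal standalone sketch of the arithmetic this patch fixes.  The type
FakeHashTable and both functions below are hypothetical stand-ins, not
executor code: old_get_batch mirrors the formula removed from
ExecHashTableInsert, new_get_batch mirrors the added ExecHashGetBatch.

    #include <stdio.h>

    typedef struct
    {
        int     nbuckets;       /* # of in-memory hash buckets */
        int     totalbuckets;   /* # of virtual buckets */
        int     nbatch;         /* # of batch files */
    } FakeHashTable;

    /*
     * Old mapping: the intermediate product nbatch * (bucketno - nbuckets)
     * can exceed INT_MAX (signed overflow), which is why
     * ExecChooseHashTableSize had to clamp nbatch to INT_MAX / totalbuckets.
     */
    static int
    old_get_batch(int bucketno, const FakeHashTable *ht)
    {
        return (ht->nbatch * (bucketno - ht->nbuckets)) /
            (ht->totalbuckets - ht->nbuckets);
    }

    /*
     * New mapping: no intermediate product, so no overflow; -1 means the
     * tuple stays in the in-memory hash table.
     */
    static int
    new_get_batch(int bucketno, const FakeHashTable *ht)
    {
        if (bucketno < ht->nbuckets)
            return -1;
        return (bucketno - ht->nbuckets) % ht->nbatch;
    }

    int
    main(void)
    {
        /* hypothetical sizes: 3000 * (1900000 - 1024) ~= 5.7e9 > INT_MAX */
        FakeHashTable ht = {1024, 2000000, 3000};
        int         bucketno = 1900000;

        printf("old: %d (signed overflow; result is garbage)\n",
               old_get_batch(bucketno, &ht));
        printf("new: %d (always in 0..nbatch-1)\n",
               new_get_batch(bucketno, &ht));
        return 0;
    }

Because the modulo form cannot overflow, the nbatch clamp in
ExecChooseHashTableSize relaxes from INT_MAX / totalbuckets to INT_MAX, as
the hunk in that function shows.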

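On the costsize.c side, cost_hashjoin now sizes the hash table once for the
inner relation and passes the virtual bucket count into
estimate_hash_bucketsize, rather than having that function recompute it from
whichever relation the variable came from.  A worked example of the returned
fraction, with hypothetical numbers: given nbuckets = 10000, a column with
ndistinct = 500 yields estfract = 1/500 = 0.002, while ndistinct = 50000
yields estfract = 1/10000 = 0.0001 (the ndistinct > nbuckets branch in the
final costsize.c hunk).
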
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 9e0e4f0b8cb..bea89630993 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,9 +59,6 @@ ExecHash(HashState *node)
 	outerNode = outerPlanState(node);
 
 	hashtable = node->hashtable;
-	if (hashtable == NULL)
-		elog(ERROR, "ExecHash: hash table is NULL.");
-
 	nbatch = hashtable->nbatch;
 
 	if (nbatch > 0)
@@ -284,20 +281,13 @@ ExecHashTableCreate(Hash *node)
 		 * allocate and initialize the file arrays in hashCxt
 		 */
 		hashtable->innerBatchFile = (BufFile **)
-			palloc(nbatch * sizeof(BufFile *));
+			palloc0(nbatch * sizeof(BufFile *));
 		hashtable->outerBatchFile = (BufFile **)
-			palloc(nbatch * sizeof(BufFile *));
+			palloc0(nbatch * sizeof(BufFile *));
 		hashtable->innerBatchSize = (long *)
-			palloc(nbatch * sizeof(long));
+			palloc0(nbatch * sizeof(long));
 		hashtable->outerBatchSize = (long *)
-			palloc(nbatch * sizeof(long));
-		for (i = 0; i < nbatch; i++)
-		{
-			hashtable->innerBatchFile[i] = NULL;
-			hashtable->outerBatchFile[i] = NULL;
-			hashtable->innerBatchSize[i] = 0;
-			hashtable->outerBatchSize[i] = 0;
-		}
+			palloc0(nbatch * sizeof(long));
 		/* The files will not be opened until later... */
 	}
 
@@ -308,13 +298,7 @@ ExecHashTableCreate(Hash *node)
 	MemoryContextSwitchTo(hashtable->batchCxt);
 
 	hashtable->buckets = (HashJoinTuple *)
-		palloc(nbuckets * sizeof(HashJoinTuple));
-
-	if (hashtable->buckets == NULL)
-		elog(ERROR, "Insufficient memory for hash table.");
-
-	for (i = 0; i < nbuckets; i++)
-		hashtable->buckets[i] = NULL;
+		palloc0(nbuckets * sizeof(HashJoinTuple));
 
 	MemoryContextSwitchTo(oldcxt);
 
@@ -414,15 +398,14 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 		 * totalbuckets/nbuckets; in fact, it is the number of groups we
 		 * will use for the part of the data that doesn't fall into the
 		 * first nbuckets hash buckets.  We try to set it to make all the
-		 * batches the same size.  But we have to keep nbatch small
-		 * enough to avoid integer overflow in ExecHashJoinGetBatch().
+		 * batches the same size.
 		 */
 		dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
 					hash_table_bytes);
-		if (dtmp < INT_MAX / totalbuckets)
+		if (dtmp < INT_MAX)
 			nbatch = (int) dtmp;
 		else
-			nbatch = INT_MAX / totalbuckets;
+			nbatch = INT_MAX;
 		if (nbatch <= 0)
 			nbatch = 1;
 	}
@@ -481,13 +464,14 @@ ExecHashTableInsert(HashJoinTable hashtable,
 					List *hashkeys)
 {
 	int			bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
+	int			batchno = ExecHashGetBatch(bucketno, hashtable);
 	TupleTableSlot *slot = econtext->ecxt_innertuple;
 	HeapTuple	heapTuple = slot->val;
 
 	/*
 	 * decide whether to put the tuple in the hash table or a tmp file
 	 */
-	if (bucketno < hashtable->nbuckets)
+	if (batchno < 0)
 	{
 		/*
 		 * put the tuple in hash table
@@ -498,8 +482,6 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
 		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
 													   hashTupleSize);
-		if (hashTuple == NULL)
-			elog(ERROR, "Insufficient memory for hash table.");
 		memcpy((char *) &hashTuple->htup,
 			   (char *) heapTuple,
 			   sizeof(hashTuple->htup));
@@ -515,11 +497,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 	else
 	{
 		/*
-		 * put the tuple into a tmp file for other batches
+		 * put the tuple into a tmp file for later batches
 		 */
-		int			batchno = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-		(hashtable->totalbuckets - hashtable->nbuckets);
-
 		hashtable->innerBatchSize[batchno]++;
 		ExecHashJoinSaveTuple(heapTuple,
 							  hashtable->innerBatchFile[batchno]);
@@ -592,6 +571,24 @@ ExecHashGetBucket(HashJoinTable hashtable,
 	return bucketno;
 }
 
+/* ----------------------------------------------------------------
+ *		ExecHashGetBatch
+ *
+ *		determine the batch number for a bucketno
+ *
+ * Returns -1 if the bucket belongs to the initial (or current) batch;
+ * otherwise 0..nbatch-1, the bucket's external batch file number.
+ * ----------------------------------------------------------------
+ */
+int
+ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
+{
+	if (bucketno < hashtable->nbuckets)
+		return -1;
+
+	return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
+}
+
 /* ----------------------------------------------------------------
  *		ExecScanHashBucket
  *
@@ -727,7 +724,6 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 {
 	MemoryContext oldcxt;
 	int			nbuckets = hashtable->nbuckets;
-	int			i;
 
 	/*
 	 * Release all the hash buckets and tuples acquired in the prior pass,
@@ -750,13 +746,7 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 
 	/* Reallocate and reinitialize the hash bucket headers. */
 	hashtable->buckets = (HashJoinTuple *)
-		palloc(nbuckets * sizeof(HashJoinTuple));
-
-	if (hashtable->buckets == NULL)
-		elog(ERROR, "Insufficient memory for hash table.");
-
-	for (i = 0; i < nbuckets; i++)
-		hashtable->buckets[i] = NULL;
+		palloc0(nbuckets * sizeof(HashJoinTuple));
 
 	MemoryContextSwitchTo(oldcxt);
 }
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8f899b577d7..48cf30c21f4 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,7 +27,6 @@ static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 						  BufFile *file,
 						  TupleTableSlot *tupleSlot);
-static int	ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
 static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
 
 
@@ -179,17 +178,15 @@ ExecHashJoin(HashJoinState *node)
 			 */
 			if (hashtable->curbatch == 0)
 			{
-				int			batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
-														 hashtable);
+				int			batchno = ExecHashGetBatch(node->hj_CurBucketNo,
+													   hashtable);
 
-				if (batch > 0)
+				if (batchno >= 0)
 				{
 					/*
 					 * Need to postpone this outer tuple to a later batch.
 					 * Save it in the corresponding outer-batch file.
 					 */
-					int			batchno = batch - 1;
-
 					hashtable->outerBatchSize[batchno]++;
 					ExecHashJoinSaveTuple(outerTupleSlot->val,
 									 hashtable->outerBatchFile[batchno]);
@@ -640,28 +637,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
 	return newbatch;
 }
 
-/* ----------------------------------------------------------------
- *		ExecHashJoinGetBatch
- *
- *		determine the batch number for a bucketno
- *		+----------------+-------+-------+ ... +-------+
- *		0			  nbuckets						 totalbuckets
- * batch		 0			 1		 2	   ...
- * ----------------------------------------------------------------
- */
-static int
-ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
-{
-	int			b;
-
-	if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
-		return 0;
-
-	b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-		(hashtable->totalbuckets - hashtable->nbuckets);
-	return b + 1;
-}
-
 /* ----------------------------------------------------------------
  *		ExecHashJoinSaveTuple
  *
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a19dd92c826..29b23948dfe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,7 +85,8 @@ bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
 
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+											int nbuckets);
 static bool cost_qual_eval_walker(Node *node, Cost *total);
 static Selectivity approx_selectivity(Query *root, List *quals);
 static void set_rel_width(Query *root, RelOptInfo *rel);
@@ -882,7 +883,9 @@ cost_hashjoin(Path *path, Query *root,
 											  outer_path->parent->width);
 	double		innerbytes = relation_byte_size(inner_path->parent->rows,
 											  inner_path->parent->width);
-	long		hashtablebytes = SortMem * 1024L;
+	int			virtualbuckets;
+	int			physicalbuckets;
+	int			numbatches;
 	Selectivity innerbucketsize;
 	List	   *hcl;
 
@@ -898,6 +901,13 @@ cost_hashjoin(Path *path, Query *root,
 	startup_cost += cpu_operator_cost * inner_path->parent->rows;
 	run_cost += cpu_operator_cost * outer_path->parent->rows;
 
+	/* Get hash table size that executor would use for inner relation */
+	ExecChooseHashTableSize(inner_path->parent->rows,
+							inner_path->parent->width,
+							&virtualbuckets,
+							&physicalbuckets,
+							&numbatches);
+
 	/*
 	 * Determine bucketsize fraction for inner relation.  We use the
 	 * smallest bucketsize estimated for any individual hashclause;
@@ -931,7 +941,8 @@ cost_hashjoin(Path *path, Query *root,
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, right);
+				thisbucketsize = estimate_hash_bucketsize(root, right,
+														  virtualbuckets);
 				restrictinfo->right_bucketsize = thisbucketsize;
 			}
 		}
@@ -943,7 +954,8 @@ cost_hashjoin(Path *path, Query *root,
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, left);
+				thisbucketsize = estimate_hash_bucketsize(root, left,
+														  virtualbuckets);
 				restrictinfo->left_bucketsize = thisbucketsize;
 			}
 		}
@@ -982,7 +994,7 @@ cost_hashjoin(Path *path, Query *root,
 	 * should be nice and sequential...).  Writing the inner rel counts as
 	 * startup cost, all the rest as run cost.
 	 */
-	if (innerbytes > hashtablebytes)
+	if (numbatches)
 	{
 		double		outerpages = page_size(outer_path->parent->rows,
 										   outer_path->parent->width);
@@ -1019,7 +1031,7 @@ cost_hashjoin(Path *path, Query *root,
  * smart enough to figure out how the restrict clauses might change the
  * distribution, so this will have to do for now.
  *
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
  * input relation.	If the data were perfectly distributed, with the same
  * number of tuples going into each available bucket, then the bucketsize
  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
@@ -1039,13 +1051,10 @@ cost_hashjoin(Path *path, Query *root,
  * inner rel is well-dispersed (or the alternatives seem much worse).
  */
 static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
 {
 	Oid			relid;
 	RelOptInfo *rel;
-	int			virtualbuckets;
-	int			physicalbuckets;
-	int			numbatches;
 	HeapTuple	tuple;
 	Form_pg_statistic stats;
 	double		estfract,
@@ -1071,12 +1080,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	if (rel->tuples <= 0.0 || rel->rows <= 0.0)
 		return 0.1;				/* ensure we can divide below */
 
-	/* Get hash table size that executor would use for this relation */
-	ExecChooseHashTableSize(rel->rows, rel->width,
-							&virtualbuckets,
-							&physicalbuckets,
-							&numbatches);
-
 	tuple = SearchSysCache(STATRELATT,
 						   ObjectIdGetDatum(relid),
 						   Int16GetDatum(var->varattno),
@@ -1093,7 +1096,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
 			case ObjectIdAttributeNumber:
 			case SelfItemPointerAttributeNumber:
 				/* these are unique, so buckets should be well-distributed */
-				return 1.0 / (double) virtualbuckets;
+				return 1.0 / (double) nbuckets;
 			case TableOidAttributeNumber:
 				/* hashing this is a terrible idea... */
 				return 1.0;
@@ -1134,8 +1137,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	 * the number of buckets is less than the expected number of distinct
 	 * values; otherwise it is 1/ndistinct.
 	 */
-	if (ndistinct > (double) virtualbuckets)
-		estfract = 1.0 / (double) virtualbuckets;
+	if (ndistinct > (double) nbuckets)
+		estfract = 1.0 / (double) nbuckets;
 	else
 		estfract = 1.0 / ndistinct;
 
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index c30073ec8cf..02e56355263 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeHash.h,v 1.27 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,7 @@ extern void ExecHashTableInsert(HashJoinTable hashtable,
 extern int ExecHashGetBucket(HashJoinTable hashtable,
 				  ExprContext *econtext,
 				  List *hashkeys);
+extern int ExecHashGetBatch(int bucketno, HashJoinTable hashtable);
 extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
 				   ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);
-- 
GitLab