diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 0b2c13917eec2b055df3df8ba57676ba63ab445b..a47dc328c4b07504489f9bf55e887dd5beaa839f 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -131,17 +131,7 @@ MultiExecHash(HashState *node)
 
 	/* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
 	if (hashtable->nbuckets != hashtable->nbuckets_optimal)
-	{
-		/* We never decrease the number of buckets. */
-		Assert(hashtable->nbuckets_optimal > hashtable->nbuckets);
-
-#ifdef HJDEBUG
-		printf("Increasing nbuckets %d => %d\n",
-			   hashtable->nbuckets, hashtable->nbuckets_optimal);
-#endif
-
 		ExecHashIncreaseNumBuckets(hashtable);
-	}
 
 	/* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */
 	hashtable->spaceUsed += hashtable->nbuckets * sizeof(HashJoinTuple);
@@ -486,23 +476,31 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 
 	/*
 	 * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-	 * memory is filled, assuming a single batch.  The Min() step limits the
-	 * results so that the pointer arrays we'll try to allocate do not exceed
-	 * work_mem.
+	 * memory is filled, assuming a single batch; but limit the value so that
+	 * the pointer arrays we'll try to allocate do not exceed work_mem nor
+	 * MaxAllocSize.
+	 *
+	 * Note that both nbuckets and nbatch must be powers of 2 to make
+	 * ExecHashGetBucketAndBatch fast.
 	 */
-	max_pointers = (work_mem * 1024L) / sizeof(void *);
+	max_pointers = (work_mem * 1024L) / sizeof(HashJoinTuple);
+	max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
 	/* also ensure we avoid integer overflow in nbatch and nbuckets */
+	/* (this step is redundant given the current value of MaxAllocSize) */
 	max_pointers = Min(max_pointers, INT_MAX / 2);
+
 	dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
 	dbuckets = Min(dbuckets, max_pointers);
+	/* don't let nbuckets be really small, though ... */
 	nbuckets = Max((int) dbuckets, 1024);
+	/* ... and force it to be a power of 2. */
 	nbuckets = 1 << my_log2(nbuckets);
-	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 
 	/*
 	 * If there's not enough space to store the projected number of tuples and
 	 * the required bucket headers, we will need multiple batches.
 	 */
+	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 	if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
 	{
 		/* We'll need multiple batches */
@@ -521,6 +519,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
 		lbuckets = Min(lbuckets, max_pointers);
 		nbuckets = (int) lbuckets;
+		nbuckets = 1 << my_log2(nbuckets);
 		bucket_bytes = nbuckets * sizeof(HashJoinTuple);
 
 		/*
@@ -760,21 +759,18 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
 	if (hashtable->nbuckets >= hashtable->nbuckets_optimal)
 		return;
 
-	/*
-	 * We already know the optimal number of buckets, so let's just compute
-	 * the log2_nbuckets for it.
-	 */
+#ifdef HJDEBUG
+	printf("Increasing nbuckets %d => %d\n",
+		   hashtable->nbuckets, hashtable->nbuckets_optimal);
+#endif
+
 	hashtable->nbuckets = hashtable->nbuckets_optimal;
-	hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal);
+	hashtable->log2_nbuckets = hashtable->log2_nbuckets_optimal;
 
 	Assert(hashtable->nbuckets > 1);
 	Assert(hashtable->nbuckets <= (INT_MAX / 2));
 	Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets));
 
-#ifdef HJDEBUG
-	printf("Increasing nbuckets to %d\n", hashtable->nbuckets);
-#endif
-
 	/*
 	 * Just reallocate the proper number of buckets - we don't need to walk
 	 * through them - we can walk the dense-allocated chunks (just like in
@@ -785,7 +781,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
 		(HashJoinTuple *) repalloc(hashtable->buckets,
 								   hashtable->nbuckets * sizeof(HashJoinTuple));
 
-	memset(hashtable->buckets, 0, sizeof(void *) * hashtable->nbuckets);
+	memset(hashtable->buckets, 0, hashtable->nbuckets * sizeof(HashJoinTuple));
 
 	/* scan through all tuples in all chunks to rebuild the hash table */
 	for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next)
@@ -878,12 +874,16 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		 * NTUP_PER_BUCKET threshold, but only when there's still a single
 		 * batch.
 		 */
-		if ((hashtable->nbatch == 1) &&
-			(hashtable->nbuckets_optimal <= INT_MAX / 2) &&	/* overflow protection */
-			(ntuples >= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET)))
+		if (hashtable->nbatch == 1 &&
+			ntuples > (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))
 		{
-			hashtable->nbuckets_optimal *= 2;
-			hashtable->log2_nbuckets_optimal += 1;
+			/* Guard against integer overflow and alloc size overflow */
+			if (hashtable->nbuckets_optimal <= INT_MAX / 2 &&
+				hashtable->nbuckets_optimal * 2 <= MaxAllocSize / sizeof(HashJoinTuple))
+			{
+				hashtable->nbuckets_optimal *= 2;
+				hashtable->log2_nbuckets_optimal += 1;
+			}
 		}
 
 		/* Account for space used, and back off if we've used too much */
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 9d0b85c77db41bd55f023249a08f5b14d7379ed9..7a51ea6fffb6043fe8fd657963b8164d12e68b02 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -131,7 +131,7 @@ typedef struct HashJoinTableData
 	int			nbuckets_original;	/* # buckets when starting the first
 									 * hash */
 	int			nbuckets_optimal;	/* optimal # buckets (per batch) */
-	int			log2_nbuckets_optimal;	/* same as log2_nbuckets optimal */
+	int			log2_nbuckets_optimal;	/* log2(nbuckets_optimal) */
 
 	/* buckets[i] is head of list of tuples in i'th in-memory bucket */
 	struct HashJoinTupleData **buckets;
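
For context, here is a standalone sketch (not part of the patch) of the bucket-count sizing rule the new ExecChooseHashTableSize code enforces: cap the bucket-pointer array by work_mem and by the maximum allocation size, clamp it to at least 1024 entries, and round it up to a power of 2 so ExecHashGetBucketAndBatch can use bit masking. The choose_nbuckets() wrapper and the hard-coded MAX_ALLOC_SIZE stand-in for MaxAllocSize are illustrative assumptions, and the local my_log2() only mirrors the behavior of PostgreSQL's helper of the same name; none of this is meant as the actual backend code.

/*
 * Illustrative sketch of the nbuckets sizing logic; compile with -lm.
 */
#include <limits.h>
#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 1
#define MAX_ALLOC_SIZE	((long) 0x3fffffff) /* stand-in for MaxAllocSize */

typedef void *HashJoinTuple;	/* bucket headers are just pointers */

/* smallest p such that 2^p >= n, mirroring PostgreSQL's my_log2() */
static int
my_log2(long n)
{
	int		i;
	long	limit;

	for (i = 0, limit = 1; limit < n; i++, limit <<= 1)
		;
	return i;
}

/* Pick nbuckets for a single-batch hash join, given work_mem in kB. */
static int
choose_nbuckets(double ntuples, int work_mem_kb)
{
	long	max_pointers;
	double	dbuckets;
	int		nbuckets;

	/* bucket array may not exceed work_mem ... */
	max_pointers = (work_mem_kb * 1024L) / (long) sizeof(HashJoinTuple);
	/* ... nor the maximum allocation request size ... */
	if (max_pointers > MAX_ALLOC_SIZE / (long) sizeof(HashJoinTuple))
		max_pointers = MAX_ALLOC_SIZE / (long) sizeof(HashJoinTuple);
	/* ... nor overflow int arithmetic on nbuckets/nbatch */
	if (max_pointers > INT_MAX / 2)
		max_pointers = INT_MAX / 2;

	/* target an average load of NTUP_PER_BUCKET, within the cap */
	dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
	if (dbuckets > (double) max_pointers)
		dbuckets = (double) max_pointers;

	/* don't let nbuckets be really small, and force a power of 2 */
	nbuckets = (int) dbuckets;
	if (nbuckets < 1024)
		nbuckets = 1024;
	nbuckets = 1 << my_log2(nbuckets);

	return nbuckets;
}

int
main(void)
{
	/* e.g. one million tuples with work_mem = 4MB -> 524288 buckets */
	printf("nbuckets = %d\n", choose_nbuckets(1e6, 4096));
	return 0;
}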