From e990b9ce23f2be15d48ed518b30931eddd0e59ca Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 25 Sep 2005 19:37:35 +0000
Subject: [PATCH] The original patch to avoid building a hash join's hashtable
 when the outer relation is empty did not work, per test case from Patrick
 Welche. It tried to use nodeHashjoin.c's high-level mechanisms for fetching
 an outer-relation tuple, but that code expected the hash table to be filled
 already.  As patched, the code failed in corner cases such as having no
 outer-relation tuples for the first hash batch.  Revert and rewrite.

---
 src/backend/executor/nodeHash.c     |  52 ++------
 src/backend/executor/nodeHashjoin.c | 200 ++++++++++++----------------
 src/include/nodes/execnodes.h       |   5 +-
 3 files changed, 94 insertions(+), 163 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index c7119933e5b..5e2be394d86 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.94 2005/06/15 07:27:44 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.95 2005/09/25 19:37:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,22 +37,14 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
 /* ----------------------------------------------------------------
  *		ExecHash
  *
- *		produce the first tuple from our child node (and _only_ the
- *		first tuple). This is of limited general use -- it does not
- *		hash its output, and produces only a single tuple. It is
- *		provided so that hash join can probe the inner hash input to
- *		determine whether it is empty without needing to build the
- *		entire hash table first, which is what MultiExecHash() would
- *		do.
+ *		stub for pro forma compliance
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
 ExecHash(HashState *node)
 {
-	if (TupIsNull(node->firstTuple))
-		node->firstTuple = ExecProcNode(outerPlanState(node));
-
-	return node->firstTuple;
+	elog(ERROR, "Hash node does not support ExecProcNode call convention");
+	return NULL;
 }
 
 /* ----------------------------------------------------------------
@@ -71,7 +63,6 @@ MultiExecHash(HashState *node)
 	TupleTableSlot *slot;
 	ExprContext *econtext;
 	uint32		hashvalue;
-	bool		cleared_first_tuple = false;
 
 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
@@ -94,19 +85,9 @@ MultiExecHash(HashState *node)
 	 */
 	for (;;)
 	{
-		/* use and clear the tuple produced by ExecHash(), if any */
-		if (!TupIsNull(node->firstTuple))
-		{
-			slot = node->firstTuple;
-			node->firstTuple = NULL;
-			cleared_first_tuple = true;
-		}
-		else
-		{
-			slot = ExecProcNode(outerNode);
-			if (TupIsNull(slot))
-				break;
-		}
+		slot = ExecProcNode(outerNode);
+		if (TupIsNull(slot))
+			break;
 		hashtable->totalTuples += 1;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
@@ -116,19 +97,7 @@ MultiExecHash(HashState *node)
 
 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
-	{
-		/*
-		 * XXX: kludge -- if ExecHash() was invoked, we've already
-		 * included the tuple that it produced in the row output count
-		 * for this node, so subtract 1 from the # of hashed tuples.
-		 */
-		if (cleared_first_tuple)
-			InstrStopNodeMulti(node->ps.instrument,
-							   hashtable->totalTuples - 1);
-		else
-			InstrStopNodeMulti(node->ps.instrument,
-							   hashtable->totalTuples);
-	}
+		InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
 
 	/*
 	 * We do not return the hash table directly because it's not a subtype
@@ -161,7 +130,6 @@ ExecInitHash(Hash *node, EState *estate)
 	hashstate->ps.state = estate;
 	hashstate->hashtable = NULL;
 	hashstate->hashkeys = NIL;	/* will be set by parent HashJoin */
-	hashstate->firstTuple = NULL;
 
 	/*
 	 * Miscellaneous initialization
@@ -221,8 +189,6 @@ ExecEndHash(HashState *node)
 {
 	PlanState  *outerPlan;
 
-	node->firstTuple = NULL;
-
 	/*
 	 * free exprcontext
 	 */
@@ -864,8 +830,6 @@ ExecHashTableReset(HashJoinTable hashtable)
 void
 ExecReScanHash(HashState *node, ExprContext *exprCtxt)
 {
-	node->firstTuple = NULL;
-
 	/*
 	 * if chgParam of subnode is not null then plan will be re-scanned by
 	 * first ExecProcNode.
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 2a44a18adc1..4b0f9377ba8 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.72 2005/06/15 07:27:44 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.73 2005/09/25 19:37:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,7 +31,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 						  uint32 *hashvalue,
 						  TupleTableSlot *tupleSlot);
 static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
-static TupleTableSlot *ExecHashJoinReadOuterPlan(HashJoinState *hjstate);
+
 
 /* ----------------------------------------------------------------
  *		ExecHashJoin
@@ -57,6 +57,8 @@ ExecHashJoin(HashJoinState *node)
 	HashJoinTable hashtable;
 	HeapTuple	curtuple;
 	TupleTableSlot *outerTupleSlot;
+	uint32		hashvalue;
+	int			batchno;
 
 	/*
 	 * get information from HashJoin node
@@ -105,69 +107,57 @@ ExecHashJoin(HashJoinState *node)
 	 */
 	ResetExprContext(econtext);
 
+	/*
+	 * if this is the first call, build the hash table for inner relation
+	 */
 	if (hashtable == NULL)
 	{
 		/*
-		 * This is the first call to the node. When _either_ of the
-		 * hash join inputs are empty, we want to avoid doing
-		 * unnecessary work (e.g. building the hash table for the
-		 * inner join relation). We therefore read a single tuple from
-		 * both inputs before proceeding further. We choose which
-		 * input to probe first based on the startup cost of the plan
-		 * node.
+		 * If the outer relation is completely empty, we can quit without
+		 * building the hash table.  However, for an inner join it is only
+		 * a win to check this when the outer relation's startup cost is less
+		 * than the projected cost of building the hash table.  Otherwise
+		 * it's best to build the hash table first and see if the inner
+		 * relation is empty.  (When it's an outer join, we should always
+		 * make this check, since we aren't going to be able to skip the
+		 * join on the strength of an empty inner relation anyway.)
 		 *
-		 * Note that if we're executing an outer join and the inner
-		 * relation is empty, we still have work to do.
+		 * The only way to make the check is to try to fetch a tuple from
+		 * the outer plan node.  If we succeed, we have to stash it away
+		 * for later consumption by ExecHashJoinOuterGetTuple.
 		 */
-
-		/* Consider probing the inner relation first */
-		if (hashNode->ps.plan->startup_cost <= outerNode->plan->startup_cost &&
-			node->js.jointype != JOIN_LEFT)
+		if (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost ||
+			node->js.jointype == JOIN_LEFT)
 		{
-			/*
-			 * ExecHash() lets us get a single tuple from the inner
-			 * relation without building the entire hash table
-			 */
-			TupleTableSlot *tup = ExecProcNode(&hashNode->ps);
-			if (TupIsNull(tup))
+			node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
+			if (TupIsNull(node->hj_FirstOuterTupleSlot))
 				return NULL;
 		}
+		else
+			node->hj_FirstOuterTupleSlot = NULL;
 
 		/*
-		 * Before we can check the outer relation, we need to build
-		 * the hash table. This is somewhat a waste of time if the
-		 * outer relation is empty, but it would be awkward to avoid.
+		 * create the hash table
 		 */
 		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
 										node->hj_HashOperators);
 		node->hj_HashTable = hashtable;
-		hashNode->hashtable = hashtable;
-
-		/* Now check the outer relation */
-		outerTupleSlot = ExecHashJoinReadOuterPlan(node);
-
-		if (TupIsNull(outerTupleSlot))
-		{
-			ExecHashTableDestroy(node->hj_HashTable);
-			node->hj_HashTable = NULL;
-			return NULL;
-		}
 
 		/*
-		 * Okay, we can't avoid it, so execute the Hash node to build
-		 * the hash table
+		 * execute the Hash node, to build the hash table
 		 */
+		hashNode->hashtable = hashtable;
 		(void) MultiExecProcNode((PlanState *) hashNode);
 
 		/*
-		 * If the inner relation is empty but its startup cost was
-		 * less than the outer relation's startup cost, we can arrive
-		 * here -- we're done unless this is an outer join
+		 * If the inner relation is completely empty, and we're not doing
+		 * an outer join, we can quit without scanning the outer relation.
 		 */
 		if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
 		{
-			ExecHashTableDestroy(node->hj_HashTable);
+			ExecHashTableDestroy(hashtable);
 			node->hj_HashTable = NULL;
+			node->hj_FirstOuterTupleSlot = NULL;
 			return NULL;
 		}
 
@@ -188,9 +178,46 @@ ExecHashJoin(HashJoinState *node)
 		 */
 		if (node->hj_NeedNewOuter)
 		{
-			outerTupleSlot = ExecHashJoinReadOuterPlan(node);
+			outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
+													   node,
+													   &hashvalue);
 			if (TupIsNull(outerTupleSlot))
-				return NULL; /* end of join */
+			{
+				/* end of join */
+				return NULL;
+			}
+
+			node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
+			econtext->ecxt_outertuple = outerTupleSlot;
+			node->hj_NeedNewOuter = false;
+			node->hj_MatchedOuter = false;
+
+			/*
+			 * now we have an outer tuple, find the corresponding bucket
+			 * for this tuple from the hash table
+			 */
+			node->hj_CurHashValue = hashvalue;
+			ExecHashGetBucketAndBatch(hashtable, hashvalue,
+									  &node->hj_CurBucketNo, &batchno);
+			node->hj_CurTuple = NULL;
+
+			/*
+			 * Now we've got an outer tuple and the corresponding hash
+			 * bucket, but this tuple may not belong to the current batch.
+			 */
+			if (batchno != hashtable->curbatch)
+			{
+				/*
+				 * Need to postpone this outer tuple to a later batch.
+				 * Save it in the corresponding outer-batch file.
+				 */
+				Assert(batchno > hashtable->curbatch);
+				ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
+									  hashvalue,
+									  &hashtable->outerBatchFile[batchno]);
+				node->hj_NeedNewOuter = true;
+				continue;	/* loop around for a new outer tuple */
+			}
 		}
 
 		/*
@@ -400,6 +427,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 	 * initialize hash-specific info
 	 */
 	hjstate->hj_HashTable = NULL;
+	hjstate->hj_FirstOuterTupleSlot = NULL;
 
 	hjstate->hj_CurHashValue = 0;
 	hjstate->hj_CurBucketNo = 0;
@@ -464,6 +492,7 @@ ExecEndHashJoin(HashJoinState *node)
 	{
 		ExecHashTableDestroy(node->hj_HashTable);
 		node->hj_HashTable = NULL;
+		node->hj_FirstOuterTupleSlot = NULL;
 	}
 
 	/*
@@ -485,79 +514,6 @@ ExecEndHashJoin(HashJoinState *node)
 	ExecEndNode(innerPlanState(node));
 }
 
-/*
- * ExecHashJoinReadOuterPlan
- *
- *		do all the work necessary to produce the next tuple from the
- *		outer hash join relation that is in the current batch. Returns
- *		NULL if there are no more tuples in the outer relation.
- */
-static TupleTableSlot *
-ExecHashJoinReadOuterPlan(HashJoinState *hjstate)
-{
-	PlanState *outerNode;
-	ExprContext *econtext;
-	HashJoinTable hashtable;
-
-	outerNode = outerPlanState(hjstate);
-	econtext = hjstate->js.ps.ps_ExprContext;
-	hashtable = hjstate->hj_HashTable;
-
-	for (;;)
-	{
-		TupleTableSlot *result;
-		uint32		hashvalue;
-		int			batchno;
-
-		result = ExecHashJoinOuterGetTuple(outerNode,
-										   hjstate,
-										   &hashvalue);
-		if (TupIsNull(result))
-		{
-			/* end of join */
-			return NULL;
-		}
-
-		hjstate->js.ps.ps_OuterTupleSlot = result;
-		econtext->ecxt_outertuple = result;
-		hjstate->hj_NeedNewOuter = false;
-		hjstate->hj_MatchedOuter = false;
-
-		/*
-		 * now we have an outer tuple, find the corresponding bucket
-		 * for this tuple from the hash table
-		 */
-		hjstate->hj_CurHashValue = hashvalue;
-		ExecHashGetBucketAndBatch(hashtable, hashvalue,
-								  &hjstate->hj_CurBucketNo, &batchno);
-		hjstate->hj_CurTuple = NULL;
-
-		/*
-		 * Now we've got an outer tuple and the corresponding hash
-		 * bucket, but this tuple may not belong to the current batch.
-		 */
-		if (batchno != hashtable->curbatch)
-		{
-			/*
-			 * Need to postpone this outer tuple to a later batch.
-			 * Save it in the corresponding outer-batch file.
-			 */
-			Assert(batchno > hashtable->curbatch);
-			ExecHashJoinSaveTuple(ExecFetchSlotTuple(result),
-								  hashvalue,
-								  &hashtable->outerBatchFile[batchno]);
-			hjstate->hj_NeedNewOuter = true;
-			continue;	/* Get the next outer tuple */
-		}
-
-		/*
-		 * Otherwise, we have a tuple in the current batch, so we're
-		 * done
-		 */
-		return result;
-	}
-}
-
 /*
  * ExecHashJoinOuterGetTuple
  *
@@ -580,7 +536,15 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 
 	if (curbatch == 0)
 	{							/* if it is the first pass */
-		slot = ExecProcNode(outerNode);
+		/*
+		 * Check to see if first outer tuple was already fetched by
+		 * ExecHashJoin() and not used yet.
+		 */
+		slot = hjstate->hj_FirstOuterTupleSlot;
+		if (!TupIsNull(slot))
+			hjstate->hj_FirstOuterTupleSlot = NULL;
+		else
+			slot = ExecProcNode(outerNode);
 		if (!TupIsNull(slot))
 		{
 			/*
@@ -840,6 +804,7 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 	return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
 }
 
+
 void
 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 {
@@ -867,6 +832,7 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 		/* must destroy and rebuild hash table */
 		ExecHashTableDestroy(node->hj_HashTable);
 		node->hj_HashTable = NULL;
+		node->hj_FirstOuterTupleSlot = NULL;
 
 		/*
 		 * if chgParam of subnode is not null then plan will be re-scanned
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 73648f93ade..5e68eae5270 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.137 2005/08/01 20:31:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.138 2005/09/25 19:37:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1097,6 +1097,7 @@ typedef struct MergeJoinState
  *		hj_OuterTupleSlot		tuple slot for outer tuples
  *		hj_HashTupleSlot		tuple slot for hashed tuples
  *		hj_NullInnerTupleSlot	prepared null tuple for left outer joins
+ *		hj_FirstOuterTupleSlot	first tuple retrieved from outer plan
  *		hj_NeedNewOuter			true if need new outer tuple on next call
  *		hj_MatchedOuter			true if found a join match for current outer
  * ----------------
@@ -1120,6 +1121,7 @@ typedef struct HashJoinState
 	TupleTableSlot *hj_OuterTupleSlot;
 	TupleTableSlot *hj_HashTupleSlot;
 	TupleTableSlot *hj_NullInnerTupleSlot;
+	TupleTableSlot *hj_FirstOuterTupleSlot;
 	bool		hj_NeedNewOuter;
 	bool		hj_MatchedOuter;
 } HashJoinState;
@@ -1232,7 +1234,6 @@ typedef struct HashState
 	HashJoinTable hashtable;	/* hash table for the hashjoin */
 	List	   *hashkeys;		/* list of ExprState nodes */
 	/* hashkeys is same as parent's hj_InnerHashKeys */
-	TupleTableSlot *firstTuple;	/* tuple produced by ExecHash() */
 } HashState;
 
 /* ----------------
-- 
GitLab