From d24d75ff194e292fe49c6c84d0124cc61b182d3c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 30 May 2003 20:23:10 +0000
Subject: [PATCH] Small performance improvement for hash joins and hash
 aggregation: when the plan is ReScanned, we don't have to rebuild the hash
 table if there is no parameter change for its child node (for hash joins,
 only when the join is single-batch).  This idea has been used for a long
 time in Sort and Material nodes, but was not in the hash code till now.

---
 src/backend/executor/nodeAgg.c      | 33 +++++++++++-
 src/backend/executor/nodeHashjoin.c | 83 ++++++++++++++++-------------
 2 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index bbdda3540a7..603df5ed1c4 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.104 2003/02/09 00:30:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.105 2003/05/30 20:23:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1374,6 +1374,31 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	int			aggno;
 
+	node->agg_done = false;
+
+	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+	{
+		/*
+		 * In the hashed case, if we haven't yet built the hash table
+		 * then we can just return; nothing done yet, so nothing to undo.
+		 * If subnode's chgParam is not NULL then it will be re-scanned by
+		 * ExecProcNode, else no reason to re-scan it at all.
+		 */
+		if (!node->table_filled)
+			return;
+
+		/*
+		 * If we do have the hash table and the subplan does not have any
+		 * parameter changes, then we can just rescan the existing hash
+		 * table; no need to build it again.
+		 */
+		if (((PlanState *) node)->lefttree->chgParam == NULL)
+		{
+			ResetTupleHashIterator(&node->hashiter);
+			return;
+		}
+	}
+
 	/* Make sure we have closed any open tuplesorts */
 	for (aggno = 0; aggno < node->numaggs; aggno++)
 	{
@@ -1384,19 +1409,23 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
 		peraggstate->sortstate = NULL;
 	}
 
-	node->agg_done = false;
+	/* Release first tuple of group, if we have made a copy */
 	if (node->grp_firstTuple != NULL)
 	{
 		heap_freetuple(node->grp_firstTuple);
 		node->grp_firstTuple = NULL;
 	}
+
+	/* Forget current agg values */
 	MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
 	MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
 
+	/* Release all temp storage */
 	MemoryContextReset(node->aggcontext);
 
 	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 	{
+		/* Rebuild an empty hash table */
 		build_hash_table(node);
 		node->table_filled = false;
 	}
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 000063a8b7f..17585b2f0fc 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.50 2003/05/05 17:57:47 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,9 +56,7 @@ ExecHashJoin(HashJoinState *node)
 	HashJoinTable hashtable;
 	HeapTuple	curtuple;
 	TupleTableSlot *outerTupleSlot;
-	TupleTableSlot *innerTupleSlot;
 	int			i;
-	bool		hashPhaseDone;
 
 	/*
 	 * get information from HashJoin node
@@ -69,7 +67,6 @@ ExecHashJoin(HashJoinState *node)
 	otherqual = node->js.ps.qual;
 	hashNode = (HashState *) innerPlanState(node);
 	outerNode = outerPlanState(node);
-	hashPhaseDone = node->hj_hashdone;
 	dir = estate->es_direction;
 
 	/*
@@ -114,24 +111,20 @@ ExecHashJoin(HashJoinState *node)
 	/*
 	 * if this is the first call, build the hash table for inner relation
 	 */
-	if (!hashPhaseDone)
-	{							/* if the hash phase not completed */
-		if (hashtable == NULL)
-		{						/* if the hash table has not been created */
-
-			/*
-			 * create the hash table
-			 */
-			hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
-			node->hj_HashTable = hashtable;
+	if (!node->hj_hashdone)
+	{
+		/*
+		 * create the hash table
+		 */
+		Assert(hashtable == NULL);
+		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
+		node->hj_HashTable = hashtable;
 
-			/*
-			 * execute the Hash node, to build the hash table
-			 */
-			hashNode->hashtable = hashtable;
-			innerTupleSlot = ExecProcNode((PlanState *) hashNode);
-		}
-		node->hj_hashdone = true;
+		/*
+		 * execute the Hash node, to build the hash table
+		 */
+		hashNode->hashtable = hashtable;
+		(void) ExecProcNode((PlanState *) hashNode);
 
 		/*
 		 * Open temp files for outer batches, if needed. Note that file
@@ -139,9 +132,9 @@ ExecHashJoin(HashJoinState *node)
 		 */
 		for (i = 0; i < hashtable->nbatch; i++)
 			hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+
+		node->hj_hashdone = true;
 	}
-	else if (hashtable == NULL)
-		return NULL;
 
 	/*
 	 * Now get an outer tuple and probe into the hash table for matches
@@ -159,11 +152,7 @@ ExecHashJoin(HashJoinState *node)
 													   node);
 			if (TupIsNull(outerTupleSlot))
 			{
-				/*
-				 * when the last batch runs out, clean up and exit
-				 */
-				ExecHashTableDestroy(hashtable);
-				node->hj_HashTable = NULL;
+				/* end of join */
 				return NULL;
 			}
 
@@ -410,8 +399,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 	 */
 
 	hjstate->hj_hashdone = false;
-
 	hjstate->hj_HashTable = (HashJoinTable) NULL;
+
 	hjstate->hj_CurBucketNo = 0;
 	hjstate->hj_CurTuple = (HashJoinTuple) NULL;
 
@@ -461,7 +450,7 @@ void
 ExecEndHashJoin(HashJoinState *node)
 {
 	/*
-	 * free hash table in case we end plan before all tuples are retrieved
+	 * Free hash table
 	 */
 	if (node->hj_HashTable)
 	{
@@ -682,21 +671,41 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple,
 void
 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 {
+	/*
+	 * If we haven't yet built the hash table then we can just return;
+	 * nothing done yet, so nothing to undo.
+	 */
 	if (!node->hj_hashdone)
 		return;
-
-	node->hj_hashdone = false;
+	Assert(node->hj_HashTable != NULL);
 
 	/*
-	 * Unfortunately, currently we have to destroy hashtable in all
-	 * cases...
+	 * In a multi-batch join, we currently have to do rescans the hard way,
+	 * primarily because batch temp files may have already been released.
+	 * But if it's a single-batch join, and there is no parameter change
+	 * for the inner subnode, then we can just re-use the existing hash
+	 * table without rebuilding it.
 	 */
-	if (node->hj_HashTable)
+	if (node->hj_HashTable->nbatch == 0 &&
+		((PlanState *) node)->righttree->chgParam == NULL)
+	{
+		/* okay to reuse the hash table; needn't rescan inner, either */
+	}
+	else
 	{
+		/* must destroy and rebuild hash table */
+		node->hj_hashdone = false;
 		ExecHashTableDestroy(node->hj_HashTable);
 		node->hj_HashTable = NULL;
+		/*
+		 * If chgParam of the inner subnode is not null then it will be
+		 * re-scanned by the first ExecProcNode call; else rescan it here.
+		 */
+		if (((PlanState *) node)->righttree->chgParam == NULL)
+			ExecReScan(((PlanState *) node)->righttree, exprCtxt);
 	}
 
+	/* Always reset intra-tuple state */
 	node->hj_CurBucketNo = 0;
 	node->hj_CurTuple = (HashJoinTuple) NULL;
 
@@ -706,11 +715,9 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 	node->hj_MatchedOuter = false;
 
 	/*
-	 * if chgParam of subnodes is not null then plans will be re-scanned
+	 * if chgParam of outer subnode is not null then plan will be re-scanned
 	 * by first ExecProcNode.
 	 */
 	if (((PlanState *) node)->lefttree->chgParam == NULL)
 		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
-	if (((PlanState *) node)->righttree->chgParam == NULL)
-		ExecReScan(((PlanState *) node)->righttree, exprCtxt);
 }
-- 
GitLab