From b39e91501c101d67b92f3e6965da5dc111195f52 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 28 Jan 2007 23:21:26 +0000
Subject: [PATCH] Improve hash join to discard input tuples immediately if they
 can't match because they contain a null join key (and the join operator is
 known strict).  Improves performance significantly when the inner relation
 contains a lot of nulls, as per bug #2930.

---
 src/backend/executor/nodeHash.c     | 54 +++++++++++++++++++++++------
 src/backend/executor/nodeHashjoin.c | 29 ++++++++++------
 src/include/executor/hashjoin.h     |  4 ++-
 src/include/executor/nodeHash.h     |  8 +++--
 4 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index de64e28293d..dffe8cb0d30 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
 		slot = ExecProcNode(outerNode);
 		if (TupIsNull(slot))
 			break;
-		hashtable->totalTuples += 1;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
-		hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
-		ExecHashTableInsert(hashtable, slot, hashvalue);
+		if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
+								 &hashvalue))
+		{
+			ExecHashTableInsert(hashtable, slot, hashvalue);
+			hashtable->totalTuples += 1;
+		}
 	}
 
 	/* must provide our own instrumentation support */
@@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
 
 	/*
 	 * Get info about the hash functions to be used for each hash key.
+	 * Also remember whether the join operators are strict.
 	 */
 	nkeys = list_length(hashOperators);
 	hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
+	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
 	i = 0;
 	foreach(ho, hashOperators)
 	{
+		Oid			hashop = lfirst_oid(ho);
 		Oid			hashfn;
 
-		hashfn = get_op_hash_function(lfirst_oid(ho));
+		hashfn = get_op_hash_function(hashop);
 		if (!OidIsValid(hashfn))
 			elog(ERROR, "could not find hash function for hash operator %u",
-				 lfirst_oid(ho));
+				 hashop);
 		fmgr_info(hashfn, &hashtable->hashfunctions[i]);
+		hashtable->hashStrict[i] = op_strict(hashop);
 		i++;
 	}
 
@@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
  * The tuple to be tested must be in either econtext->ecxt_outertuple or
  * econtext->ecxt_innertuple.  Vars in the hashkeys expressions reference
  * either OUTER or INNER.
+ *
+ * A TRUE result means the tuple's hash value has been successfully computed
+ * and stored at *hashvalue.  A FALSE result means the tuple cannot match
+ * because it contains a null attribute, and hence it should be discarded
+ * immediately.  (If keep_nulls is true then FALSE is never returned.)
  */
-uint32
+bool
 ExecHashGetHashValue(HashJoinTable hashtable,
 					 ExprContext *econtext,
-					 List *hashkeys)
+					 List *hashkeys,
+					 bool keep_nulls,
+					 uint32 *hashvalue)
 {
 	uint32		hashkey = 0;
 	ListCell   *hk;
@@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
 		keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
 
 		/*
-		 * Compute the hash function
+		 * If the attribute is NULL, and the join operator is strict, then
+		 * this tuple cannot pass the join qual so we can reject it
+		 * immediately (unless we're scanning the outer side of an outer join,
+		 * in which case we must not reject it).  Otherwise we act like the
+		 * hashcode of NULL is zero (this will support operators that act like
+		 * IS NOT DISTINCT, though not any more-random behavior).  We treat
+		 * the hash support function as strict even if the operator is not.
+		 *
+		 * Note: currently, all hashjoinable operators must be strict since
+		 * the hash index AM assumes that.  However, it takes so little
+		 * extra code here to allow non-strict that we may as well do it.
 		 */
-		if (!isNull)			/* treat nulls as having hash key 0 */
+		if (isNull)
+		{
+			if (hashtable->hashStrict[i] && !keep_nulls)
+				return false;	/* cannot match */
+			/* else, leave hashkey unmodified, equivalent to hashcode 0 */
+		}
+		else
 		{
+			/* Compute the hash function */
 			uint32		hkey;
 
 			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
 
 	MemoryContextSwitchTo(oldContext);
 
-	return hashkey;
+	*hashvalue = hashkey;
+	return true;
 }
 
 /*
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 7f0801c69bd..b03086fb364 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 	int			curbatch = hashtable->curbatch;
 	TupleTableSlot *slot;
 
-	if (curbatch == 0)
-	{							/* if it is the first pass */
-
+	if (curbatch == 0)			/* if it is the first pass */
+	{
 		/*
 		 * Check to see if first outer tuple was already fetched by
 		 * ExecHashJoin() and not used yet.
@@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 			hjstate->hj_FirstOuterTupleSlot = NULL;
 		else
 			slot = ExecProcNode(outerNode);
-		if (!TupIsNull(slot))
+
+		while (!TupIsNull(slot))
 		{
 			/*
 			 * We have to compute the tuple's hash value.
@@ -567,13 +567,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 			ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
 
 			econtext->ecxt_outertuple = slot;
-			*hashvalue = ExecHashGetHashValue(hashtable, econtext,
-											  hjstate->hj_OuterHashKeys);
+			if (ExecHashGetHashValue(hashtable, econtext,
+									 hjstate->hj_OuterHashKeys,
+									 (hjstate->js.jointype == JOIN_LEFT),
+									 hashvalue))
+			{
+				/* remember outer relation is not empty for possible rescan */
+				hjstate->hj_OuterNotEmpty = true;
 
-			/* remember outer relation is not empty for possible rescan */
-			hjstate->hj_OuterNotEmpty = true;
+				return slot;
+			}
 
-			return slot;
+			/*
+			 * That tuple couldn't match because of a NULL, so discard it
+			 * and continue with the next one.
+			 */
+			slot = ExecProcNode(outerNode);
 		}
 
 		/*
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 59ebb6ebbd9..ba086407679 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -108,6 +108,8 @@ typedef struct HashJoinTableData
 	 */
 	FmgrInfo   *hashfunctions;	/* lookup data for hash functions */
 
+	bool	   *hashStrict;		/* is each hash join operator strict? */
+
 	Size		spaceUsed;		/* memory space currently used by tuples */
 	Size		spaceAllowed;	/* upper limit for space used */
 
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 0ed53ec2267..bf7292e8156 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
 extern void ExecHashTableInsert(HashJoinTable hashtable,
 					TupleTableSlot *slot,
 					uint32 hashvalue);
-extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
+extern bool ExecHashGetHashValue(HashJoinTable hashtable,
 					 ExprContext *econtext,
-					 List *hashkeys);
+					 List *hashkeys,
+					 bool keep_nulls,
+					 uint32 *hashvalue);
 extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
 						  uint32 hashvalue,
 						  int *bucketno,
-- 
GitLab