From e3a33a9a9f1a6afb80c9b83c1456c1a36fbcb70b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 8 Jun 2005 23:02:05 +0000
Subject: [PATCH] Marginal hack to avoid spending a lot of time in
 find_join_rel during large planning problems: when the list of join rels gets
 too long, make an auxiliary hash table that hashes on the identifying
 Bitmapset.

---
 src/backend/nodes/bitmapset.c          |  27 +++++-
 src/backend/optimizer/geqo/geqo_eval.c |  30 +++++--
 src/backend/optimizer/geqo/geqo_main.c |   3 +-
 src/backend/optimizer/plan/planmain.c  |   3 +-
 src/backend/optimizer/util/relnode.c   | 109 +++++++++++++++++++++++--
 src/backend/utils/hash/hashfn.c        |  26 +++++-
 src/include/nodes/bitmapset.h          |   5 +-
 src/include/nodes/relation.h           |  12 ++-
 src/include/utils/hsearch.h            |   4 +-
 9 files changed, 194 insertions(+), 25 deletions(-)

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 699f0439ffc..5f4ca9a779b 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -14,7 +14,7 @@
  * Copyright (c) 2003-2005, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.7 2005/01/01 20:44:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -763,3 +763,28 @@ bms_first_member(Bitmapset *a)
 	}
 	return -1;
 }
+
+/*
+ * bms_hash_value - compute a hash key for a Bitmapset
+ *
+ * Note: we must ensure that any two bitmapsets that are bms_equal() will
+ * hash to the same value; in practice this means that trailing all-zero
+ * words cannot affect the result.  Longitudinal XOR provides a reasonable
+ * hash value that has this property.
+ */
+uint32
+bms_hash_value(const Bitmapset *a)
+{
+	bitmapword	result = 0;
+	int			nwords;
+	int			wordnum;
+
+	if (a == NULL)
+		return 0;				/* All empty sets hash to 0 */
+	nwords = a->nwords;
+	for (wordnum = 0; wordnum < nwords; wordnum++)
+	{
+		result ^= a->words[wordnum];
+	}
+	return (uint32) result;
+}
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 3460eb5b8e3..5d31ac738e7 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.74 2005/06/05 22:32:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.75 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,7 +47,8 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
 	MemoryContext oldcxt;
 	RelOptInfo *joinrel;
 	Cost		fitness;
-	List	   *savelist;
+	int			savelength;
+	struct HTAB *savehash;
 
 	/*
 	 * Because gimme_tree considers both left- and right-sided trees,
@@ -83,13 +84,19 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
 	 * gimme_tree will add entries to root->join_rel_list, which may or may
 	 * not already contain some entries.  The newly added entries will be
 	 * recycled by the MemoryContextDelete below, so we must ensure that
-	 * the list is restored to its former state before exiting.  With the
-	 * new List implementation, the easiest way is to make a duplicate list
-	 * that gimme_tree can modify.
+	 * the list is restored to its former state before exiting.  We can
+	 * do this by truncating the list to its original length.  NOTE this
+	 * assumes that any added entries are appended at the end!
+	 *
+	 * We also must take care not to mess up the outer join_rel_hash,
+	 * if there is one.  We can do this by just temporarily setting the
+	 * link to NULL.  (If we are dealing with enough join rels, which we
+	 * very likely are, a new hash table will get built and used locally.)
 	 */
-	savelist = evaldata->root->join_rel_list;
+	savelength = list_length(evaldata->root->join_rel_list);
+	savehash = evaldata->root->join_rel_hash;
 
-	evaldata->root->join_rel_list = list_copy(savelist);
+	evaldata->root->join_rel_hash = NULL;
 
 	/* construct the best path for the given combination of relations */
 	joinrel = gimme_tree(tour, num_gene, evaldata);
@@ -105,8 +112,13 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
 	else
 		fitness = DBL_MAX;
 
-	/* restore join_rel_list */
-	evaldata->root->join_rel_list = savelist;
+	/*
+	 * Restore join_rel_list to its former state, and put back original
+	 * hashtable if any.
+	 */
+	evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
+												  savelength);
+	evaldata->root->join_rel_hash = savehash;
 
 	/* release all the memory acquired within gimme_tree */
 	MemoryContextSwitchTo(oldcxt);
diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c
index f19f5f7c34d..c027f4370c3 100644
--- a/src/backend/optimizer/geqo/geqo_main.c
+++ b/src/backend/optimizer/geqo/geqo_main.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.49 2005/06/05 22:32:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.50 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -252,7 +252,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
 	 */
 	best_tour = (Gene *) pool->data[0].string;
 
-	/* root->join_rel_list will be modified during this ! */
 	best_rel = gimme_tree(best_tour, pool->string_length, &evaldata);
 
 	if (best_rel == NULL)
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 1c87b24e4c6..50e1bc5ea8c 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.83 2005/06/06 04:13:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.84 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -116,6 +116,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 	root->base_rel_array = (RelOptInfo **)
 		palloc0(root->base_rel_array_size * sizeof(RelOptInfo *));
 	root->join_rel_list = NIL;
+	root->join_rel_hash = NULL;
 	root->equi_key_list = NIL;
 
 	/*
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 996f7691870..fbaf0de83a8 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.68 2005/06/06 04:13:36 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.69 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,15 @@
 #include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
+#include "utils/hsearch.h"
 
 
+typedef struct JoinHashEntry
+{
+	Relids		join_relids;	/* hash key --- MUST BE FIRST */
+	RelOptInfo *join_rel;
+} JoinHashEntry;
+
 static RelOptInfo *make_reloptinfo(PlannerInfo *root, int relid,
 								   RelOptKind reloptkind);
 static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
@@ -197,6 +204,47 @@ find_base_rel(PlannerInfo *root, int relid)
 	return NULL;				/* keep compiler quiet */
 }
 
+/*
+ * build_join_rel_hash
+ *	  Construct the auxiliary hash table for join relations.
+ */
+static void
+build_join_rel_hash(PlannerInfo *root)
+{
+	HTAB	   *hashtab;
+	HASHCTL		hash_ctl;
+	ListCell   *l;
+
+	/* Create the hash table */
+	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(Relids);
+	hash_ctl.entrysize = sizeof(JoinHashEntry);
+	hash_ctl.hash = bitmap_hash;
+	hash_ctl.match = bitmap_match;
+	hash_ctl.hcxt = CurrentMemoryContext;
+	hashtab = hash_create("JoinRelHashTable",
+						  256L,
+						  &hash_ctl,
+						  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+	/* Insert all the already-existing joinrels */
+	foreach(l, root->join_rel_list)
+	{
+		RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+		JoinHashEntry *hentry;
+		bool		found;
+
+		hentry = (JoinHashEntry *) hash_search(hashtab,
+											   &(rel->relids),
+											   HASH_ENTER,
+											   &found);
+		Assert(!found);
+		hentry->join_rel = rel;
+	}
+
+	root->join_rel_hash = hashtab;
+}
+
 /*
  * find_join_rel
  *	  Returns relation entry corresponding to 'relids' (a set of RT indexes),
@@ -205,14 +253,44 @@ find_base_rel(PlannerInfo *root, int relid)
 RelOptInfo *
 find_join_rel(PlannerInfo *root, Relids relids)
 {
-	ListCell   *l;
+	/*
+	 * Switch to using hash lookup when list grows "too long".  The threshold
+	 * is arbitrary and is known only here.
+	 */
+	if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
+		build_join_rel_hash(root);
 
-	foreach(l, root->join_rel_list)
+	/*
+	 * Use either hashtable lookup or linear search, as appropriate.
+	 *
+	 * Note: the seemingly redundant hashkey variable is used to avoid
+	 * taking the address of relids; unless the compiler is exceedingly
+	 * smart, doing so would force relids out of a register and thus
+	 * probably slow down the list-search case.
+	 */
+	if (root->join_rel_hash)
 	{
-		RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+		Relids		hashkey = relids;
+		JoinHashEntry *hentry;
+
+		hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
+											   &hashkey,
+											   HASH_FIND,
+											   NULL);
+		if (hentry)
+			return hentry->join_rel;
+	}
+	else
+	{
+		ListCell   *l;
 
-		if (bms_equal(rel->relids, relids))
-			return rel;
+		foreach(l, root->join_rel_list)
+		{
+			RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+
+			if (bms_equal(rel->relids, relids))
+				return rel;
+		}
 	}
 
 	return NULL;
@@ -329,9 +407,24 @@ build_join_rel(PlannerInfo *root,
 							   jointype, restrictlist);
 
 	/*
-	 * Add the joinrel to the query's joinrel list.
+	 * Add the joinrel to the query's joinrel list, and store it into
+	 * the auxiliary hashtable if there is one.  NB: GEQO requires us
+	 * to append the new joinrel to the end of the list!
 	 */
-	root->join_rel_list = lcons(joinrel, root->join_rel_list);
+	root->join_rel_list = lappend(root->join_rel_list, joinrel);
+
+	if (root->join_rel_hash)
+	{
+		JoinHashEntry *hentry;
+		bool		found;
+
+		hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
+											   &(joinrel->relids),
+											   HASH_ENTER,
+											   &found);
+		Assert(!found);
+		hentry->join_rel = joinrel;
+	}
 
 	return joinrel;
 }
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c
index 24255f31e6e..c5968658161 100644
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -9,13 +9,14 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.23 2005/04/14 20:32:43 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.24 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "nodes/bitmapset.h"
 #include "utils/hsearch.h"
 
 
@@ -53,3 +54,26 @@ oid_hash(const void *key, Size keysize)
 	/* We don't actually bother to do anything to the OID value ... */
 	return (uint32) *((const Oid *) key);
 }
+
+/*
+ * bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
+ *
+ * Note: don't forget to specify bitmap_match as the match function!
+ */
+uint32
+bitmap_hash(const void *key, Size keysize)
+{
+	Assert(keysize == sizeof(Bitmapset *));
+	return bms_hash_value(*((const Bitmapset * const *) key));
+}
+
+/*
+ * bitmap_match: match function to use with bitmap_hash
+ */
+int
+bitmap_match(const void *key1, const void *key2, Size keysize)
+{
+	Assert(keysize == sizeof(Bitmapset *));
+	return !bms_equal(*((const Bitmapset * const *) key1),
+					  *((const Bitmapset * const *) key2));
+}
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 2ba017fc2ee..b831c4e59a1 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -13,7 +13,7 @@
  *
  * Copyright (c) 2003-2005, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.6 2005/01/01 20:44:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.7 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -80,4 +80,7 @@ extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b);
 /* support for iterating through the integer elements of a set: */
 extern int	bms_first_member(Bitmapset *a);
 
+/* support for hashtables using Bitmapsets as keys: */
+extern uint32 bms_hash_value(const Bitmapset *a);
+
 #endif   /* BITMAPSET_H */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 93dc78aece4..7c702f7105a 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.111 2005/06/06 04:13:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.112 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,7 +72,17 @@ typedef struct PlannerInfo
 	struct RelOptInfo **base_rel_array;	/* All one-relation RelOptInfos */
 	int			base_rel_array_size;	/* current allocated array len */
 
+	/*
+	 * join_rel_list is a list of all join-relation RelOptInfos we have
+	 * considered in this planning run.  For small problems we just scan
+	 * the list to do lookups, but when there are many join relations we
+	 * build a hash table for faster lookups.  The hash table is present
+	 * and valid when join_rel_hash is not NULL.  Note that we still maintain
+	 * the list even when using the hash table for lookups; this simplifies
+	 * life for GEQO.
+	 */
 	List	   *join_rel_list;	/* list of join-relation RelOptInfos */
+	struct HTAB *join_rel_hash;	/* optional hashtable for join relations */
 
 	List	   *equi_key_list;	/* list of lists of equijoined
 								 * PathKeyItems */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index bb93dea0775..881327a3e03 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.36 2005/05/29 04:23:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.37 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -184,5 +184,7 @@ extern long hash_select_dirsize(long num_entries);
 extern uint32 string_hash(const void *key, Size keysize);
 extern uint32 tag_hash(const void *key, Size keysize);
 extern uint32 oid_hash(const void *key, Size keysize);
+extern uint32 bitmap_hash(const void *key, Size keysize);
+extern int	bitmap_match(const void *key1, const void *key2, Size keysize);
 
 #endif   /* HSEARCH_H */
-- 
GitLab