From d54ca56743d5b0bb43f7cd951290384a6933e9f9 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 28 Nov 2007 18:47:56 +0000
Subject: [PATCH] Install a lookaside cache to speed up repeated lookups of the
 same operator by short-circuiting schema search path and ambiguous-operator
 resolution computations.  Remarkably, this buys as much as 45% speedup of
 repetitive simple queries that involve operators that are not an exact match
 to the input datatypes.  It should be marginally faster even for exact-match
 cases, though I've not had success in proving an improvement in benchmark
 tests.  Per report from Guillame Smet and subsequent discussion.

---
 src/backend/catalog/namespace.c |  36 ++++-
 src/backend/parser/parse_oper.c | 278 +++++++++++++++++++++++++++++++-
 src/include/catalog/namespace.h |   3 +-
 3 files changed, 311 insertions(+), 6 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 51102387783..71f6de197f2 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.102 2007/11/25 02:09:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.103 2007/11/28 18:47:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3006,6 +3006,40 @@ fetch_search_path(bool includeImplicit)
 	return result;
 }
 
+/*
+ * Fetch the active search path into a caller-allocated array of OIDs.
+ * Returns the number of path entries.  (If this is more than sarray_len,
+ * then the data didn't fit and is not all stored.)
+ *
+ * The returned list always includes the implicitly-prepended namespaces,
+ * but never includes the temp namespace.  (This is suitable for existing
+ * users, which would want to ignore the temp namespace anyway.)  This
+ * definition allows us to not worry about initializing the temp namespace.
+ */
+int
+fetch_search_path_array(Oid *sarray, int sarray_len)
+{
+	int			count = 0;
+	ListCell   *l;
+
+	recomputeNamespacePath();
+
+	foreach(l, activeSearchPath)
+	{
+		Oid			namespaceId = lfirst_oid(l);
+
+		if (namespaceId == myTempNamespace)
+			continue;			/* do not include temp namespace */
+
+		if (count < sarray_len)
+			sarray[count] = namespaceId;
+		count++;
+	}
+
+	return count;
+}
+
+
 /*
  * Export the FooIsVisible functions as SQL-callable functions.
  */
diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c
index 898c456b971..f948c703bb4 100644
--- a/src/backend/parser/parse_oper.c
+++ b/src/backend/parser/parse_oper.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.98 2007/11/22 19:40:25 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.99 2007/11/28 18:47:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,11 +24,47 @@
 #include "parser/parse_oper.h"
 #include "parser/parse_type.h"
 #include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/inval.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
 
+/*
+ * The lookup key for the operator lookaside hash table.  Unused bits must be
+ * zeroes to ensure hashing works consistently --- in particular, oprname
+ * must be zero-padded and any unused entries in search_path must be zero.
+ *
+ * search_path contains the actual search_path with which the entry was
+ * derived (minus temp namespace if any), or else the single specified
+ * schema OID if we are looking up an explicitly-qualified operator name.
+ *
+ * search_path has to be fixed-length since the hashtable code insists on
+ * fixed-size keys.  If your search path is longer than that, we just punt
+ * and don't cache anything.
+ */
+
+/* If your search_path is longer than this, sucks to be you ... */
+#define MAX_CACHED_PATH_LEN		16
+
+typedef struct OprCacheKey
+{
+	char		oprname[NAMEDATALEN];
+	Oid			left_arg;		/* Left input OID, or 0 if prefix op */
+	Oid			right_arg;		/* Right input OID, or 0 if postfix op */
+	Oid			search_path[MAX_CACHED_PATH_LEN];
+} OprCacheKey;
+
+typedef struct OprCacheEntry
+{
+	/* the hash lookup key MUST BE FIRST */
+	OprCacheKey	key;
+
+	Oid			opr_oid;		/* OID of the resolved operator */
+} OprCacheEntry;
+
+
 static Oid	binary_oper_exact(List *opname, Oid arg1, Oid arg2);
 static FuncDetailCode oper_select_candidate(int nargs,
 					  Oid *input_typeids,
@@ -42,6 +78,11 @@ static void op_error(ParseState *pstate, List *op, char oprkind,
 static Expr *make_op_expr(ParseState *pstate, Operator op,
 			 Node *ltree, Node *rtree,
 			 Oid ltypeId, Oid rtypeId);
+static bool make_oper_cache_key(OprCacheKey *key, List *opname,
+								Oid ltypeId, Oid rtypeId);
+static Oid	find_oper_cache_entry(OprCacheKey *key);
+static void make_oper_cache_entry(OprCacheKey *key, Oid opr_oid);
+static void InvalidateOprCacheCallBack(Datum arg, Oid relid);
 
 
 /*
@@ -496,9 +537,28 @@ oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId,
 	 bool noError, int location)
 {
 	Oid			operOid;
+	OprCacheKey	key;
+	bool		key_ok;
 	FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
 	HeapTuple	tup = NULL;
 
+	/*
+	 * Try to find the mapping in the lookaside cache.
+	 */
+	key_ok = make_oper_cache_key(&key, opname, ltypeId, rtypeId);
+	if (key_ok)
+	{
+		operOid = find_oper_cache_entry(&key);
+		if (OidIsValid(operOid))
+		{
+			tup = SearchSysCache(OPEROID,
+								 ObjectIdGetDatum(operOid),
+								 0, 0, 0);
+			if (HeapTupleIsValid(tup))
+				return (Operator) tup;
+		}
+	}
+
 	/*
 	 * First try for an "exact" match.
 	 */
@@ -537,7 +597,12 @@ oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId,
 							 ObjectIdGetDatum(operOid),
 							 0, 0, 0);
 
-	if (!HeapTupleIsValid(tup) && !noError)
+	if (HeapTupleIsValid(tup))
+	{
+		if (key_ok)
+			make_oper_cache_entry(&key, operOid);
+	}
+	else if (!noError)
 		op_error(pstate, opname, 'b', ltypeId, rtypeId, fdresult, location);
 
 	return (Operator) tup;
@@ -622,9 +687,28 @@ Operator
 right_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
 {
 	Oid			operOid;
+	OprCacheKey	key;
+	bool		key_ok;
 	FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
 	HeapTuple	tup = NULL;
 
+	/*
+	 * Try to find the mapping in the lookaside cache.
+	 */
+	key_ok = make_oper_cache_key(&key, op, arg, InvalidOid);
+	if (key_ok)
+	{
+		operOid = find_oper_cache_entry(&key);
+		if (OidIsValid(operOid))
+		{
+			tup = SearchSysCache(OPEROID,
+								 ObjectIdGetDatum(operOid),
+								 0, 0, 0);
+			if (HeapTupleIsValid(tup))
+				return (Operator) tup;
+		}
+	}
+
 	/*
 	 * First try for an "exact" match.
 	 */
@@ -655,7 +739,12 @@ right_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
 							 ObjectIdGetDatum(operOid),
 							 0, 0, 0);
 
-	if (!HeapTupleIsValid(tup) && !noError)
+	if (HeapTupleIsValid(tup))
+	{
+		if (key_ok)
+			make_oper_cache_entry(&key, operOid);
+	}
+	else if (!noError)
 		op_error(pstate, op, 'r', arg, InvalidOid, fdresult, location);
 
 	return (Operator) tup;
@@ -680,9 +769,28 @@ Operator
 left_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
 {
 	Oid			operOid;
+	OprCacheKey	key;
+	bool		key_ok;
 	FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
 	HeapTuple	tup = NULL;
 
+	/*
+	 * Try to find the mapping in the lookaside cache.
+	 */
+	key_ok = make_oper_cache_key(&key, op, InvalidOid, arg);
+	if (key_ok)
+	{
+		operOid = find_oper_cache_entry(&key);
+		if (OidIsValid(operOid))
+		{
+			tup = SearchSysCache(OPEROID,
+								 ObjectIdGetDatum(operOid),
+								 0, 0, 0);
+			if (HeapTupleIsValid(tup))
+				return (Operator) tup;
+		}
+	}
+
 	/*
 	 * First try for an "exact" match.
 	 */
@@ -725,7 +833,12 @@ left_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
 							 ObjectIdGetDatum(operOid),
 							 0, 0, 0);
 
-	if (!HeapTupleIsValid(tup) && !noError)
+	if (HeapTupleIsValid(tup))
+	{
+		if (key_ok)
+			make_oper_cache_entry(&key, operOid);
+	}
+	else if (!noError)
 		op_error(pstate, op, 'l', InvalidOid, arg, fdresult, location);
 
 	return (Operator) tup;
@@ -1018,3 +1131,160 @@ make_op_expr(ParseState *pstate, Operator op,
 
 	return (Expr *) result;
 }
+
+
+/*
+ * Lookaside cache to speed operator lookup.  Possibly this should be in
+ * a separate module under utils/cache/ ?
+ *
+ * The idea here is that the mapping from operator name and given argument
+ * types is constant for a given search path (or single specified schema OID)
+ * so long as the contents of pg_operator and pg_cast don't change.  And that
+ * mapping is pretty expensive to compute, especially for ambiguous operators;
+ * this is mainly because there are a *lot* of instances of popular operator
+ * names such as "=", and we have to check each one to see which is the
+ * best match.  So once we have identified the correct mapping, we save it
+ * in a cache that need only be flushed on pg_operator or pg_cast change.
+ * (pg_cast must be considered because changes in the set of implicit casts
+ * affect the set of applicable operators for any given input datatype.)
+ *
+ * XXX in principle, ALTER TABLE ... INHERIT could affect the mapping as
+ * well, but we disregard that since there's no convenient way to find out
+ * about it, and it seems a pretty far-fetched corner-case anyway.
+ *
+ * Note: at some point it might be worth doing a similar cache for function
+ * lookups.  However, the potential gain is a lot less since (a) function
+ * names are generally not overloaded as heavily as operator names, and
+ * (b) we'd have to flush on pg_proc updates, which are probably a good
+ * deal more common than pg_operator updates.
+ */
+
+/* The operator cache hashtable */
+static HTAB *OprCacheHash = NULL;
+
+
+/*
+ * make_oper_cache_key
+ *		Fill the lookup key struct given operator name and arg types.
+ *
+ * Returns TRUE if successful, FALSE if the search_path overflowed
+ * (hence no caching is possible).
+ */
+static bool
+make_oper_cache_key(OprCacheKey *key, List *opname, Oid ltypeId, Oid rtypeId)
+{
+	char	   *schemaname;
+	char	   *opername;
+
+	/* deconstruct the name list */
+	DeconstructQualifiedName(opname, &schemaname, &opername);
+
+	/* ensure zero-fill for stable hashing */
+	MemSet(key, 0, sizeof(OprCacheKey));
+
+	/* save operator name and input types into key */
+	strlcpy(key->oprname, opername, NAMEDATALEN);
+	key->left_arg = ltypeId;
+	key->right_arg = rtypeId;
+
+	if (schemaname)
+	{
+		/* search only in exact schema given */
+		key->search_path[0] = LookupExplicitNamespace(schemaname);
+	}
+	else
+	{
+		/* get the active search path */
+		if (fetch_search_path_array(key->search_path,
+									MAX_CACHED_PATH_LEN) > MAX_CACHED_PATH_LEN)
+			return false;		/* oops, didn't fit */
+	}
+
+	return true;
+}
+
+/*
+ * find_oper_cache_entry
+ *
+ * Look for a cache entry matching the given key.  If found, return the
+ * contained operator OID, else return InvalidOid.
+ */
+static Oid
+find_oper_cache_entry(OprCacheKey *key)
+{
+	OprCacheEntry *oprentry;
+
+	if (OprCacheHash == NULL)
+	{
+		/* First time through: initialize the hash table */
+		HASHCTL		ctl;
+
+		if (!CacheMemoryContext)
+			CreateCacheMemoryContext();
+
+		MemSet(&ctl, 0, sizeof(ctl));
+		ctl.keysize = sizeof(OprCacheKey);
+		ctl.entrysize = sizeof(OprCacheEntry);
+		ctl.hash = tag_hash;
+		OprCacheHash = hash_create("Operator lookup cache", 256,
+									&ctl, HASH_ELEM | HASH_FUNCTION);
+
+		/* Arrange to flush cache on pg_operator and pg_cast changes */
+		CacheRegisterSyscacheCallback(OPERNAMENSP,
+									  InvalidateOprCacheCallBack,
+									  (Datum) 0);
+		CacheRegisterSyscacheCallback(CASTSOURCETARGET,
+									  InvalidateOprCacheCallBack,
+									  (Datum) 0);
+	}
+
+	/* Look for an existing entry */
+	oprentry = (OprCacheEntry *) hash_search(OprCacheHash,
+											 (void *) key,
+											 HASH_FIND, NULL);
+	if (oprentry == NULL)
+		return InvalidOid;
+
+	return oprentry->opr_oid;
+}
+
+/*
+ * make_oper_cache_entry
+ *
+ * Insert a cache entry for the given key.
+ */
+static void
+make_oper_cache_entry(OprCacheKey *key, Oid opr_oid)
+{
+	OprCacheEntry *oprentry;
+
+	Assert(OprCacheHash != NULL);
+
+	oprentry = (OprCacheEntry *) hash_search(OprCacheHash,
+											 (void *) key,
+											 HASH_ENTER, NULL);
+	oprentry->opr_oid = opr_oid;
+}
+
+/*
+ * Callback for pg_operator and pg_cast inval events
+ */
+static void
+InvalidateOprCacheCallBack(Datum arg, Oid relid)
+{
+	HASH_SEQ_STATUS status;
+	OprCacheEntry *hentry;
+
+	Assert(OprCacheHash != NULL);
+
+	/* Currently we just flush all entries; hard to be smarter ... */
+	hash_seq_init(&status, OprCacheHash);
+
+	while ((hentry = (OprCacheEntry *) hash_seq_search(&status)) != NULL)
+	{
+		if (hash_search(OprCacheHash,
+						(void *) &hentry->key,
+						HASH_REMOVE, NULL) == NULL)
+			elog(ERROR, "hash table corrupted");
+	}
+}
diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h
index c2c3f7512a4..c2ad9d36c3b 100644
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.51 2007/11/15 22:25:17 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.52 2007/11/28 18:47:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -115,5 +115,6 @@ extern void AtEOSubXact_Namespace(bool isCommit, SubTransactionId mySubid,
 extern char *namespace_search_path;
 
 extern List *fetch_search_path(bool includeImplicit);
+extern int	fetch_search_path_array(Oid *sarray, int sarray_len);
 
 #endif   /* NAMESPACE_H */
-- 
GitLab