From ab5b4e2f9ede250bd77d09aa8ec60bfeb982fb01 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 21 Jul 2009 19:53:12 +0000
Subject: [PATCH] Speed up AllocSetFreeIndex, which is a significant cost in
 palloc and pfree, by using a lookup table instead of a naive shift-and-count
 loop.  Based on code originally posted by Sean Eron Anderson at
 http://graphics.stanford.edu/%7eseander/bithacks.html. Greg Stark did the
 research and benchmarking to show that this is what we should use.  Jeremy
 Kerr first noticed that this is a hotspot that could be optimized, though we
 ended up not using his suggestion of platform-specific bit-searching code.

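As a worked example of the new computation (with ALLOC_MINBITS = 3, its
value in aset.c): a request of size = 100 gives
tsize = (100 - 1) >> 3 = 12; the high byte t = 12 >> 8 is 0, so
idx = LogTable256[12] = 4, selecting the freelist of 2^(4+3) = 128-byte
chunks.  The old loop reached the same idx = 4 only after four
shift-and-test iterations.
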
---
 src/backend/utils/mmgr/aset.c | 41 +++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index f11e15d57c5..520c9acbcce 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/aset.c,v 1.79 2009/06/11 14:49:06 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/aset.c,v 1.80 2009/07/21 19:53:12 tgl Exp $
  *
  * NOTE:
  *	This is a new (Feb. 05, 1999) implementation of the allocation set
@@ -238,6 +238,17 @@ static MemoryContextMethods AllocSetMethods = {
 #endif
 };
 
+/*
+ * Lookup table for AllocSetFreeIndex: maps n to the bit length of n
+ */
+#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
+
+static const unsigned char LogTable256[256] =
+{
+	0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+	LT16(5), LT16(6), LT16(6), LT16(7), LT16(7), LT16(7), LT16(7),
+	LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8)
+};
 
 /* ----------
  * Debug macros
@@ -266,18 +277,30 @@ static MemoryContextMethods AllocSetMethods = {
 static inline int
 AllocSetFreeIndex(Size size)
 {
-	int			idx = 0;
+	int			idx;
+	unsigned int t,
+				tsize;
 
-	if (size > 0)
+	if (size > (1 << ALLOC_MINBITS))
 	{
-		size = (size - 1) >> ALLOC_MINBITS;
-		while (size != 0)
-		{
-			idx++;
-			size >>= 1;
-		}
+		tsize = (size - 1) >> ALLOC_MINBITS;
+
+		/*
+		 * At this point we need to obtain log2(tsize)+1, ie, the
+		 * smallest number of bits that can represent tsize.  We used
+		 * to do this with a shift-and-count loop, but this function is
+		 * enough of a hotspot to justify micro-optimization effort.
+		 * The best approach seems to be a lookup table.  Note that
+		 * this code assumes ALLOCSET_NUM_FREELISTS <= 17, since we
+		 * only examine the two low-order bytes of the tsize value.
+		 */
+		t = tsize >> 8;
+		idx = t ? LogTable256[t] + 8 : LogTable256[tsize];
+
 		Assert(idx < ALLOCSET_NUM_FREELISTS);
 	}
+	else
+		idx = 0;
 
 	return idx;
 }
-- 
GitLab
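
For review purposes, the equivalence of the two computations can be
checked outside the tree with a standalone sketch such as the one
below.  This is not part of the patch: it hard-codes ALLOC_MINBITS = 3
and ALLOCSET_NUM_FREELISTS = 11 to match aset.c, approximates Size
with size_t, and the free_index_* function names are invented for the
comparison.

#include <assert.h>
#include <stdio.h>
#include <stddef.h>

#define ALLOC_MINBITS 3
#define ALLOCSET_NUM_FREELISTS 11
#define ALLOC_CHUNK_LIMIT \
	(1 << (ALLOCSET_NUM_FREELISTS - 1 + ALLOC_MINBITS))

#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n

/* LogTable256[n] = bit length of n (0 for 0, else floor(log2(n)) + 1) */
static const unsigned char LogTable256[256] =
{
	0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
	LT16(5), LT16(6), LT16(6), LT16(7), LT16(7), LT16(7), LT16(7),
	LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8)
};

/* Pre-patch version: shift-and-count loop */
static int
free_index_loop(size_t size)
{
	int			idx = 0;

	if (size > 0)
	{
		size = (size - 1) >> ALLOC_MINBITS;
		while (size != 0)
		{
			idx++;
			size >>= 1;
		}
	}
	return idx;
}

/* Patched version: at most two byte-wide table probes */
static int
free_index_table(size_t size)
{
	int			idx;
	unsigned int t,
				tsize;

	if (size > (1 << ALLOC_MINBITS))
	{
		tsize = (size - 1) >> ALLOC_MINBITS;
		t = tsize >> 8;
		idx = t ? LogTable256[t] + 8 : LogTable256[tsize];
	}
	else
		idx = 0;
	return idx;
}

int
main(void)
{
	size_t		size;

	for (size = 0; size <= ALLOC_CHUNK_LIMIT; size++)
		assert(free_index_loop(size) == free_index_table(size));
	printf("loop and table agree for all sizes up to %d\n",
		   ALLOC_CHUNK_LIMIT);
	return 0;
}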