From ee4af347ba89b8492d1f86b6456865e1de1d030f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 3 Jun 2016 10:52:36 -0400
Subject: [PATCH] Measure Bloom index signature-length reloption in bits, not
 words.

Per discussion, this is a more understandable and future-proof way of
exposing the setting to users.  On-disk, we can still store it in words,
so as to not break on-disk compatibility with beta1.

Along the way, clean up the code associated with Bloom reloptions.
Provide explicit macros for default and maximum lengths rather than
having magic numbers buried in multiple places in the code.  Drop
the adjustBloomOptions() code altogether: it was useless in view of
the fact that reloptions.c already performed default-substitution and
range checking for the options.  Rename a couple of macros and types
for more clarity.

Discussion: <23767.1464926580@sss.pgh.pa.us>
---
 contrib/bloom/bloom.h   |  33 +++++++----
 contrib/bloom/blscan.c  |   2 +-
 contrib/bloom/blutils.c | 125 +++++++++++++++++-----------------------
 doc/src/sgml/bloom.sgml |  19 +++---
 4 files changed, 86 insertions(+), 93 deletions(-)

diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index c21eebfc94d..c6091a8dd66 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque;
 #define BLOOM_HEAD_BLKNO		(1)		/* first data page */
 
 /*
- * Maximum of bloom signature length in uint16. Actual value
- * is 512 bytes
+ * We store Bloom signatures as arrays of uint16 words.
  */
-#define MAX_BLOOM_LENGTH		(256)
+typedef uint16 BloomSignatureWord;
+
+#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
+
+/*
+ * Default and maximum Bloom signature length in bits.
+ */
+#define DEFAULT_BLOOM_LENGTH	(5 * SIGNWORDBITS)
+#define MAX_BLOOM_LENGTH		(256 * SIGNWORDBITS)
+
+/*
+ * Default and maximum signature bits generated per index key.
+ */
+#define DEFAULT_BLOOM_BITS		2
+#define MAX_BLOOM_BITS			(MAX_BLOOM_LENGTH - 1)
 
 /* Bloom index options */
 typedef struct BloomOptions
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int			bloomLength;	/* length of signature in uint16 */
-	int			bitSize[INDEX_MAX_KEYS];		/* signature bits per index
-												 * key */
+	int			bloomLength;	/* length of signature in words (not bits!) */
+	int			bitSize[INDEX_MAX_KEYS];	/* # of bits generated for each
+											 * index key */
 }	BloomOptions;
 
 /*
@@ -143,12 +156,10 @@ typedef struct BloomState
 /*
  * Tuples are very different from all other relations
  */
-typedef uint16 SignType;
-
 typedef struct BloomTuple
 {
 	ItemPointerData heapPtr;
-	SignType	sign[FLEXIBLE_ARRAY_MEMBER];
+	BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
 }	BloomTuple;
 
 #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
@@ -156,7 +167,7 @@ typedef struct BloomTuple
 /* Opaque data structure for bloom index scan */
 typedef struct BloomScanOpaqueData
 {
-	SignType   *sign;			/* Scan signature */
+	BloomSignatureWord *sign;			/* Scan signature */
 	BloomState	state;
 }	BloomScanOpaqueData;
 
@@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage);
 extern void BloomInitMetapage(Relation index);
 extern void BloomInitPage(Page page, uint16 flags);
 extern Buffer BloomNewBuffer(Relation index);
-extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
+extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
 extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
 extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
 
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index aebf32a8d2f..0c954dc8d55 100644
--- a/contrib/bloom/blscan.c
+++ b/contrib/bloom/blscan.c
@@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 		/* New search: have to calculate search signature */
 		ScanKey		skey = scan->keyData;
 
-		so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
+		so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
 
 		for (i = 0; i < scan->numberOfKeys; i++)
 		{
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 4a5b343dd02..876952f2d5f 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -27,23 +27,26 @@
 
 #include "bloom.h"
 
-/* Signature dealing macros */
-#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
-#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
-#define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
-#define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) )
-#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
+/* Signature dealing macros - note i is assumed to be of type int */
+#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
+#define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
 
 PG_FUNCTION_INFO_V1(blhandler);
 
-/* Kind of relation optioms for bloom index */
+/* Kind of relation options for bloom index */
 static relopt_kind bl_relopt_kind;
+/* parse table for fillRelOptions */
+static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
 
 static int32 myRand(void);
 static void mySrand(uint32 seed);
 
 /*
- * Module initialize function: initilized relation options.
+ * Module initialize function: initialize info about Bloom relation options.
+ *
+ * Note: keep this in sync with makeDefaultBloomOptions().
  */
 void
 _PG_init(void)
@@ -53,17 +56,46 @@ _PG_init(void)
 
 	bl_relopt_kind = add_reloption_kind();
 
+	/* Option for length of signature */
 	add_int_reloption(bl_relopt_kind, "length",
-					  "Length of signature in uint16 type", 5, 1, 256);
+					  "Length of signature in bits",
+					  DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
+	bl_relopt_tab[0].optname = "length";
+	bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
+	bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
 
+	/* Number of bits for each possible index column: col1, col2, ... */
 	for (i = 0; i < INDEX_MAX_KEYS; i++)
 	{
-		snprintf(buf, 16, "col%d", i + 1);
+		snprintf(buf, sizeof(buf), "col%d", i + 1);
 		add_int_reloption(bl_relopt_kind, buf,
-					  "Number of bits for corresponding column", 2, 1, 2048);
+						  "Number of bits generated for each index column",
+						  DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
+		bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
+														   buf);
+		bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
+		bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
 	}
 }
 
+/*
+ * Construct a default set of Bloom options.
+ */
+static BloomOptions *
+makeDefaultBloomOptions(void)
+{
+	BloomOptions *opts;
+	int			i;
+
+	opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+	/* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
+	opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+	for (i = 0; i < INDEX_MAX_KEYS; i++)
+		opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+	SET_VARSIZE(opts, sizeof(BloomOptions));
+	return opts;
+}
+
 /*
  * Bloom handler function: return IndexAmRoutine with access method parameters
  * and callbacks.
@@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index)
 
 	memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
 	state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
-		sizeof(SignType) * state->opts.bloomLength;
+		sizeof(BloomSignatureWord) * state->opts.bloomLength;
 }
 
 /*
@@ -208,7 +240,7 @@ mySrand(uint32 seed)
  * Add bits of given value to the signature.
  */
 void
-signValue(BloomState *state, SignType *sign, Datum value, int attno)
+signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
 {
 	uint32		hashVal;
 	int			nBit,
@@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno)
 
 	for (j = 0; j < state->opts.bitSize[attno]; j++)
 	{
-		/* prevent mutiple evaluation */
-		nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
+		/* prevent multiple evaluation in SETBIT macro */
+		nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
 		SETBIT(sign, nBit);
 	}
 }
@@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags)
 	opaque->bloom_page_id = BLOOM_PAGE_ID;
 }
 
-/*
- * Adjust options of bloom index.
- *
- * This must produce default options when *opts is initially all-zero.
- */
-static void
-adjustBloomOptions(BloomOptions *opts)
-{
-	int				i;
-
-	/* Default length of bloom filter is 5 of 16-bit integers */
-	if (opts->bloomLength <= 0)
-		opts->bloomLength = 5;
-	else if (opts->bloomLength > MAX_BLOOM_LENGTH)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("length of bloom signature (%d) is greater than maximum %d",
-						opts->bloomLength, MAX_BLOOM_LENGTH)));
-
-	/* Check signature length */
-	for (i = 0; i < INDEX_MAX_KEYS; i++)
-	{
-		/*
-		 * Zero and negative number of bits is meaningless.  Also setting
-		 * more bits than signature have seems useless.  Replace both cases
-		 * with 2 bits default.
-		 */
-		if (opts->bitSize[i] <= 0
-			|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
-			opts->bitSize[i] = 2;
-	}
-}
-
 /*
  * Fill in metapage for bloom index.
  */
@@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage)
 
 	/*
 	 * Choose the index's options.  If reloptions have been assigned, use
-	 * those, otherwise create default options by applying adjustBloomOptions
-	 * to a zeroed chunk of memory.  We apply adjustBloomOptions to existing
-	 * reloptions too, just out of paranoia; they should be valid already.
+	 * those, otherwise create default options.
 	 */
 	opts = (BloomOptions *) index->rd_options;
 	if (!opts)
-		opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
-	adjustBloomOptions(opts);
+		opts = makeDefaultBloomOptions();
 
 	/*
 	 * Initialize contents of meta page, including a copy of the options,
@@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate)
 	relopt_value *options;
 	int			numoptions;
 	BloomOptions *rdopts;
-	relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
-	int			i;
-	char		buf[16];
-
-	/* Option for length of signature */
-	tab[0].optname = "length";
-	tab[0].opttype = RELOPT_TYPE_INT;
-	tab[0].offset = offsetof(BloomOptions, bloomLength);
-
-	/* Number of bits for each of possible columns: col1, col2, ... */
-	for (i = 0; i < INDEX_MAX_KEYS; i++)
-	{
-		snprintf(buf, sizeof(buf), "col%d", i + 1);
-		tab[i + 1].optname = pstrdup(buf);
-		tab[i + 1].opttype = RELOPT_TYPE_INT;
-		tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
-	}
 
+	/* Parse the user-given reloptions */
 	options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
 	rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
 	fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
-				   validate, tab, INDEX_MAX_KEYS + 1);
+				   validate, bl_relopt_tab, lengthof(bl_relopt_tab));
 
-	adjustBloomOptions(rdopts);
+	/* Convert signature length from # of bits to # of words, rounding up */
+	rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
 
 	return (bytea *) rdopts;
 }
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
index 49cb066144c..8667763c43c 100644
--- a/doc/src/sgml/bloom.sgml
+++ b/doc/src/sgml/bloom.sgml
@@ -8,8 +8,8 @@
  </indexterm>
 
  <para>
-  <literal>bloom</> is a module which implements an index access method.  It comes
-  as an example of custom access methods and generic WAL records usage.  But it
+  <literal>bloom</> is a module that implements an index access method.  It comes
+  as an example of custom access methods and generic WAL record usage.  But it
   is also useful in itself.
  </para>
 
@@ -22,8 +22,9 @@
    allows fast exclusion of non-candidate tuples via signatures.
    Since a signature is a lossy representation of all indexed attributes,
    search results must be rechecked using heap information.
-   The user can specify signature length (in uint16, default is 5) and the
-   number of bits, which can be set per attribute (1 < colN < 2048).
+   The user can specify signature length in bits (default 80, maximum 4096)
+   and the number of bits generated for each index column (default 2,
+   maximum 4095).
   </para>
 
   <para>
@@ -51,17 +52,17 @@
     <term><literal>length</></term>
     <listitem>
      <para>
-      Length of signature in uint16 type values
+      Length of signature in bits
      </para>
     </listitem>
    </varlistentry>
    </variablelist>
    <variablelist>
    <varlistentry>
-    <term><literal>col1 &mdash; col16</></term>
+    <term><literal>col1 &mdash; col32</></term>
     <listitem>
      <para>
-      Number of bits for corresponding column
+      Number of bits generated for each index column
      </para>
     </listitem>
    </varlistentry>
@@ -77,12 +78,12 @@
 
 <programlisting>
 CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
-       WITH (length=5, col1=2, col2=2, col3=4);
+       WITH (length=80, col1=2, col2=2, col3=4);
 </programlisting>
 
   <para>
    Here, we created a bloom index with a signature length of 80 bits,
-   and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
+   and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
   </para>
 
   <para>
-- 
GitLab