From bc9236bc014aea08e06d52516e47abed6fdcaa53 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 16 Jul 1999 22:17:06 +0000
Subject: [PATCH] Revise _bt_binsrch() so that its binary search loop takes
 care of equal-key cases, eliminating bt_firsteq().  The linear search
 formerly done by bt_firsteq() took a lot of time in the case where many equal
 keys appear on the same page.

---
 src/backend/access/nbtree/nbtsearch.c | 208 +++++++++++---------------
 1 file changed, 85 insertions(+), 123 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 74c8b173fe3..c8ab46c8bca 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.50 1999/07/16 04:58:30 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.51 1999/07/16 22:17:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,8 +26,6 @@
 
 static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey,
 			Buffer *bufP, BTStack stack_in);
-static OffsetNumber _bt_firsteq(Relation rel, TupleDesc itupdesc, Page page,
-			Size keysz, ScanKey scankey, OffsetNumber offnum);
 static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page,
 			int keysz, ScanKey scankey, OffsetNumber offnum);
 static bool
@@ -368,7 +366,9 @@ _bt_skeycmp(Relation rel,
  *		comparison for every key in the scankey.  _bt_binsrch() returns
  *		the OffsetNumber of the first matching key on the page, or the
  *		OffsetNumber at which the matching key would appear if it were
- *		on this page.
+ *		on this page.  (NOTE: in particular, this means it is possible to
+ *		return a value 1 greater than the number of keys on the page, if
+ *		the scankey is > all keys on the page.)
  *
  *		By the time this procedure is called, we're sure we're looking
  *		at the right page -- don't need to walk right.  _bt_binsrch() has
@@ -385,8 +385,8 @@ _bt_binsrch(Relation rel,
 	Page		page;
 	BTPageOpaque opaque;
 	OffsetNumber low,
-				mid,
 				high;
+	bool		haveEq;
 	int			natts = rel->rd_rel->relnatts;
 	int			result;
 
@@ -395,148 +395,112 @@ _bt_binsrch(Relation rel,
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 
 	/* by convention, item 1 on any non-rightmost page is the high key */
-	low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+	low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
 
 	high = PageGetMaxOffsetNumber(page);
 
 	/*
-	 * Since for non-rightmost pages, the first item on the page is the
-	 * high key, there are two notions of emptiness.  One is if nothing
-	 * appears on the page.  The other is if nothing but the high key
-	 * does. The reason we test high <= low, rather than high == low, is
-	 * that after vacuuming there may be nothing *but* the high key on a
-	 * page. In that case, given the scheme above, low = 2 and high = 1.
+	 * If there are no keys on the page, return the first available slot.
+	 * Note this covers two cases: the page is really empty (no keys),
+	 * or it contains only a high key.  The latter case is possible after
+	 * vacuuming.
 	 */
-
-	if (PageIsEmpty(page))
+	if (high < low)
 		return low;
-	if ((!P_RIGHTMOST(opaque) && high <= low))
-	{
-		if (high < low ||
-			(srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)))
-			return low;
-		/* It's insertion and high == low == 2 */
-		result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
-		if (result > 0)
-			return OffsetNumberNext(low);
-		return low;
-	}
 
-	while ((high - low) > 1)
+	/*
+	 * Binary search to find the first key on the page >= scan key.
+	 * Loop invariant: all slots before 'low' are < scan key, all slots
+	 * at or after 'high' are >= scan key.  Also, haveEq is true if the
+	 * tuple at 'high' is == scan key.
+	 * We can fall out when high == low.
+	 */
+	high++;						/* establish the loop invariant for high */
+	haveEq = false;
+
+	while (high > low)
 	{
-		mid = low + ((high - low) / 2);
+		OffsetNumber	mid = low + ((high - low) / 2);
+		/* We have low <= mid < high, so mid points at a real slot */
+
 		result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
 
 		if (result > 0)
-			low = mid;
-		else if (result < 0)
-			high = mid - 1;
+			low = mid + 1;
 		else
 		{
-			mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid);
-
-			/*
-			 * NOTE for multi-column indices: we may do scan using keys
-			 * not for all attrs. But we handle duplicates using all attrs
-			 * in _bt_insert/_bt_spool code. And so while searching on
-			 * internal pages having number of attrs > keysize we want to
-			 * point at the last item < the scankey, not at the first item
-			 * = the scankey (!!!), and let _bt_moveright decide later
-			 * whether to move right or not (see comments and example
-			 * there). Note also that INSERTions are not affected by this
-			 * code (natts == keysz).		   - vadim 04/15/97
-			 */
-			if (natts == keysz || opaque->btpo_flags & BTP_LEAF)
-				return mid;
-			low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-			if (mid == low)
-				return mid;
-			return OffsetNumberPrev(mid);
+			high = mid;
+			haveEq = (result == 0);
 		}
 	}
 
-	/*
-	 * We terminated because the endpoints got too close together.	There
-	 * are two cases to take care of.
+	/*--------------------
+	 * At this point we have high == low, but be careful: they could point
+	 * past the last slot on the page.  We also know that haveEq is true
+	 * if and only if there is an equal key (in which case high&low point
+	 * at the first equal key).
 	 *
-	 * For non-insertion searches on internal pages, we want to point at the
-	 * last key <, or first key =, the scankey on the page.  This
-	 * guarantees that we'll descend the tree correctly. (NOTE comments
-	 * above for multi-column indices).
-	 *
-	 * For all other cases, we want to point at the first key >= the scankey
-	 * on the page.  This guarantees that scans and insertions will happen
-	 * correctly.
+	 * On a leaf page, we always return the first key >= scan key
+	 * (which could be the last slot + 1).
+	 *--------------------
 	 */
 
-	if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT)
-	{							/* We want the last key <, or first key
-								 * ==, the scan key. */
-		result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
+	if (opaque->btpo_flags & BTP_LEAF)
+		return low;
 
-		if (result == 0)
-		{
-			mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high);
+	/*--------------------
+	 * On a non-leaf page, there are special cases:
+	 *
+	 * For an insertion (srchtype != BT_DESCENT and natts == keysz)
+	 * always return first key >= scan key (which could be off the end).
+	 *
+	 * For a standard search (srchtype == BT_DESCENT and natts == keysz)
+	 * return the first equal key if one exists, else the last lesser key
+	 * if one exists, else the first slot on the page.
+	 *
+	 * For a partial-match search (srchtype == BT_DESCENT and natts < keysz)
+	 * return the last lesser key if one exists, else the first slot.
+	 *
+	 * Old comments:
+	 *	For multi-column indices, we may scan using keys
+	 *	not for all attrs. But we handle duplicates using all attrs
+	 *	in _bt_insert/_bt_spool code. And so while searching on
+	 *	internal pages having number of attrs > keysize we want to
+	 *	point at the last item < the scankey, not at the first item
+	 *	= the scankey (!!!), and let _bt_moveright decide later
+	 *	whether to move right or not (see comments and example
+	 *	there). Note also that INSERTions are not affected by this
+	 *	code (since natts == keysz for inserts).		   - vadim 04/15/97
+	 *--------------------
+	 */
 
-			/*
-			 * If natts > keysz we want last item < the scan key. See
-			 * comments above for multi-column indices.
-			 */
-			if (natts == keysz)
-				return mid;
-			low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-			if (mid == low)
-				return mid;
-			return OffsetNumberPrev(mid);
-		}
-		else if (result > 0)
-			return high;
-		else
-			return low;
+	if (haveEq)
+	{
+		/*
+		 * There is an equal key.  We return either the first equal key
+		 * (which we just found), or the last lesser key.
+		 *
+		 * We need not check srchtype != BT_DESCENT here, since if that
+		 * is true then natts == keysz by assumption.
+		 */
+		if (natts == keysz)
+			return low;			/* return first equal key */
 	}
 	else
-/* we want the first key >= the scan key */
 	{
-		result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
-		if (result <= 0)
-			return low;
-		else
-		{
-			if (low == high)
-				return OffsetNumberNext(low);
-
-			result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
-			if (result <= 0)
-				return high;
-			else
-				return OffsetNumberNext(high);
-		}
+		/*
+		 * There is no equal key.  We return either the first greater key
+		 * (which we just found), or the last lesser key.
+		 */
+		if (srchtype != BT_DESCENT)
+			return low;			/* return first greater key */
 	}
-}
 
-static OffsetNumber
-_bt_firsteq(Relation rel,
-			TupleDesc itupdesc,
-			Page page,
-			Size keysz,
-			ScanKey scankey,
-			OffsetNumber offnum)
-{
-	BTPageOpaque opaque;
-	OffsetNumber limit;
 
-	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	if (low == (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY))
+		return low;				/* there is no prior item */
 
-	/* skip the high key, if any */
-	limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
-	/* walk backwards looking for the first key in the chain of duplicates */
-	while (offnum > limit
-		   && _bt_compare(rel, itupdesc, page,
-						  keysz, scankey, OffsetNumberPrev(offnum)) == 0)
-		offnum = OffsetNumberPrev(offnum);
-
-	return offnum;
+	return OffsetNumberPrev(low);
 }
 
 /*
@@ -571,7 +535,6 @@ _bt_compare(Relation rel,
 {
 	Datum		datum;
 	BTItem		btitem;
-	ItemId		itemid;
 	IndexTuple	itup;
 	BTPageOpaque opaque;
 	ScanKey		entry;
@@ -589,12 +552,11 @@ _bt_compare(Relation rel,
 	 */
 
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
 	if (!(opaque->btpo_flags & BTP_LEAF)
 		&& P_LEFTMOST(opaque)
 		&& offnum == P_HIKEY)
 	{
-		itemid = PageGetItemId(page, offnum);
-
 		/*
 		 * we just have to believe that this will only be called with
 		 * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first
@@ -621,7 +583,7 @@ _bt_compare(Relation rel,
 		 * on the page is greater than anything.
 		 */
 
-		if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
+		if (_bt_skeycmp(rel, keysz, scankey, page, PageGetItemId(page, offnum),
 						BTEqualStrategyNumber))
 			return 0;
 		return 1;
-- 
GitLab