From dd728826c538f000220af98de025c00114ad0631 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 19 Dec 2016 12:31:50 -0500
Subject: [PATCH] Fix locking problem in _hash_squeezebucket() /
 _hash_freeovflpage().

A bucket squeeze operation needs to lock each page of the bucket
before releasing the prior page, but the previous coding fumbled the
locking when freeing an overflow page during a bucket squeeze
operation.  Commit 6d46f4783efe457f74816a75173eb23ed8930020
introduced this bug.

Amit Kapila, with help from Kuntal Ghosh and Dilip Kumar, after
an initial trouble report by Jeff Janes.  Reviewed by me.  I also
fixed a problem with a comment.
---
 src/backend/access/hash/hashovfl.c | 41 +++++++++---------------------
 src/include/access/hash.h          |  2 +-
 2 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 5f1513bb43c..df7838cd6ba 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -377,12 +377,11 @@ _hash_firstfreebit(uint32 map)
  *	NB: caller must not hold lock on metapage, nor on page, that's next to
  *	ovflbuf in the bucket chain.  We don't acquire the lock on page that's
  *	prior to ovflbuf in chain if it is same as wbuf because the caller already
- *	has a lock on same.  This function releases the lock on wbuf and caller
- *	is responsible for releasing the pin on same.
+ *	has a lock on same.
  */
 BlockNumber
 _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
-				   bool wbuf_dirty, BufferAccessStrategy bstrategy)
+				   BufferAccessStrategy bstrategy)
 {
 	HashMetaPage metap;
 	Buffer		metabuf;
@@ -447,24 +446,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 		Assert(prevopaque->hasho_bucket == bucket);
 		prevopaque->hasho_nextblkno = nextblkno;
 
+		MarkBufferDirty(prevbuf);
 		if (prevblkno != writeblkno)
-		{
-			MarkBufferDirty(prevbuf);
 			_hash_relbuf(rel, prevbuf);
-		}
-		else
-		{
-			/* ensure to mark prevbuf as dirty */
-			wbuf_dirty = true;
-		}
 	}
-
-	/* write and unlock the write buffer */
-	if (wbuf_dirty)
-		_hash_chgbufaccess(rel, wbuf, HASH_WRITE, HASH_NOLOCK);
-	else
-		_hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
-
 	if (BlockNumberIsValid(nextblkno))
 	{
 		Buffer		nextbuf = _hash_getbuf_with_strategy(rel,
@@ -783,30 +768,28 @@ _hash_squeezebucket(Relation rel,
 		 * Tricky point here: if our read and write pages are adjacent in the
 		 * bucket chain, our write lock on wbuf will conflict with
 		 * _hash_freeovflpage's attempt to update the sibling links of the
-		 * removed page.  In that case, we don't need to lock it again and we
-		 * always release the lock on wbuf in _hash_freeovflpage and then
-		 * retake it again here.  This will not only simplify the code, but is
-		 * required to atomically log the changes which will be helpful when
-		 * we write WAL for hash indexes.
+		 * removed page.  In that case, we don't need to lock it again.
 		 */
 		rblkno = ropaque->hasho_prevblkno;
 		Assert(BlockNumberIsValid(rblkno));
 
 		/* free this overflow page (releases rbuf) */
-		_hash_freeovflpage(rel, rbuf, wbuf, wbuf_dirty, bstrategy);
+		_hash_freeovflpage(rel, rbuf, wbuf, bstrategy);
+
+		if (wbuf_dirty)
+			MarkBufferDirty(wbuf);
 
 		/* are we freeing the page adjacent to wbuf? */
 		if (rblkno == wblkno)
 		{
 			/* retain the pin on primary bucket page till end of bucket scan */
-			if (wblkno != bucket_blkno)
-				_hash_dropbuf(rel, wbuf);
+			if (wblkno == bucket_blkno)
+				_hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
+			else
+				_hash_relbuf(rel, wbuf);
 			return;
 		}
 
-		/* lock the overflow page being written, then get the previous one */
-		_hash_chgbufaccess(rel, wbuf, HASH_NOLOCK, HASH_WRITE);
-
 		rbuf = _hash_getbuf_with_strategy(rel,
 										  rblkno,
 										  HASH_WRITE,
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 9ce44a7f4e1..627fa2c0c58 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -314,7 +314,7 @@ extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
 /* hashovfl.c */
 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin);
 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
-				   bool wbuf_dirty, BufferAccessStrategy bstrategy);
+				   BufferAccessStrategy bstrategy);
 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
 				 BlockNumber blkno, ForkNumber forkNum);
 extern void _hash_squeezebucket(Relation rel,
-- 
GitLab