diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 92299272e7e0e972837c2f643dd97d413648019a..5ec006ef581f824f8c5b8696348e8341b496f678 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.183 2004/12/31 22:00:49 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.184 2005/01/03 18:49:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,7 +84,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
 				   bool bufferLockHeld);
 static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
 			bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool earlylock);
 static void write_buffer(Buffer buffer, bool unpin);
 
 
@@ -340,6 +340,10 @@ BufferAlloc(Relation reln,
 		 * allocated -- ours.  If it had a pin it wouldn't have been on
 		 * the free list.  No one else could have pinned it between
 		 * StrategyGetBuffer and here because we have the BufMgrLock.
+		 *
+		 * (We must pin the buffer before releasing BufMgrLock ourselves,
+		 * to ensure StrategyGetBuffer won't give the same buffer to someone
+		 * else.)
 		 */
 		Assert(buf->refcount == 0);
 		buf->refcount = 1;
@@ -367,9 +371,20 @@ BufferAlloc(Relation reln,
 
 			/*
 			 * Write the buffer out, being careful to release BufMgrLock
-			 * while doing the I/O.
+			 * while doing the I/O.  We also tell FlushBuffer to share-lock
+			 * the buffer before releasing BufMgrLock.  This is safe because
+			 * we know no other backend currently has the buffer pinned,
+			 * therefore no one can have it locked either, so we can always
+			 * get the lock without blocking.  It is necessary because if
+			 * we release BufMgrLock first, it's possible for someone else
+			 * to pin and exclusive-lock the buffer before we get to the
+			 * share-lock, causing us to block.  If the someone else then
+			 * blocks on a lock we hold, deadlock ensues.  This has been
+			 * observed to happen when two backends are both trying to split
+			 * btree index pages, and the second one just happens to be
+			 * trying to split the page the first one got from the freelist.
 			 */
-			FlushBuffer(buf, NULL);
+			FlushBuffer(buf, NULL, true);
 
 			/*
 			 * Somebody could have allocated another buffer for the same
@@ -766,7 +781,7 @@ BufferSync(int percent, int maxpages)
 		PinBuffer(bufHdr, true);
 		StartBufferIO(bufHdr, false);
 
-		FlushBuffer(bufHdr, NULL);
+		FlushBuffer(bufHdr, NULL, false);
 
 		TerminateBufferIO(bufHdr, 0);
 		UnpinBuffer(bufHdr, true);
@@ -1018,11 +1033,16 @@ BufferGetFileNode(Buffer buffer)
  * If the caller has an smgr reference for the buffer's relation, pass it
  * as the second parameter.  If not, pass NULL.  (Do not open relation
  * while holding BufMgrLock!)
+ *
+ * When earlylock is TRUE, we grab the per-buffer sharelock before releasing
+ * BufMgrLock, rather than after.  Normally this would be a bad idea since
+ * we might deadlock, but it is safe and necessary when called from
+ * BufferAlloc() --- see comments therein.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool earlylock)
 {
-	Buffer		buffer;
+	Buffer		buffer = BufferDescriptorGetBuffer(buf);
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcontext;
 
@@ -1033,6 +1053,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	/* To check if block content changed while flushing. - vadim 01/17/97 */
 	buf->flags &= ~BM_JUST_DIRTIED;
 
+	/*
+	 * If earlylock, grab buffer sharelock before anyone else could re-lock
+	 * the buffer.
+	 */
+	if (earlylock)
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
 	/* Release BufMgrLock while doing xlog work */
 	LWLockRelease(BufMgrLock);
 
@@ -1046,14 +1073,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	if (reln == NULL)
 		reln = smgropen(buf->tag.rnode);
 
-	buffer = BufferDescriptorGetBuffer(buf);
-
 	/*
 	 * Protect buffer content against concurrent update.  (Note that
 	 * hint-bit updates can still occur while the write is in progress,
 	 * but we assume that that will not invalidate the data written.)
 	 */
-	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	if (!earlylock)
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
 	/*
 	 * Force XLOG flush for buffer' LSN.  This implements the basic WAL
@@ -1485,7 +1511,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
 				{
 					StartBufferIO(bufHdr, false);
 
-					FlushBuffer(bufHdr, rel->rd_smgr);
+					FlushBuffer(bufHdr, rel->rd_smgr, false);
 
 					TerminateBufferIO(bufHdr, 0);
 				}