From 0927d0c25c567564f1ce24ab9d8d6f1c5d83f1f0 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Fri, 1 Nov 2019 22:39:05 +0900
Subject: [PATCH] Fix race condition at backend exit when deleting element in
 syncrep queue

When a backend exits, it gets deleted from the syncrep queue if present.
The queue was checked without SyncRepLock taken in exclusive mode, so it
would have been possible for a backend to remove itself after a WAL
sender already did the job.  Fix this issue based on a suggestion from
Fujii Masao, by first checking the queue without the lock.  Then, if the
backend is present in the queue, take the lock and perform an additional
lookup check before doing the element deletion.

Author: Dongming Liu
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/a0806273-8bbb-43b3-bbe1-c45a58f6ae21.lingce.ldm@alibaba-inc.com
Backpatch-through: 9.4
---
 src/backend/replication/syncrep.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index 08588bfcb04..733ac945491 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -304,10 +304,18 @@ SyncRepCancelWait(void)
 void
 SyncRepCleanupAtProcExit(void)
 {
+	/*
+	 * First check if we are removed from the queue without the lock to not
+	 * slow down backend exit.
+	 */
 	if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))
 	{
 		LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
-		SHMQueueDelete(&(MyProc->syncRepLinks));
+
+		/* maybe we have just been removed, so recheck */
+		if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))
+			SHMQueueDelete(&(MyProc->syncRepLinks));
+
 		LWLockRelease(SyncRepLock);
 	}
 }
-- 
GitLab