From f40022f1adaeff85b01d98fea38cf5aa16814aa7 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 10 May 2012 14:34:22 -0400
Subject: [PATCH] Make WaitLatch's WL_POSTMASTER_DEATH result trustworthy;
 simplify callers.

Per a suggestion from Peter Geoghegan, make WaitLatch responsible for
verifying that the WL_POSTMASTER_DEATH bit it returns is truthful (by
testing PostmasterIsAlive).  Then simplify its callers, who no longer
need to do that for themselves.  Remove weasel wording about falsely-set
result bits from WaitLatch's API contract.
---
 src/backend/port/unix_latch.c         | 35 ++++++++++++++++++---------
 src/backend/port/win32_latch.c        | 12 +++++++--
 src/backend/postmaster/autovacuum.c   |  9 ++++---
 src/backend/postmaster/bgwriter.c     |  7 ++----
 src/backend/postmaster/checkpointer.c |  7 ++----
 src/backend/postmaster/pgstat.c       |  7 ++----
 src/backend/postmaster/walwriter.c    |  7 ++----
 7 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c
index 10bf2dbec7e..30d1a488f0a 100644
--- a/src/backend/port/unix_latch.c
+++ b/src/backend/port/unix_latch.c
@@ -50,6 +50,7 @@
 #include "miscadmin.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
+#include "storage/pmsignal.h"
 #include "storage/shmem.h"
 
 /* Are we currently in WaitLatch? The signal handler would like to know. */
@@ -160,15 +161,7 @@ DisownLatch(volatile Latch *latch)
  *
  * Returns bit mask indicating which condition(s) caused the wake-up. Note
  * that if multiple wake-up conditions are true, there is no guarantee that
- * we return all of them in one call, but we will return at least one. Also,
- * according to the select(2) man page on Linux, select(2) may spuriously
- * return and report a file descriptor as readable, when it's not. We use
- * select(2), so WaitLatch can also spuriously claim that a socket is
- * readable, or postmaster has died, even when none of the wake conditions
- * have been satisfied. That should be rare in practice, but the caller
- * should not use the return value for anything critical, re-checking the
- * situation with PostmasterIsAlive() or read() on a socket as necessary.
- * The latch and timeout flag bits can be trusted, however.
+ * we return all of them in one call, but we will return at least one.
  */
 int
 WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
@@ -318,7 +311,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		if ((wakeEvents & WL_POSTMASTER_DEATH) &&
 			(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
 		{
-			result |= WL_POSTMASTER_DEATH;
+			/*
+			 * According to the select(2) man page on Linux, select(2) may
+			 * spuriously return and report a file descriptor as readable,
+			 * when it's not; and presumably so can poll(2).  It's not clear
+			 * that the relevant cases would ever apply to the postmaster
+			 * pipe, but since the consequences of falsely returning
+			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
+			 * trouble to positively verify EOF with PostmasterIsAlive().
+			 */
+			if (!PostmasterIsAlive())
+				result |= WL_POSTMASTER_DEATH;
 		}
 
 #else /* !HAVE_POLL */
@@ -380,7 +383,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		if ((wakeEvents & WL_POSTMASTER_DEATH) &&
 			 FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
 		{
-			result |= WL_POSTMASTER_DEATH;
+			/*
+			 * According to the select(2) man page on Linux, select(2) may
+			 * spuriously return and report a file descriptor as readable,
+			 * when it's not; and presumably so can poll(2).  It's not clear
+			 * that the relevant cases would ever apply to the postmaster
+			 * pipe, but since the consequences of falsely returning
+			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
+			 * trouble to positively verify EOF with PostmasterIsAlive().
+			 */
+			if (!PostmasterIsAlive())
+				result |= WL_POSTMASTER_DEATH;
 		}
 #endif /* HAVE_POLL */
 	} while (result == 0);
diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c
index e4fe05fa4b6..622798924dc 100644
--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@@ -26,6 +26,7 @@
 #include "miscadmin.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
+#include "storage/pmsignal.h"
 #include "storage/shmem.h"
 
 
@@ -217,8 +218,15 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
 				 rc == WAIT_OBJECT_0 + pmdeath_eventno)
 		{
-			/* Postmaster died */
-			result |= WL_POSTMASTER_DEATH;
+			/*
+			 * Postmaster apparently died.  Since the consequences of falsely
+			 * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
+			 * take the trouble to positively verify this with
+			 * PostmasterIsAlive(), even though there is no known reason to
+			 * think that the event could be falsely set on Windows.
+			 */
+			if (!PostmasterIsAlive())
+				result |= WL_POSTMASTER_DEATH;
 		}
 		else
 			elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 49c15c32616..fa9a1159450 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -574,6 +574,7 @@ AutoVacLauncherMain(int argc, char *argv[])
 		TimestampTz current_time = 0;
 		bool		can_launch;
 		Dlelem	   *elem;
+		int			rc;
 
 		/*
 		 * This loop is a bit different from the normal use of WaitLatch,
@@ -592,9 +593,9 @@ AutoVacLauncherMain(int argc, char *argv[])
 		 * Wait until naptime expires or we get some type of signal (all the
 		 * signal handlers will wake us by calling SetLatch).
 		 */
-		WaitLatch(&MyProc->procLatch,
-				  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
-				  (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
 
 		ResetLatch(&MyProc->procLatch);
 
@@ -604,7 +605,7 @@ AutoVacLauncherMain(int argc, char *argv[])
 		 * Emergency bailout if postmaster has died.  This is to avoid the
 		 * necessity for manual cleanup of all postmaster children.
 		 */
-		if (!PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 			proc_exit(1);
 
 		/* the normal shutdown case */
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index abb665ee50d..5daf8c6177b 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -48,7 +48,6 @@
 #include "storage/buf_internals.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
-#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/shmem.h"
 #include "storage/smgr.h"
@@ -323,11 +322,9 @@ BackgroundWriterMain(void)
 
 		/*
 		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.  Note
-		 * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
-		 * if it is set, recheck with PostmasterIsAlive before believing it.
+		 * necessity for manual cleanup of all postmaster children.
 		 */
-		if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 			exit(1);
 
 		prev_hibernate = can_hibernate;
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 7f8ba95c44d..5db60cd5d98 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -50,7 +50,6 @@
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
-#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/shmem.h"
 #include "storage/smgr.h"
@@ -581,11 +580,9 @@ CheckpointerMain(void)
 
 		/*
 		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.  Note
-		 * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
-		 * if it is set, recheck with PostmasterIsAlive before believing it.
+		 * necessity for manual cleanup of all postmaster children.
 		 */
-		if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 			exit(1);
 	}
 }
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 15002dcd9d1..01273e19340 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -51,7 +51,6 @@
 #include "storage/ipc.h"
 #include "storage/latch.h"
 #include "storage/pg_shmem.h"
-#include "storage/pmsignal.h"
 #include "storage/procsignal.h"
 #include "utils/ascii.h"
 #include "utils/guc.h"
@@ -3227,11 +3226,9 @@ PgstatCollectorMain(int argc, char *argv[])
 
 		/*
 		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.  Note
-		 * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
-		 * if it is set, recheck with PostmasterIsAlive before believing it.
+		 * necessity for manual cleanup of all postmaster children.
 		 */
-		if ((wr & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
+		if (wr & WL_POSTMASTER_DEATH)
 			break;
 	}							/* end of outer loop */
 
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 886ad4f9267..77455db1666 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -53,7 +53,6 @@
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
-#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/smgr.h"
 #include "utils/guc.h"
@@ -305,11 +304,9 @@ WalWriterMain(void)
 
 		/*
 		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.  Note
-		 * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
-		 * if it is set, recheck with PostmasterIsAlive before believing it.
+		 * necessity for manual cleanup of all postmaster children.
 		 */
-		if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 			exit(1);
 	}
 }
-- 
GitLab