From cb2f2873d6b81ad7f0a9733ba738bfac0746fb7b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 10 May 2012 17:26:08 -0400
Subject: [PATCH] Temporarily revert stats collector latch changes so we can
 ship beta1.

This patch reverts commit 49340037ee3ab46cb24144a86705e35f272c24d5 and some
follow-on tweaking in pgstat.c.  While the basic scheme of latch-ifying the
stats collector seems sound enough, it's failing on most Windows buildfarm
members for unknown reasons, and there's no time left to debug that before
9.2beta1.  Better to ship a beta version without this improvement.  I hope
to re-revert this once beta1 is out, though.
---
 src/backend/postmaster/pgstat.c | 185 ++++++++++++++++++++------------
 1 file changed, 117 insertions(+), 68 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 01273e19340..ac971abb184 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -28,6 +28,12 @@
 #include <arpa/inet.h>
 #include <signal.h>
 #include <time.h>
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
+#ifdef HAVE_SYS_POLL_H
+#include <sys/poll.h>
+#endif
 
 #include "pgstat.h"
 
@@ -49,8 +55,8 @@
 #include "storage/backendid.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
-#include "storage/latch.h"
 #include "storage/pg_shmem.h"
+#include "storage/pmsignal.h"
 #include "storage/procsignal.h"
 #include "utils/ascii.h"
 #include "utils/guc.h"
@@ -88,6 +94,9 @@
 										 * failed statistics collector; in
 										 * seconds. */
 
+#define PGSTAT_SELECT_TIMEOUT	2		/* How often to check for postmaster
+										 * death; in seconds. */
+
 #define PGSTAT_POLL_LOOP_COUNT	(PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
 #define PGSTAT_INQ_LOOP_COUNT	(PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
 
@@ -130,8 +139,6 @@ PgStat_MsgBgWriter BgWriterStats;
  */
 NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
 
-static Latch pgStatLatch;
-
 static struct sockaddr_storage pgStatAddr;
 
 static time_t last_pgstat_start_time;
@@ -3002,7 +3009,15 @@ PgstatCollectorMain(int argc, char *argv[])
 {
 	int			len;
 	PgStat_Msg	msg;
-	int			wr;
+
+#ifndef WIN32
+#ifdef HAVE_POLL
+	struct pollfd input_fd;
+#else
+	struct timeval sel_timeout;
+	fd_set		rfds;
+#endif
+#endif
 
 	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */
 
@@ -3021,13 +3036,9 @@ PgstatCollectorMain(int argc, char *argv[])
 		elog(FATAL, "setsid() failed: %m");
 #endif
 
-	/* Initialize private latch for use by signal handlers */
-	InitLatch(&pgStatLatch);
-
 	/*
 	 * Ignore all signals usually bound to some action in the postmaster,
-	 * except SIGHUP and SIGQUIT.  Note we don't need a SIGUSR1 handler to
-	 * support latch operations, because pgStatLatch is local not shared.
+	 * except SIGQUIT.
 	 */
 	pqsignal(SIGHUP, pgstat_sighup_handler);
 	pqsignal(SIGINT, SIG_IGN);
@@ -3062,24 +3073,26 @@ PgstatCollectorMain(int argc, char *argv[])
 	pgStatRunningInCollector = true;
 	pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
 
+	/*
+	 * Setup the descriptor set for select(2).	Since only one bit in the set
+	 * ever changes, we need not repeat FD_ZERO each time.
+	 */
+#if !defined(HAVE_POLL) && !defined(WIN32)
+	FD_ZERO(&rfds);
+#endif
+
 	/*
 	 * Loop to process messages until we get SIGQUIT or detect ungraceful
 	 * death of our parent postmaster.
 	 *
-	 * For performance reasons, we don't want to do ResetLatch/WaitLatch after
-	 * every message; instead, do that only after a recv() fails to obtain a
-	 * message.  (This effectively means that if backends are sending us stuff
-	 * like mad, we won't notice postmaster death until things slack off a
-	 * bit; which seems fine.)  To do that, we have an inner loop that
-	 * iterates as long as recv() succeeds.  We do recognize got_SIGHUP inside
-	 * the inner loop, which means that such interrupts will get serviced but
-	 * the latch won't get cleared until next time there is a break in the
-	 * action.
+	 * For performance reasons, we don't want to do a PostmasterIsAlive() test
+	 * after every message; instead, do it only when select()/poll() is
+	 * interrupted by timeout.	In essence, we'll stay alive as long as
+	 * backends keep sending us stuff often, even if the postmaster is gone.
 	 */
 	for (;;)
 	{
-		/* Clear any already-pending wakeups */
-		ResetLatch(&pgStatLatch);
+		int			got_data;
 
 		/*
 		 * Quit if we get SIGQUIT from the postmaster.
@@ -3088,37 +3101,87 @@ PgstatCollectorMain(int argc, char *argv[])
 			break;
 
 		/*
-		 * Inner loop iterates as long as we keep getting messages, or until
-		 * need_exit becomes set.
+		 * Reload configuration if we got SIGHUP from the postmaster.
 		 */
-		while (!need_exit)
+		if (got_SIGHUP)
 		{
-			/*
-			 * Reload configuration if we got SIGHUP from the postmaster.
-			 */
-			if (got_SIGHUP)
-			{
-				got_SIGHUP = false;
-				ProcessConfigFile(PGC_SIGHUP);
-			}
+			ProcessConfigFile(PGC_SIGHUP);
+			got_SIGHUP = false;
+		}
 
-			/*
-			 * Write the stats file if a new request has arrived that is not
-			 * satisfied by existing file.
-			 */
-			if (last_statwrite < last_statrequest)
-				pgstat_write_statsfile(false);
+		/*
+		 * Write the stats file if a new request has arrived that is not
+		 * satisfied by existing file.
+		 */
+		if (last_statwrite < last_statrequest)
+			pgstat_write_statsfile(false);
 
-			/*
-			 * Try to receive and process a message.  This will not block,
-			 * since the socket is set to non-blocking mode.
-			 */
+		/*
+		 * Wait for a message to arrive; but not for more than
+		 * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
+		 * shut down after an ungraceful postmaster termination; so it needn't
+		 * be very fast.  However, on some systems SIGQUIT won't interrupt the
+		 * poll/select call, so this also limits speed of response to SIGQUIT,
+		 * which is more important.)
+		 *
+		 * We use poll(2) if available, otherwise select(2). Win32 has its own
+		 * implementation.
+		 */
+#ifndef WIN32
+#ifdef HAVE_POLL
+		input_fd.fd = pgStatSock;
+		input_fd.events = POLLIN | POLLERR;
+		input_fd.revents = 0;
+
+		if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			ereport(ERROR,
+					(errcode_for_socket_access(),
+					 errmsg("poll() failed in statistics collector: %m")));
+		}
+
+		got_data = (input_fd.revents != 0);
+#else							/* !HAVE_POLL */
+
+		FD_SET(pgStatSock, &rfds);
+
+		/*
+		 * timeout struct is modified by select() on some operating systems,
+		 * so re-fill it each time.
+		 */
+		sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
+		sel_timeout.tv_usec = 0;
+
+		if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			ereport(ERROR,
+					(errcode_for_socket_access(),
+					 errmsg("select() failed in statistics collector: %m")));
+		}
+
+		got_data = FD_ISSET(pgStatSock, &rfds);
+#endif   /* HAVE_POLL */
+#else							/* WIN32 */
+		got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
+											   PGSTAT_SELECT_TIMEOUT * 1000);
+#endif
+
+		/*
+		 * If there is a message on the socket, read it and check for
+		 * validity.
+		 */
+		if (got_data)
+		{
 			len = recv(pgStatSock, (char *) &msg,
 					   sizeof(PgStat_Msg), 0);
 			if (len < 0)
 			{
-				if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
-					break;		/* out of inner loop */
+				if (errno == EINTR)
+					continue;
 				ereport(ERROR,
 						(errcode_for_socket_access(),
 						 errmsg("could not read statistics message: %m")));
@@ -3216,21 +3279,17 @@ PgstatCollectorMain(int argc, char *argv[])
 				default:
 					break;
 			}
-		}						/* end of inner message-processing loop */
-
-		/* Sleep until there's something to do */
-		wr = WaitLatchOrSocket(&pgStatLatch,
-							   WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
-							   pgStatSock,
-							   -1L);
-
-		/*
-		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.
-		 */
-		if (wr & WL_POSTMASTER_DEATH)
-			break;
-	}							/* end of outer loop */
+		}
+		else
+		{
+			/*
+			 * We can only get here if the select/poll timeout elapsed. Check
+			 * for postmaster death.
+			 */
+			if (!PostmasterIsAlive())
+				break;
+		}
+	}							/* end of message-processing loop */
 
 	/*
 	 * Save the final stats to reuse at next startup.
@@ -3245,24 +3304,14 @@ PgstatCollectorMain(int argc, char *argv[])
 static void
 pgstat_exit(SIGNAL_ARGS)
 {
-	int			save_errno = errno;
-
 	need_exit = true;
-	SetLatch(&pgStatLatch);
-
-	errno = save_errno;
 }
 
 /* SIGHUP handler for collector process */
 static void
 pgstat_sighup_handler(SIGNAL_ARGS)
 {
-	int			save_errno = errno;
-
 	got_SIGHUP = true;
-	SetLatch(&pgStatLatch);
-
-	errno = save_errno;
 }
 
 
-- 
GitLab