From ea0382e3706ab25935c733fd452d828832e4675f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 26 Apr 2008 22:47:40 +0000
Subject: [PATCH] Code review for recent patch to terminate online backup
 during shutdown: do CancelBackup at a sane place, fix some oversights in the
 state transitions, allow only superusers to connect while we are waiting for
 backup mode to end.

---
 doc/src/sgml/runtime.sgml           | 25 ++++++++------
 src/backend/postmaster/postmaster.c | 53 ++++++++++++++++-------------
 src/backend/utils/init/postinit.c   | 13 ++++++-
 src/include/libpq/libpq-be.h        |  5 +--
 4 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 5c36f9fce00..75c6d266e9d 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.415 2008/04/23 13:44:58 mha Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.416 2008/04/26 22:47:40 tgl Exp $ -->
 
 <chapter Id="runtime">
  <title>Operating System Environment</title>
@@ -1306,12 +1306,15 @@ sysctl -w vm.overcommit_memory=2
      <term><systemitem>SIGTERM</systemitem><indexterm><primary>SIGTERM</></></term>
      <listitem>
       <para>
+       This is the <firstterm>Smart Shutdown</firstterm> mode.
        After receiving <systemitem>SIGTERM</systemitem>, the server
-       waits until online backup mode is no longer active. It then
        disallows new connections, but lets existing sessions end their
-       work normally. It shuts down only after all of the sessions
-       terminate normally. This is the <firstterm>Smart
-       Shutdown</firstterm>.
+       work normally. It shuts down only after all of the sessions terminate.
+       If the server is in online backup mode, it additionally waits
+       until online backup mode is no longer active.  While backup mode is
+       active, new connections will still be allowed, but only to superusers
+       (this exception allows a superuser to connect to terminate
+       online backup mode).
       </para>
      </listitem>
     </varlistentry>
@@ -1320,13 +1323,13 @@ sysctl -w vm.overcommit_memory=2
      <term><systemitem>SIGINT</systemitem><indexterm><primary>SIGINT</></></term>
      <listitem>
       <para>
+       This is the <firstterm>Fast Shutdown</firstterm> mode.
        The server disallows new connections and sends all existing
        server processes <systemitem>SIGTERM</systemitem>, which will cause them
        to abort their current transactions and exit promptly. It then
        waits for the server processes to exit and finally shuts down.
        If the server is in online backup mode, backup mode will be
-       terminated, rendering the backup useless.  This is the
-       <firstterm>Fast Shutdown</firstterm>.
+       terminated, rendering the backup useless.
       </para>
      </listitem>
     </varlistentry>
@@ -1335,8 +1338,8 @@ sysctl -w vm.overcommit_memory=2
      <term><systemitem>SIGQUIT</systemitem><indexterm><primary>SIGQUIT</></></term>
      <listitem>
       <para>
-      This is the <firstterm>Immediate Shutdown</firstterm>, which
-      will cause the master <command>postgres</command> process to send a
+      This is the <firstterm>Immediate Shutdown</firstterm> mode.
+      The master <command>postgres</command> process will send a
       <systemitem>SIGQUIT</systemitem> to all child processes and exit
       immediately, without properly shutting itself down. The child processes
       likewise exit immediately upon receiving
@@ -1377,8 +1380,8 @@ $ <userinput>kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid`</userinput
   </important>
 
   <para>
-   To terminate a session while allowing other sessions to continue, use
-   <function>pg_terminate_backend()</> (<xref
+   To terminate an individual session while allowing other sessions to
+   continue, use <function>pg_terminate_backend()</> (see <xref
    linkend="functions-admin-signal-table">) or send a
    <systemitem>SIGTERM</> signal to the child process associated with
    the session.
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index f3bcdd968c7..54e9173f6c6 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.555 2008/04/23 13:44:59 mha Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.556 2008/04/26 22:47:40 tgl Exp $
  *
  * NOTES
  *
@@ -230,6 +230,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
  * crash recovery (which is rather like shutdown followed by startup).
  *
  * Normal child backends can only be launched when we are in PM_RUN state.
+ * (We also allow it in PM_WAIT_BACKUP state, but only for superusers.)
  * In other states we handle connection requests by launching "dead_end"
  * child processes, which will simply send the client an error message and
  * quit.  (We track these in the BackendList so that we can know when they
@@ -242,9 +243,9 @@ static bool FatalError = false; /* T if recovering from backend crash */
  * will not be very long).
  *
  * Notice that this state variable does not distinguish *why* we entered
- * PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be
- * consulted to find that out.	FatalError is never true in PM_RUN state, nor
- * in PM_SHUTDOWN states (because we don't enter those states when trying to
+ * states later than PM_RUN --- Shutdown and FatalError must be consulted
+ * to find that out.  FatalError is never true in PM_RUN state, nor in
+ * PM_SHUTDOWN states (because we don't enter those states when trying to
  * recover from a crash).  It can be true in PM_STARTUP state, because we
  * don't clear it until we've successfully recovered.
  */
@@ -1650,6 +1651,9 @@ retry1:
 					(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
 					 errmsg("sorry, too many clients already")));
 			break;
+		case CAC_WAITBACKUP:
+			/* OK for now, will check in InitPostgres */
+			break;
 		case CAC_OK:
 			break;
 	}
@@ -1727,11 +1731,15 @@ canAcceptConnections(void)
 {
 	/*
 	 * Can't start backends when in startup/shutdown/recovery state.
-	 * In state PM_WAIT_BACKUP we must allow connections so that
-	 * a superuser can end online backup mode.
+	 *
+	 * In state PM_WAIT_BACKUP only superusers can connect (this must be
+	 * allowed so that a superuser can end online backup mode); we return
+	 * CAC_WAITBACKUP code to indicate that this must be checked later.
 	 */
-	if ((pmState != PM_RUN) && (pmState != PM_WAIT_BACKUP))
+	if (pmState != PM_RUN)
 	{
+		if (pmState == PM_WAIT_BACKUP)
+			return CAC_WAITBACKUP;	/* allow superusers only */
 		if (Shutdown > NoShutdown)
 			return CAC_SHUTDOWN;	/* shutdown is pending */
 		if (pmState == PM_STARTUP && !FatalError)
@@ -1997,7 +2005,7 @@ pmdie(SIGNAL_ARGS)
 
 			if (StartupPID != 0)
 				signal_child(StartupPID, SIGTERM);
-			if (pmState == PM_RUN)
+			if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP)
 			{
 				ereport(LOG,
 						(errmsg("aborting any active transactions")));
@@ -2017,13 +2025,6 @@ pmdie(SIGNAL_ARGS)
 			 * PostmasterStateMachine will take the next step.
 			 */
 			PostmasterStateMachine();
-
-			/*
-			 * Terminate backup mode to avoid recovery after a
-			 * clean fast shutdown.
-			 */
-			CancelBackup();
-
 			break;
 
 		case SIGQUIT:
@@ -2499,7 +2500,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 
 	FatalError = true;
 	/* We now transit into a state of waiting for children to die */
-	if (pmState == PM_RUN || pmState == PM_SHUTDOWN)
+	if (pmState == PM_RUN ||
+		pmState == PM_WAIT_BACKUP ||
+		pmState == PM_SHUTDOWN)
 		pmState = PM_WAIT_BACKENDS;
 }
 
@@ -2568,15 +2571,10 @@ PostmasterStateMachine(void)
 	if (pmState == PM_WAIT_BACKUP)
 	{
 		/*
-		 * PM_WAIT_BACKUP state ends when online backup mode is no longer
-		 * active.  In this state canAcceptConnections() will still allow
-		 * client connections, which is necessary because a superuser
-		 * has to call pg_stop_backup() to end online backup mode.
+		 * PM_WAIT_BACKUP state ends when online backup mode is not active.
 		 */
 		if (!BackupInProgress())
-		{
 			pmState = PM_WAIT_BACKENDS;
-		}
 	}
 
 	/*
@@ -2699,6 +2697,12 @@ PostmasterStateMachine(void)
 		}
 		else
 		{
+			/*
+			 * Terminate backup mode to avoid recovery after a
+			 * clean fast shutdown.
+			 */
+			CancelBackup();
+
 			/* Normal exit from the postmaster is here */
 			ExitPostmaster(0);
 		}
@@ -2819,7 +2823,7 @@ BackendStartup(Port *port)
 		return STATUS_ERROR;
 	}
 
-	/* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
+	/* Pass down canAcceptConnections state */
 	port->canAcceptConnections = canAcceptConnections();
 
 #ifdef EXEC_BACKEND
@@ -2880,7 +2884,8 @@ BackendStartup(Port *port)
 	bn->pid = pid;
 	bn->cancel_key = MyCancelKey;
 	bn->is_autovacuum = false;
-	bn->dead_end = (port->canAcceptConnections != CAC_OK);
+	bn->dead_end = (port->canAcceptConnections != CAC_OK &&
+					port->canAcceptConnections != CAC_WAITBACKUP);
 	DLAddHead(BackendList, DLNewElem(bn));
 #ifdef EXEC_BACKEND
 	if (!bn->dead_end)
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 6c29ea19174..695db95c726 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.182 2008/03/26 21:10:39 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.183 2008/04/26 22:47:40 tgl Exp $
  *
  *
  *-------------------------------------------------------------------------
@@ -26,6 +26,7 @@
 #include "catalog/pg_database.h"
 #include "catalog/pg_tablespace.h"
 #include "libpq/hba.h"
+#include "libpq/libpq-be.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -577,6 +578,16 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 	if (!bootstrap)
 		CheckMyDatabase(dbname, am_superuser);
 
+	/*
+	 * If we're trying to shut down, only superusers can connect.
+	 */
+	if (!am_superuser &&
+		MyProcPort != NULL &&
+		MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
+		ereport(FATAL,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to connect during database shutdown")));
+
 	/*
 	 * Check a normal user hasn't connected to a superuser reserved slot.
 	 */
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index da05b69cbdb..6927da2fd08 100644
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/libpq/libpq-be.h,v 1.65 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/libpq/libpq-be.h,v 1.66 2008/04/26 22:47:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,8 @@ typedef struct
 
 typedef enum CAC_state
 {
-	CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY
+	CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY,
+	CAC_WAITBACKUP
 } CAC_state;
 
 
-- 
GitLab