From aedccb1f6fef988af1d1a25b78151f3773954b4c Mon Sep 17 00:00:00 2001
From: Simon Riggs <simon@2ndQuadrant.com>
Date: Tue, 25 Nov 2014 20:13:30 +0000
Subject: [PATCH] action_at_recovery_target recovery config option

action_at_recovery_target = pause | promote | shutdown

Petr Jelinek

Reviewed by Muhammad Asif Naeem, Fujii Masao and
Simon Riggs
---
 doc/src/sgml/recovery-config.sgml   | 58 ++++++++++++++---
 src/backend/access/transam/xlog.c   | 98 +++++++++++++++++++++++++----
 src/backend/postmaster/postmaster.c | 12 ++++
 src/include/access/xlog_internal.h  | 10 +++
 4 files changed, 159 insertions(+), 19 deletions(-)

diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 0f1ff343a6c..a145a3fee2b 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -289,12 +289,39 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
       </term>
       <listitem>
        <para>
-        Specifies whether recovery should pause when the recovery target
-        is reached. The default is true.
-        This is intended to allow queries to be executed against the
-        database to check if this recovery target is the most desirable
-        point for recovery. The paused state can be resumed by using
-        <function>pg_xlog_replay_resume()</> (See
+        Alias for action_at_recovery_target; <literal>true</> is the same as
+        action_at_recovery_target = <literal>pause</> and <literal>false</>
+        is the same as action_at_recovery_target = <literal>promote</>.
+       </para>
+       <para>
+        This setting has no effect if <xref linkend="guc-hot-standby"> is not
+        enabled, or if no recovery target is set.
+       </para>
+      </listitem>
+     </varlistentry>
+
+
+
+     <varlistentry id="action-at-recovery-target"
+                   xreflabel="action_at_recovery_target">
+      <term><varname>action_at_recovery_target</varname> (<type>enum</type>)
+      <indexterm>
+        <primary><varname>action_at_recovery_target</> recovery parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies what action the server should take once the recovery target is
+        reached. The default is <literal>pause</>, which means recovery will
+        be paused. <literal>promote</> means the recovery process will finish
+        and the server will start to accept connections.
+        Finally <literal>shutdown</> will stop the server after reaching the
+        recovery target.
+       </para>
+       <para>The intended use of the <literal>pause</> setting is to allow queries to be
+        executed against the database to check if this recovery target is the
+        most desirable point for recovery. The paused state can be resumed by
+        using <function>pg_xlog_replay_resume()</> (See
         <xref linkend="functions-recovery-control-table">), which then
         causes recovery to end. If this recovery target is not the
         desired stopping point, then shutdown the server, change the
@@ -302,8 +329,23 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
         continue recovery.
        </para>
        <para>
-        This setting has no effect if <xref linkend="guc-hot-standby"> is not
-        enabled, or if no recovery target is set.
+        The <literal>shutdown</> setting is useful to have the instance ready
+        at the exact replay point desired.
+        The instance will still be able to replay more WAL records (and in fact
+        will have to replay WAL records since last checkpoint next time it is
+        started).
+       </para>
+       <para>
+        Note that because <filename>recovery.conf</> will not be renamed when
+        <varname>action_at_recovery_target</> is set to <literal>shutdown</>,
+        any subsequent start will end with immediate shutdown unless the
+        configuration is changed or the <filename>recovery.conf</> is removed
+        manually.
+       </para>
+       <para>
+        This setting has no effect if no recovery target is set.
+        If <xref linkend="guc-hot-standby"> is not enabled, a setting of
+        <literal>pause</> will act the same as <literal>shutdown</>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8e712b793f3..0f661f5010c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -228,7 +228,7 @@ static char *recoveryEndCommand = NULL;
 static char *archiveCleanupCommand = NULL;
 static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
 static bool recoveryTargetInclusive = true;
-static bool recoveryPauseAtTarget = true;
+static RecoveryTargetAction actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE;
 static TransactionId recoveryTargetXid;
 static TimestampTz recoveryTargetTime;
 static char *recoveryTargetName;
@@ -4647,6 +4647,9 @@ readRecoveryCommandFile(void)
 	ConfigVariable *item,
 			   *head = NULL,
 			   *tail = NULL;
+	bool		recoveryPauseAtTargetSet = false;
+	bool		actionAtRecoveryTargetSet = false;
+
 
 	fd = AllocateFile(RECOVERY_COMMAND_FILE, "r");
 	if (fd == NULL)
@@ -4692,13 +4695,43 @@ readRecoveryCommandFile(void)
 		}
 		else if (strcmp(item->name, "pause_at_recovery_target") == 0)
 		{
+			bool recoveryPauseAtTarget;
+
 			if (!parse_bool(item->value, &recoveryPauseAtTarget))
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("parameter \"%s\" requires a Boolean value", "pause_at_recovery_target")));
+
 			ereport(DEBUG2,
 					(errmsg_internal("pause_at_recovery_target = '%s'",
 									 item->value)));
+
+			actionAtRecoveryTarget = recoveryPauseAtTarget ?
+									 RECOVERY_TARGET_ACTION_PAUSE :
+									 RECOVERY_TARGET_ACTION_PROMOTE;
+
+			recoveryPauseAtTargetSet = true;
+		}
+		else if (strcmp(item->name, "action_at_recovery_target") == 0)
+		{
+			if (strcmp(item->value, "pause") == 0)
+				actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE;
+			else if (strcmp(item->value, "promote") == 0)
+				actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PROMOTE;
+			else if (strcmp(item->value, "shutdown") == 0)
+				actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid value for recovery parameter \"%s\"",
+								"action_at_recovery_target"),
+						 errhint("The allowed values are \"pause\", \"promote\" and \"shutdown\".")));
+
+			ereport(DEBUG2,
+					(errmsg_internal("action_at_recovery_target = '%s'",
+									 item->value)));
+
+			actionAtRecoveryTargetSet = true;
 		}
 		else if (strcmp(item->name, "recovery_target_timeline") == 0)
 		{
@@ -4863,6 +4896,28 @@ readRecoveryCommandFile(void)
 							RECOVERY_COMMAND_FILE)));
 	}
 
+	/*
+	 * Check for mutually exclusive parameters
+	 */
+	if (recoveryPauseAtTargetSet && actionAtRecoveryTargetSet)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot set both \"%s\" and \"%s\" recovery parameters",
+						"pause_at_recovery_target",
+						"action_at_recovery_target"),
+				 errhint("The \"pause_at_recovery_target\" is deprecated.")));
+
+
+	/*
+	 * Override any inconsistent requests: an explicit request to pause cannot
+	 * be honoured with hot_standby = off, so shut down instead. Note that
+	 * this is a change of behaviour in 9.5; prior to this we simply ignored
+	 * such a request, which was surprising behaviour. Test the hot_standby
+	 * setting itself: standbyState is not yet initialised at this point.
+	 */
+	if (actionAtRecoveryTarget == RECOVERY_TARGET_ACTION_PAUSE &&
+		actionAtRecoveryTargetSet && !EnableHotStandby)
+		actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN;
+
 	/* Enable fetching from archive recovery area */
 	ArchiveRecoveryRequested = true;
 
@@ -6415,10 +6470,37 @@ StartupXLOG(void)
 			 * end of main redo apply loop
 			 */
 
-			if (recoveryPauseAtTarget && reachedStopPoint)
+			if (reachedStopPoint)
 			{
-				SetRecoveryPause(true);
-				recoveryPausesHere();
+				if (!reachedConsistency)
+					ereport(FATAL,
+						(errmsg("requested recovery stop point is before consistent recovery point")));
+
+				/*
+				 * This is the last point where we can restart recovery with a
+				 * new recovery target, if we shutdown and begin again. After
+				 * this, Resource Managers may choose to do permanent corrective
+				 * actions at end of recovery.
+				 */
+				switch (actionAtRecoveryTarget)
+				{
+					case RECOVERY_TARGET_ACTION_SHUTDOWN:
+							/*
+							 * exit with special return code to request shutdown
+							 * of postmaster.  Log messages issued from
+							 * postmaster.
+							 */
+							proc_exit(3);
+
+					case RECOVERY_TARGET_ACTION_PAUSE:
+							SetRecoveryPause(true);
+							recoveryPausesHere();
+
+							/* drop into promote */
+
+					case RECOVERY_TARGET_ACTION_PROMOTE:
+							break;
+				}
 			}
 
 			/* Allow resource managers to do any required cleanup. */
@@ -6436,6 +6518,7 @@ StartupXLOG(void)
 				ereport(LOG,
 					 (errmsg("last completed transaction was at log time %s",
 							 timestamptz_to_str(xtime))));
+
 			InRedo = false;
 		}
 		else
@@ -6496,13 +6579,6 @@ StartupXLOG(void)
 		(EndOfLog < minRecoveryPoint ||
 		 !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
 	{
-		if (reachedStopPoint)
-		{
-			/* stopped because of stop request */
-			ereport(FATAL,
-					(errmsg("requested recovery stop point is before consistent recovery point")));
-		}
-
 		/*
 		 * Ran off end of WAL before reaching end-of-backup WAL record, or
 		 * minRecoveryPoint. That's usually a bad sign, indicating that you
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 6220a8e6e47..5106f52e0e0 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -509,6 +509,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
 /* Macros to check exit status of a child process */
 #define EXIT_STATUS_0(st)  ((st) == 0)
 #define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
+#define EXIT_STATUS_3(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 3)
 
 #ifndef WIN32
 /*
@@ -2555,6 +2556,17 @@ reaper(SIGNAL_ARGS)
 				continue;
 			}
 
+			if (EXIT_STATUS_3(exitstatus))
+			{
+				ereport(LOG,
+					(errmsg("shutdown at recovery target")));
+				Shutdown = SmartShutdown;
+				TerminateChildren(SIGTERM);
+				pmState = PM_WAIT_BACKENDS;
+				/* PostmasterStateMachine logic does the rest */
+				continue;
+			}
+
 			/*
 			 * Unexpected exit of startup process (including FATAL exit)
 			 * during PM_STARTUP is treated as catastrophic. There are no
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 423ef4d7fa0..85b3fe76bb6 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -214,6 +214,16 @@ typedef struct XLogRecData
 	uint32		len;			/* length of rmgr data to include */
 } XLogRecData;
 
+/*
+ * Action to take once the recovery target has been reached.
+ */
+typedef enum
+{
+	RECOVERY_TARGET_ACTION_PAUSE,		/* pause recovery at the target */
+	RECOVERY_TARGET_ACTION_PROMOTE,		/* finish recovery and carry on */
+	RECOVERY_TARGET_ACTION_SHUTDOWN		/* exit, asking postmaster to stop */
+} RecoveryTargetAction;
+
 /*
  * Method table for resource managers.
  *
-- 
GitLab