From 41c184bc642b25f67fb1d8ee290f28805fa5a0b4 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Wed, 22 Jan 2020 11:56:34 +0900
Subject: [PATCH] Add GUC ignore_invalid_pages.

Detection of WAL records having references to invalid pages
during recovery causes PostgreSQL to raise a PANIC-level error,
aborting the recovery. Setting ignore_invalid_pages to on causes
the system to ignore those WAL records (but still report a warning),
and continue recovery. This behavior may cause crashes or data loss,
propagate or hide corruption, or lead to other serious problems.
However, it may allow you to get past the PANIC-level error,
to finish the recovery, and to cause the server to start up.

Author: Fujii Masao
Reviewed-by: Michael Paquier
Discussion: https://www.postgresql.org/message-id/CAHGQGwHCK6f77yeZD4MHOnN+PaTf6XiJfEB+Ce7SksSHjeAWtg@mail.gmail.com
---
 doc/src/sgml/config.sgml               | 25 +++++++++++++++++++++++++
 src/backend/access/transam/xlogutils.c |  9 +++++++--
 src/backend/utils/misc/guc.c           | 20 ++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3ccacd528b4..e07dc01e802 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -9950,6 +9950,31 @@ LOG:  CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1)
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-ignore-invalid-pages" xreflabel="ignore_invalid_pages">
+      <term><varname>ignore_invalid_pages</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>ignore_invalid_pages</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>off</literal> (the default), detection of
+        WAL records having references to invalid pages during
+        recovery causes <productname>PostgreSQL</productname> to
+        raise a PANIC-level error, aborting the recovery. Setting
+        <varname>ignore_invalid_pages</varname> to <literal>on</literal>
+        causes the system to ignore invalid page references in WAL records
+        (but still report a warning), and continue the recovery.
+        This behavior may <emphasis>cause crashes or data loss,
+        propagate or hide corruption, or lead to other serious problems</emphasis>.
+        However, it may allow you to get past the PANIC-level error,
+        to finish the recovery, and to cause the server to start up.
+        This parameter can only be set at server start. It only has an effect
+        during recovery or in standby mode.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-jit-debugging-support" xreflabel="jit_debugging_support">
       <term><varname>jit_debugging_support</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index b55c3833703..b217ffa52ff 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -31,6 +31,9 @@
 #include "utils/rel.h"
 
 
+/* GUC variable */
+bool		ignore_invalid_pages = false;
+
 /*
  * During XLOG replay, we may see XLOG records for incremental updates of
  * pages that no longer exist, because their relation was later dropped or
@@ -93,7 +96,8 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
 	if (reachedConsistency)
 	{
 		report_invalid_page(WARNING, node, forkno, blkno, present);
-		elog(PANIC, "WAL contains references to invalid pages");
+		elog(ignore_invalid_pages ? WARNING : PANIC,
+			 "WAL contains references to invalid pages");
 	}
 
 	/*
@@ -240,7 +244,8 @@ XLogCheckInvalidPages(void)
 	}
 
 	if (foundone)
-		elog(PANIC, "WAL contains references to invalid pages");
+		elog(ignore_invalid_pages ? WARNING : PANIC,
+			 "WAL contains references to invalid pages");
 
 	hash_destroy(invalid_page_tab);
 	invalid_page_tab = NULL;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e44f71e9910..9f179a91295 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -123,6 +123,7 @@ extern int	CommitSiblings;
 extern char *default_tablespace;
 extern char *temp_tablespaces;
 extern bool ignore_checksum_failure;
+extern bool ignore_invalid_pages;
 extern bool synchronize_seqscans;
 
 #ifdef TRACE_SYNCSCAN
@@ -1172,6 +1173,25 @@ static struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
+	{
+		{"ignore_invalid_pages", PGC_POSTMASTER, DEVELOPER_OPTIONS,
+			gettext_noop("Continues recovery after an invalid pages failure."),
+			gettext_noop("Detection of WAL records having references to "
+						 "invalid pages during recovery causes PostgreSQL to "
+						 "raise a PANIC-level error, aborting the recovery. "
+						 "Setting ignore_invalid_pages to true causes "
+						 "the system to ignore invalid page references "
+						 "in WAL records (but still report a warning), "
+						 "and continue recovery. This behavior may cause "
+						 "crashes, data loss, propagate or hide corruption, "
+						 "or other serious problems. Only has an effect "
+						 "during recovery or in standby mode."),
+			GUC_NOT_IN_SAMPLE
+		},
+		&ignore_invalid_pages,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"full_page_writes", PGC_SIGHUP, WAL_SETTINGS,
 			gettext_noop("Writes full pages to WAL when first modified after a checkpoint."),
-- 
GitLab