From 50e547096c4858a68abf09894667a542cc418315 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 13 Dec 2013 16:26:14 +0200
Subject: [PATCH] Add GUC to enable WAL-logging of hint bits, even with
 checksums disabled.

WAL records of hint bit updates are useful to tools that want to examine
which pages have been modified. In particular, this is required to make
the pg_rewind tool safe (without checksums).

This can also be used to test how much extra WAL-logging would occur if
you enabled checksums, without actually enabling them (which you can't
currently do without re-initdb'ing).

Sawada Masahiko, docs by Samrat Revagade. Reviewed by Dilip Kumar, with
further changes by me.
---
 doc/src/sgml/config.sgml                      | 26 +++++++++++++++++++
 src/backend/access/heap/heapam.c              |  2 +-
 src/backend/access/heap/visibilitymap.c       |  6 ++---
 src/backend/access/transam/xlog.c             |  6 +++++
 src/backend/storage/buffer/bufmgr.c           |  7 +++--
 src/backend/utils/misc/guc.c                  | 11 ++++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/bin/pg_controldata/pg_controldata.c       |  2 ++
 src/bin/pg_resetxlog/pg_resetxlog.c           |  2 ++
 src/include/access/xlog.h                     | 12 +++++++++
 src/include/access/xlog_internal.h            |  1 +
 src/include/catalog/catversion.h              |  2 +-
 src/include/catalog/pg_control.h              |  1 +
 13 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8896988e5aa..f0794467ba4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1944,6 +1944,32 @@ include 'filename'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-wal-log-hintbits" xreflabel="wal_log_hintbits">
+      <term><varname>wal_log_hintbits</varname> (<type>boolean</type>)</term>
+      <indexterm>
+       <primary><varname>wal_log_hintbits</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        When this parameter is <literal>on</>, the <productname>PostgreSQL</>
+        server writes the entire content of each disk page to WAL during the
+        first modification of that page after a checkpoint, even for
+        non-critical modifications of so-called hint bits.
+       </para>
+
+       <para>
+        If data checksums are enabled, hint bit updates are always WAL-logged
+        and this setting is ignored. You can use this setting to test how much
+        extra WAL-logging would occur if your database had data checksums
+        enabled.
+       </para>
+
+       <para>
+        This parameter can only be set at server start. The default value is <literal>off</>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
       <term><varname>wal_buffers</varname> (<type>integer</type>)</term>
       <indexterm>
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 09174b472b0..91cfae1603d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6271,7 +6271,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
 	rdata[1].buffer_std = false;
 	rdata[1].next = NULL;
 
-	if (DataChecksumsEnabled())
+	if (XLogHintBitIsNeeded())
 	{
 		rdata[1].next = &(rdata[2]);
 
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7f40d89b9f1..60ee76a2081 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -287,10 +287,10 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 										  cutoff_xid);
 
 				/*
-				 * If data checksums are enabled, we need to protect the heap
-				 * page from being torn.
+				 * If data checksums are enabled (or wal_log_hintbits=on), we
+				 * need to protect the heap page from being torn.
 				 */
-				if (DataChecksumsEnabled())
+				if (XLogHintBitIsNeeded())
 				{
 					Page		heapPage = BufferGetPage(heapBuf);
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a76aef37f3d..0efb50b17ef 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -79,6 +79,7 @@ bool		XLogArchiveMode = false;
 char	   *XLogArchiveCommand = NULL;
 bool		EnableHotStandby = false;
 bool		fullPageWrites = true;
+bool		walLogHintbits = false;
 bool		log_checkpoints = false;
 int			sync_method = DEFAULT_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_MINIMAL;
@@ -5270,6 +5271,7 @@ BootStrapXLOG(void)
 	ControlFile->max_prepared_xacts = max_prepared_xacts;
 	ControlFile->max_locks_per_xact = max_locks_per_xact;
 	ControlFile->wal_level = wal_level;
+	ControlFile->wal_log_hintbits = walLogHintbits;
 	ControlFile->data_checksum_version = bootstrap_data_checksum_version;
 
 	/* some additional ControlFile fields are set in WriteControlFile() */
@@ -9058,6 +9060,7 @@ static void
 XLogReportParameters(void)
 {
 	if (wal_level != ControlFile->wal_level ||
+		walLogHintbits != ControlFile->wal_log_hintbits ||
 		MaxConnections != ControlFile->MaxConnections ||
 		max_worker_processes != ControlFile->max_worker_processes ||
 		max_prepared_xacts != ControlFile->max_prepared_xacts ||
@@ -9080,6 +9083,7 @@ XLogReportParameters(void)
 			xlrec.max_prepared_xacts = max_prepared_xacts;
 			xlrec.max_locks_per_xact = max_locks_per_xact;
 			xlrec.wal_level = wal_level;
+			xlrec.wal_log_hintbits = walLogHintbits;
 
 			rdata.buffer = InvalidBuffer;
 			rdata.data = (char *) &xlrec;
@@ -9094,6 +9098,7 @@ XLogReportParameters(void)
 		ControlFile->max_prepared_xacts = max_prepared_xacts;
 		ControlFile->max_locks_per_xact = max_locks_per_xact;
 		ControlFile->wal_level = wal_level;
+		ControlFile->wal_log_hintbits = walLogHintbits;
 		UpdateControlFile();
 	}
 }
@@ -9480,6 +9485,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 		ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
 		ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
 		ControlFile->wal_level = xlrec.wal_level;
+		ControlFile->wal_log_hintbits = xlrec.wal_log_hintbits;
 
 		/*
 		 * Update minRecoveryPoint to ensure that if recovery is aborted, we
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f8483918116..081165faef0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2626,16 +2626,15 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 		bool		delayChkpt = false;
 
 		/*
-		 * If checksums are enabled, and the buffer is permanent, then a full
-		 * page image may be required even for some hint bit updates to
-		 * protect against torn pages. This full page image is only necessary
+		 * If we need to protect hint bit updates from torn writes, WAL-log a
+		 * full page image of the page. This full page image is only necessary
 		 * if the hint bit update is the first change to the page since the
 		 * last checkpoint.
 		 *
 		 * We don't check full_page_writes here because that logic is included
 		 * when we call XLogInsert() since the value changes dynamically.
 		 */
-		if (DataChecksumsEnabled() && (bufHdr->flags & BM_PERMANENT))
+		if (XLogHintBitIsNeeded() && (bufHdr->flags & BM_PERMANENT))
 		{
 			/*
 			 * If we're in recovery we cannot dirty a page because of a hint.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e69e132f056..b0c14a2dfce 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -866,6 +866,17 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+
+	{
+		{"wal_log_hintbits", PGC_POSTMASTER, WAL_SETTINGS,
+			gettext_noop("Writes full pages to WAL when first modified after a checkpoint, even for non-critical modifications."),
+			NULL
+		},
+		&walLogHintbits,
+		false,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs each checkpoint."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f8bdce34d33..d049159444f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -184,6 +184,7 @@
 					#   fsync_writethrough
 					#   open_sync
 #full_page_writes = on			# recover from partial page writes
+#wal_log_hintbits = off			# also do full page writes of non-critical updates
 #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
 					# (change requires restart)
 #wal_writer_delay = 200ms		# 1-10000 milliseconds
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 8c6cf24d237..da48e98ff99 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -260,6 +260,8 @@ main(int argc, char *argv[])
 		   ControlFile.backupEndRequired ? _("yes") : _("no"));
 	printf(_("Current wal_level setting:            %s\n"),
 		   wal_level_str(ControlFile.wal_level));
+	printf(_("Current wal_log_hintbits setting:     %s\n"),
+		   ControlFile.wal_log_hintbits ? _("on") : _("off"));
 	printf(_("Current max_connections setting:      %d\n"),
 		   ControlFile.MaxConnections);
 	printf(_("Current max_worker_processes setting: %d\n"),
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index ed0f9456796..da13ff4d134 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -525,6 +525,7 @@ GuessControlValues(void)
 	/* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
 
 	ControlFile.wal_level = WAL_LEVEL_MINIMAL;
+	ControlFile.wal_log_hintbits = false;
 	ControlFile.MaxConnections = 100;
 	ControlFile.max_worker_processes = 8;
 	ControlFile.max_prepared_xacts = 0;
@@ -721,6 +722,7 @@ RewriteControlFile(void)
 	 * anyway at startup.
 	 */
 	ControlFile.wal_level = WAL_LEVEL_MINIMAL;
+	ControlFile.wal_log_hintbits = false;
 	ControlFile.MaxConnections = 100;
 	ControlFile.max_worker_processes = 8;
 	ControlFile.max_prepared_xacts = 0;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 7415a261bbd..e69accd4b2e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -189,6 +189,7 @@ extern bool XLogArchiveMode;
 extern char *XLogArchiveCommand;
 extern bool EnableHotStandby;
 extern bool fullPageWrites;
+extern bool walLogHintbits;
 extern bool log_checkpoints;
 extern int	num_xloginsert_slots;
 
@@ -211,6 +212,17 @@ extern int	wal_level;
  */
 #define XLogIsNeeded() (wal_level >= WAL_LEVEL_ARCHIVE)
 
+/*
+ * Is a full-page image needed for hint bit updates?
+ *
+ * Normally, we don't WAL-log hint bit updates, but if checksums are enabled,
+ * we have to protect them against torn page writes.  When you only set
+ * individual bits on a page, it's still consistent no matter what combination
+ * of the bits makes it to disk, but the checksum wouldn't match.  Also WAL-log
+ * them if forced by wal_log_hintbits=on.
+ */
+#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || walLogHintbits)
+
 /* Do we need to WAL-log information required only for Hot Standby and logical replication? */
 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_HOT_STANDBY)
 
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 64ba55355b9..c78a2fbfae8 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -209,6 +209,7 @@ typedef struct xl_parameter_change
 	int			max_prepared_xacts;
 	int			max_locks_per_xact;
 	int			wal_level;
+	bool		wal_log_hintbits;
 } xl_parameter_change;
 
 /* logs restore point */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 759395a24b9..b7610e8c3f8 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201311261
+#define CATALOG_VERSION_NO	201312131
 
 #endif
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 637221e6347..8f23508ad0f 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -171,6 +171,7 @@ typedef struct ControlFileData
 	 * or hot standby.
 	 */
 	int			wal_level;
+	bool		wal_log_hintbits;
 	int			MaxConnections;
 	int			max_worker_processes;
 	int			max_prepared_xacts;
-- 
GitLab