From 6b61955135e94b39d85571fdbb0c5a749af767f1 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Tue, 29 Sep 2015 14:40:56 -0300
Subject: [PATCH] Code review for transaction commit timestamps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are three main changes here:

1. No longer cause a start failure in a standby if the feature is
disabled in postgresql.conf but enabled in the master.  This reverts one
part of commit 4f3924d9cd43; what we keep is the ability of the standby
to activate/deactivate the module (which includes creating and removing
segments as appropriate) during replay of such actions in the master.

2. Replay WAL records affecting commitTS even if the feature is
disabled.  This means the standby will always have the same state as the
master after replay.

3. Have COMMIT PREPARE record the transaction commit time as well.  We
were previously only applying it in the normal transaction commit path.

Author: Petr Jelínek
Discussion: http://www.postgresql.org/message-id/CAHGQGwHereDzzzmfxEBYcVQu3oZv6vZcgu1TPeERWbDc+gQ06g@mail.gmail.com
Discussion: http://www.postgresql.org/message-id/CAHGQGwFuzfO4JscM9LCAmCDCxp_MfLvN4QdB+xWsS-FijbjTYQ@mail.gmail.com

Additionally, I cleaned up nearby code related to replication origins,
which I found a bit hard to follow, and fixed a couple of typos.

Backpatch to 9.5, where this code was introduced.

Per bug reports from Fujii Masao and subsequent discussion.
---
 src/backend/access/rmgrdesc/xlogdesc.c |  2 +-
 src/backend/access/transam/commit_ts.c | 29 +++++++++++------
 src/backend/access/transam/twophase.c  | 41 ++++++++++++++++++++---
 src/backend/access/transam/xact.c      | 45 +++++++++++++++++---------
 src/backend/access/transam/xlog.c      | 16 ---------
 src/include/access/commit_ts.h         |  5 +--
 6 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 4f2913683a1..83cc9e896eb 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
 						 "max_prepared_xacts=%d max_locks_per_xact=%d "
 						 "wal_level=%s wal_log_hints=%s "
-						 "track_commit_timestamps=%s",
+						 "track_commit_timestamp=%s",
 						 xlrec.MaxConnections,
 						 xlrec.max_worker_processes,
 						 xlrec.max_prepared_xacts,
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 33136e3c1d9..78090c5f09c 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -122,29 +122,39 @@ static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
  * subtrans implementation changes in the future, we might want to revisit the
  * decision of storing timestamp info for each subxid.
  *
- * The do_xlog parameter tells us whether to include an XLog record of this
- * or not.  Normal path through RecordTransactionCommit() will be related
- * to a transaction commit XLog record, and so should pass "false" here.
- * Other callers probably want to pass true, so that the given values persist
- * in case of crashes.
+ * The replaying_xlog parameter indicates whether the module should execute
+ * its write even if the feature is nominally disabled, because we're replaying
+ * a record generated from a master where the feature is enabled.
+ *
+ * The write_xlog parameter tells us whether to include an XLog record of this
+ * or not.  Normally, this is called from transaction commit routines (both
+ * normal and prepared) and the information will be stored in the transaction
+ * commit XLog record, and so they should pass "false" for this.  The XLog redo
+ * code should use "false" here as well.  Other callers probably want to pass
+ * true, so that the given values persist in case of crashes.
  */
 void
 TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
 							   TransactionId *subxids, TimestampTz timestamp,
-							   RepOriginId nodeid, bool do_xlog)
+							   RepOriginId nodeid,
+							   bool replaying_xlog, bool write_xlog)
 {
 	int			i;
 	TransactionId headxid;
 	TransactionId newestXact;
 
-	if (!track_commit_timestamp)
+	/* We'd better not try to write xlog during replay */
+	Assert(!(write_xlog && replaying_xlog));
+
+	/* No-op if feature not enabled, unless replaying WAL */
+	if (!track_commit_timestamp && !replaying_xlog)
 		return;
 
 	/*
 	 * Comply with the WAL-before-data rule: if caller specified it wants this
 	 * value to be recorded in WAL, do so before touching the data.
 	 */
-	if (do_xlog)
+	if (write_xlog)
 		WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
 
 	/*
@@ -906,7 +916,8 @@ commit_ts_redo(XLogReaderState *record)
 			subxids = NULL;
 
 		TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
-									 setts->timestamp, setts->nodeid, false);
+									   setts->timestamp, setts->nodeid, false,
+									   true);
 		if (subxids)
 			pfree(subxids);
 	}
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index d48d101340f..e005cc558ab 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -41,6 +41,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "access/commit_ts.h"
 #include "access/htup_details.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
@@ -56,8 +57,9 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
-#include "replication/walsender.h"
+#include "replication/origin.h"
 #include "replication/syncrep.h"
+#include "replication/walsender.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/predicate.h"
@@ -2070,8 +2072,9 @@ RecoverPreparedTransactions(void)
 /*
  *	RecordTransactionCommitPrepared
  *
- * This is basically the same as RecordTransactionCommit: in particular,
- * we must set the delayChkpt flag to avoid a race condition.
+ * This is basically the same as RecordTransactionCommit (q.v. if you change
+ * this function): in particular, we must set the delayChkpt flag to avoid a
+ * race condition.
  *
  * We know the transaction made at least one XLOG entry (its PREPARE),
  * so it is never possible to optimize out the commit record.
@@ -2087,6 +2090,15 @@ RecordTransactionCommitPrepared(TransactionId xid,
 								bool initfileinval)
 {
 	XLogRecPtr	recptr;
+	TimestampTz committs = GetCurrentTimestamp();
+	bool		replorigin;
+
+	/*
+	 * Are we using the replication origins feature?  Or, in other words, are
+	 * we replaying remote actions?
+	 */
+	replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+				  replorigin_session_origin != DoNotReplicateId);
 
 	START_CRIT_SECTION();
 
@@ -2094,12 +2106,33 @@ RecordTransactionCommitPrepared(TransactionId xid,
 	MyPgXact->delayChkpt = true;
 
 	/* Emit the XLOG commit record */
-	recptr = XactLogCommitRecord(GetCurrentTimestamp(),
+	recptr = XactLogCommitRecord(committs,
 								 nchildren, children, nrels, rels,
 								 ninvalmsgs, invalmsgs,
 								 initfileinval, false,
 								 xid);
 
+
+	if (replorigin)
+		/* Move LSNs forward for this replication origin */
+		replorigin_session_advance(replorigin_session_origin_lsn,
+								   XactLastRecEnd);
+
+	/*
+	 * Record commit timestamp.  The value comes from plain commit timestamp
+	 * if replorigin is not enabled, or replorigin already set a value for us
+	 * in replorigin_session_origin_timestamp otherwise.
+	 *
+	 * We don't need to WAL-log anything here, as the commit record written
+	 * above already contains the data.
+	 */
+	if (!replorigin || replorigin_session_origin_timestamp == 0)
+		replorigin_session_origin_timestamp = committs;
+
+	TransactionTreeSetCommitTsData(xid, nchildren, children,
+								   replorigin_session_origin_timestamp,
+								   replorigin_session_origin, false, false);
+
 	/*
 	 * We don't currently try to sleep before flush here ... nor is there any
 	 * support for async commit of a prepared xact (the very idea is probably
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 068214d83e0..8f56a44d06e 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -42,9 +42,9 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "replication/logical.h"
-#include "replication/walsender.h"
-#include "replication/syncrep.h"
 #include "replication/origin.h"
+#include "replication/syncrep.h"
+#include "replication/walsender.h"
 #include "storage/fd.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
@@ -1119,6 +1119,8 @@ AtSubStart_ResourceOwner(void)
  *
  * Returns latest XID among xact and its children, or InvalidTransactionId
  * if the xact has no XID.  (We compute that here just because it's easier.)
+ *
+ * If you change this function, see RecordTransactionCommitPrepared also.
  */
 static TransactionId
 RecordTransactionCommit(void)
@@ -1172,6 +1174,15 @@ RecordTransactionCommit(void)
 	}
 	else
 	{
+		bool		replorigin;
+
+		/*
+		 * Are we using the replication origins feature?  Or, in other words,
+		 * are we replaying remote actions?
+		 */
+		replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+					  replorigin_session_origin != DoNotReplicateId);
+
 		/*
 		 * Begin commit critical section and insert the commit XLOG record.
 		 */
@@ -1206,26 +1217,28 @@ RecordTransactionCommit(void)
 							RelcacheInitFileInval, forceSyncCommit,
 							InvalidTransactionId /* plain commit */ );
 
-		/*
-		 * Record plain commit ts if not replaying remote actions, or if no
-		 * timestamp is configured.
-		 */
-		if (replorigin_session_origin == InvalidRepOriginId ||
-			replorigin_session_origin == DoNotReplicateId ||
-			replorigin_session_origin_timestamp == 0)
-			replorigin_session_origin_timestamp = xactStopTimestamp;
-		else
+		if (replorigin)
+			/* Move LSNs forward for this replication origin */
 			replorigin_session_advance(replorigin_session_origin_lsn,
 									   XactLastRecEnd);
 
 		/*
-		 * We don't need to WAL log origin or timestamp here, the commit
-		 * record contains all the necessary information and will redo the SET
-		 * action during replay.
+		 * Record commit timestamp.  The value comes from plain commit
+		 * timestamp if there's no replication origin; otherwise, the
+		 * timestamp was already set in replorigin_session_origin_timestamp by
+		 * replication.
+		 *
+		 * We don't need to WAL-log anything here, as the commit record
+		 * written above already contains the data.
 		 */
+
+		if (!replorigin || replorigin_session_origin_timestamp == 0)
+			replorigin_session_origin_timestamp = xactStopTimestamp;
+
 		TransactionTreeSetCommitTsData(xid, nchildren, children,
 									   replorigin_session_origin_timestamp,
-									   replorigin_session_origin, false);
+									   replorigin_session_origin,
+									   false, false);
 	}
 
 	/*
@@ -5321,7 +5334,7 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
 	/* Set the transaction commit timestamp and metadata */
 	TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
 								   commit_time, origin_id,
-								   false);
+								   true, false);
 
 	if (standbyState == STANDBY_DISABLED)
 	{
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 22e6a214144..0266d61bbdb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5826,19 +5826,6 @@ do { \
 						minValue))); \
 } while(0)
 
-#define RecoveryRequiresBoolParameter(param_name, currValue, masterValue) \
-do { \
-	bool _currValue = (currValue); \
-	bool _masterValue = (masterValue); \
-	if (_currValue != _masterValue) \
-		ereport(ERROR, \
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
-				 errmsg("hot standby is not possible because it requires \"%s\" to be same on master and standby (master has \"%s\", standby has \"%s\")", \
-						param_name, \
-						_masterValue ? "true" : "false", \
-						_currValue ? "true" : "false"))); \
-} while(0)
-
 /*
  * Check to see if required parameters are set high enough on this server
  * for various aspects of recovery operation.
@@ -5885,9 +5872,6 @@ CheckRequiredParameterValues(void)
 		RecoveryRequiresIntParameter("max_locks_per_transaction",
 									 max_locks_per_xact,
 									 ControlFile->max_locks_per_xact);
-		RecoveryRequiresBoolParameter("track_commit_timestamp",
-									  track_commit_timestamp,
-									  ControlFile->track_commit_timestamp);
 	}
 }
 
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index bd05ab4d5ce..dc865d1bc3d 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -24,7 +24,8 @@ extern bool check_track_commit_timestamp(bool *newval, void **extra,
 
 extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
 							   TransactionId *subxids, TimestampTz timestamp,
-							   RepOriginId nodeid, bool do_xlog);
+							   RepOriginId nodeid,
+							   bool replaying_xlog, bool write_xlog);
 extern bool TransactionIdGetCommitTsData(TransactionId xid,
 							 TimestampTz *ts, RepOriginId *nodeid);
 extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
@@ -67,4 +68,4 @@ extern void commit_ts_redo(XLogReaderState *record);
 extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
 extern const char *commit_ts_identify(uint8 info);
 
-#endif   /* COMMITTS_H */
+#endif   /* COMMIT_TS_H */
-- 
GitLab