From 1578d13dc76d8e40cb0ef04d525af406357bd57d Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 29 Jul 2014 10:33:15 +0300
Subject: [PATCH] Treat 2PC commit/abort the same as regular xacts in recovery.

There were several oversights in recovery code where COMMIT/ABORT PREPARED
records were ignored:

* pg_last_xact_replay_timestamp() (wasn't updated for 2PC commits)
* recovery_min_apply_delay (2PC commits were applied immediately)
* recovery_target_xid (recovery would not stop if the XID used 2PC)

The first of those was reported by Sergiy Zuban in bug #11032, analyzed by
Tom Lane and Andres Freund. The bug was always there, but was masked before
commit d19bd29f07aef9e508ff047d128a4046cc8bc1e2, because COMMIT PREPARED
always created an extra regular transaction that was WAL-logged.

Backpatch to all supported versions (older versions didn't have all the
features and therefore didn't have all of the above bugs).
---
 src/backend/access/transam/xlog.c | 32 +++++++++++++++++++++++++++----
 src/include/access/xact.h         |  3 +--
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7cba4a16d92..fde5da00bfb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5880,6 +5880,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 	bool		stopsHere;
 	uint8		record_info;
 	TimestampTz recordXtime;
+	TransactionId recordXid;
 	char		recordRPName[MAXFNAMELEN];
 
 	/* We only consider stopping at COMMIT, ABORT or RESTORE POINT records */
@@ -5892,6 +5893,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 		recordXactCommitData = (xl_xact_commit_compact *) XLogRecGetData(record);
 		recordXtime = recordXactCommitData->xact_time;
+		recordXid = record->xl_xid;
 	}
 	else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
 	{
@@ -5899,6 +5901,15 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 		recordXactCommitData = (xl_xact_commit *) XLogRecGetData(record);
 		recordXtime = recordXactCommitData->xact_time;
+		recordXid = record->xl_xid;
+	}
+	else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
+	{
+		xl_xact_commit_prepared *recordXactCommitData;
+
+		recordXactCommitData = (xl_xact_commit_prepared *) XLogRecGetData(record);
+		recordXtime = recordXactCommitData->crec.xact_time;
+		recordXid = recordXactCommitData->xid;
 	}
 	else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
 	{
@@ -5906,6 +5917,15 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 		recordXactAbortData = (xl_xact_abort *) XLogRecGetData(record);
 		recordXtime = recordXactAbortData->xact_time;
+		recordXid = record->xl_xid;
+	}
+	else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
+	{
+		xl_xact_abort_prepared *recordXactAbortData;
+
+		recordXactAbortData = (xl_xact_abort_prepared *) XLogRecGetData(record);
+		recordXtime = recordXactAbortData->arec.xact_time;
+		recordXid = recordXactAbortData->xid;
 	}
 	else if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
 	{
@@ -5913,6 +5933,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 		recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
 		recordXtime = recordRestorePointData->rp_time;
+		recordXid = InvalidTransactionId;
 		strlcpy(recordRPName, recordRestorePointData->rp_name, MAXFNAMELEN);
 	}
 	else
@@ -5941,7 +5962,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 		 * they complete. A higher numbered xid will complete before you about
 		 * 50% of the time...
 		 */
-		stopsHere = (record->xl_xid == recoveryTargetXid);
+		stopsHere = (recordXid == recoveryTargetXid);
 		if (stopsHere)
 			*includeThis = recoveryTargetInclusive;
 	}
@@ -5976,11 +5997,13 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 	if (stopsHere)
 	{
-		recoveryStopXid = record->xl_xid;
+		recoveryStopXid = recordXid;
 		recoveryStopTime = recordXtime;
 		recoveryStopAfter = *includeThis;
 
-		if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
+		if (record_info == XLOG_XACT_COMMIT_COMPACT ||
+			record_info == XLOG_XACT_COMMIT ||
+			record_info == XLOG_XACT_COMMIT_PREPARED)
 		{
 			if (recoveryStopAfter)
 				ereport(LOG,
@@ -5993,7 +6016,8 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 								recoveryStopXid,
 								timestamptz_to_str(recoveryStopTime))));
 		}
-		else if (record_info == XLOG_XACT_ABORT)
+		else if (record_info == XLOG_XACT_ABORT ||
+				 record_info == XLOG_XACT_ABORT_PREPARED)
 		{
 			if (recoveryStopAfter)
 				ereport(LOG,
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 86dd6db777f..bd6962f11fc 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -177,8 +177,7 @@ typedef struct xl_xact_abort
 /*
  * COMMIT_PREPARED and ABORT_PREPARED are identical to COMMIT/ABORT records
  * except that we have to store the XID of the prepared transaction explicitly
- * --- the XID in the record header will be for the transaction doing the
- * COMMIT PREPARED or ABORT PREPARED command.
+ * --- the XID in the record header will be invalid.
  */
 
 typedef struct xl_xact_commit_prepared
-- 
GitLab