From 406d61835b97a801807913e0fc67eadd9c6a3ffa Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 7 Jul 2011 22:35:09 +0300
Subject: [PATCH] SSI has a race condition, where the order of commit sequence
 numbers of transactions might not match the order the work done in those
 transactions become visible to others. The logic in SSI, however, assumed
 that it does. Fix that by having two sequence numbers for each serializable
 transaction, one taken before a transaction becomes visible to others, and
 one after it. This is easier than trying to make the the transition totally
 atomic, which would require holding ProcArrayLock and
 SerializableXactHashLock at the same time. By using prepareSeqNo instead of
 commitSeqNo in a few places where commit sequence numbers are compared, we
 can make those comparisons err on the safe side when we don't know for sure
 which committed first.

Per analysis by Kevin Grittner and Dan Ports, but this approach to fix it
is different from the original patch.
---
 src/backend/storage/lmgr/predicate.c      | 29 +++++++++--------------
 src/include/storage/predicate_internals.h | 21 ++++++++++++++--
 2 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 7f6dcbb6ca6..d93de7de904 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1190,6 +1190,7 @@ InitPredicateLocks(void)
 		}
 		PredXact->OldCommittedSxact = CreatePredXact();
 		SetInvalidVirtualTransactionId(PredXact->OldCommittedSxact->vxid);
+		PredXact->OldCommittedSxact->prepareSeqNo = 0;
 		PredXact->OldCommittedSxact->commitSeqNo = 0;
 		PredXact->OldCommittedSxact->SeqNo.lastCommitBeforeSnapshot = 0;
 		SHMQueueInit(&PredXact->OldCommittedSxact->outConflicts);
@@ -1650,6 +1651,7 @@ RegisterSerializableTransactionInt(Snapshot snapshot)
 	/* Initialize the structure. */
 	sxact->vxid = vxid;
 	sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
+	sxact->prepareSeqNo = InvalidSerCommitSeqNo;
 	sxact->commitSeqNo = InvalidSerCommitSeqNo;
 	SHMQueueInit(&(sxact->outConflicts));
 	SHMQueueInit(&(sxact->inConflicts));
@@ -3267,8 +3269,8 @@ ReleasePredicateLocks(bool isCommit)
 			&& SxactIsCommitted(conflict->sxactIn))
 		{
 			if ((MySerializableXact->flags & SXACT_FLAG_CONFLICT_OUT) == 0
-				|| conflict->sxactIn->commitSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
-				MySerializableXact->SeqNo.earliestOutConflictCommit = conflict->sxactIn->commitSeqNo;
+				|| conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
+				MySerializableXact->SeqNo.earliestOutConflictCommit = conflict->sxactIn->prepareSeqNo;
 			MySerializableXact->flags |= SXACT_FLAG_CONFLICT_OUT;
 		}
 
@@ -4407,18 +4409,13 @@ OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
 		{
 			SERIALIZABLEXACT *t2 = conflict->sxactIn;
 
-			/*
-			 * Note that if T2 is merely prepared but not yet committed, we
-			 * rely on t->commitSeqNo being InvalidSerCommitSeqNo, which is
-			 * larger than any valid commit sequence number.
-			 */
 			if (SxactIsPrepared(t2)
 				&& (!SxactIsCommitted(reader)
-					|| t2->commitSeqNo <= reader->commitSeqNo)
+					|| t2->prepareSeqNo <= reader->commitSeqNo)
 				&& (!SxactIsCommitted(writer)
-					|| t2->commitSeqNo <= writer->commitSeqNo)
+					|| t2->prepareSeqNo <= writer->commitSeqNo)
 				&& (!SxactIsReadOnly(reader)
-			   || t2->commitSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
+			   || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
 			{
 				failure = true;
 				break;
@@ -4459,17 +4456,11 @@ OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
 		{
 			SERIALIZABLEXACT *t0 = conflict->sxactOut;
 
-			/*
-			 * Note that if the writer is merely prepared but not yet
-			 * committed, we rely on writer->commitSeqNo being
-			 * InvalidSerCommitSeqNo, which is larger than any valid commit
-			 * sequence number.
-			 */
 			if (!SxactIsDoomed(t0)
 				&& (!SxactIsCommitted(t0)
-					|| t0->commitSeqNo >= writer->commitSeqNo)
+					|| t0->commitSeqNo >= writer->prepareSeqNo)
 				&& (!SxactIsReadOnly(t0)
-			   || t0->SeqNo.lastCommitBeforeSnapshot >= writer->commitSeqNo))
+			   || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
 			{
 				failure = true;
 				break;
@@ -4608,6 +4599,7 @@ PreCommit_CheckForSerializationFailure(void)
 						 offsetof(RWConflictData, inLink));
 	}
 
+	MySerializableXact->prepareSeqNo = ++(PredXact->LastSxactCommitSeqNo);
 	MySerializableXact->flags |= SXACT_FLAG_PREPARED;
 
 	LWLockRelease(SerializableXactHashLock);
@@ -4782,6 +4774,7 @@ predicatelock_twophase_recover(TransactionId xid, uint16 info,
 		sxact->pid = 0;
 
 		/* a prepared xact hasn't committed yet */
+		sxact->prepareSeqNo = RecoverySerCommitSeqNo;
 		sxact->commitSeqNo = InvalidSerCommitSeqNo;
 		sxact->finishedBefore = InvalidTransactionId;
 
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 0c90f275d65..fcbf2d8a220 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -57,9 +57,26 @@ typedef struct SERIALIZABLEXACT
 {
 	VirtualTransactionId vxid;	/* The executing process always has one of
 								 * these. */
+
+	/*
+	 * We use two numbers to track the order that transactions commit. Before
+	 * commit, a transaction is marked as prepared, and prepareSeqNo is set.
+	 * Shortly after commit, it's marked as committed, and commitSeqNo is set.
+	 * This doesn't give a strict commit order, but these two values together
+	 * are good enough for us, as we can always err on the safe side and
+	 * assume that there's a conflict, if we can't be sure of the exact
+	 * ordering of two commits.
+	 *
+	 * Note that a transaction is marked as prepared for a short period during
+	 * commit processing, even if two-phase commit is not used. But with
+	 * two-phase commit, a transaction can stay in prepared state for some
+	 * time.
+	 */
+	SerCommitSeqNo prepareSeqNo;
 	SerCommitSeqNo commitSeqNo;
-	union						/* these values are not both interesting at
-								 * the same time */
+
+	/* these values are not both interesting at the same time */
+	union
 	{
 		SerCommitSeqNo earliestOutConflictCommit;		/* when committed with
 														 * conflict out */
-- 
GitLab