From ed5c4e4a14e9f9f4b818521b943bace0d5cd1e01 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 16 Jul 2001 22:43:34 +0000 Subject: [PATCH] Improve documentation about reasoning behind the order of operations in GetSnapshotData, GetNewTransactionId, CommitTransaction, AbortTransaction, etc. Correct race condition in transaction status testing in HeapTupleSatisfiesVacuum --- this wasn't important for old VACUUM with exclusive lock on its table, but it sure is important now. All per pghackers discussion 7/11/01 and 7/12/01. --- src/backend/access/transam/varsup.c | 14 +++++-- src/backend/access/transam/xact.c | 54 +++++++++++++++----------- src/backend/storage/ipc/sinval.c | 60 +++++++++++++++++++++++------ src/backend/utils/time/tqual.c | 31 +++++++++------ 4 files changed, 110 insertions(+), 49 deletions(-) diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 2b253fc5855..857f2c3d3e8 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -6,7 +6,7 @@ * Copyright (c) 2000, PostgreSQL Global Development Group * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.41 2001/07/12 04:11:13 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.42 2001/07/16 22:43:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -60,8 +60,16 @@ GetNewTransactionId(TransactionId *xid) * XXX by storing xid into MyProc without acquiring SInvalLock, we are * relying on fetch/store of an xid to be atomic, else other backends * might see a partially-set xid here. But holding both locks at once - * would be a nasty concurrency hit (and at this writing, could cause a - * deadlock against GetSnapshotData). So for now, assume atomicity. + * would be a nasty concurrency hit (and in fact could cause a deadlock + * against GetSnapshotData). So for now, assume atomicity. Note that + * readers of PROC xid field should be careful to fetch the value only + * once, rather than assume they can read it multiple times and get the + * same answer each time. + * + * A solution to the atomic-store problem would be to give each PROC + * its own spinlock used only for fetching/storing that PROC's xid. + * (SInvalLock would then mean primarily that PROCs couldn't be added/ + * removed while holding the lock.) */ if (MyProc != (PROC *) NULL) MyProc->xid = *xid; diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index d32a6dda978..f35e8d9203e 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.108 2001/07/16 22:43:33 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -1018,16 +1018,21 @@ CommitTransaction(void) CloseSequences(); AtEOXact_portals(); + + /* Here is where we really truly commit. */ RecordTransactionCommit(); /* * Let others know about no transaction in progress by me. Note that - * this must be done _before_ releasing locks we hold and + * this must be done _before_ releasing locks we hold and _after_ + * RecordTransactionCommit. + * * SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - * if xid 2' GetSnapshotData sees xid 1 as running then it must see * xid 0 as running as well or it will see two tuple versions - one - * deleted by xid 1 and one inserted by xid 0. + * deleted by xid 1 and one inserted by xid 0. See notes in + * GetSnapshotData. */ if (MyProc != (PROC *) NULL) { @@ -1038,6 +1043,12 @@ CommitTransaction(void) SpinRelease(SInvalLock); } + /* + * This is all post-commit cleanup. Note that if an error is raised + * here, it's too late to abort the transaction. This should be just + * noncritical resource releasing. + */ + RelationPurgeLocalRelation(true); AtEOXact_temp_relations(true); smgrDoPendingDeletes(true); @@ -1080,23 +1091,6 @@ AbortTransaction(void) /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); - /* - * Let others to know about no transaction in progress - vadim - * 11/26/96 - * - * XXX it'd be nice to acquire SInvalLock for this, but too much risk of - * lockup: what if we were holding SInvalLock when we elog'd? Net effect - * is that we are relying on fetch/store of an xid to be atomic, else - * other backends might see a partially-zeroed xid here. Would it be - * safe to release spins before we reset xid/xmin? But see also - * GetNewTransactionId, which does the same thing. - */ - if (MyProc != (PROC *) NULL) - { - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; - } - /* * Release any spinlocks or buffer context locks we might be holding * as quickly as possible. (Real locks, however, must be held till we @@ -1143,10 +1137,23 @@ AbortTransaction(void) AtAbort_Notify(); CloseSequences(); AtEOXact_portals(); + + /* Advertise the fact that we aborted in pg_log. */ RecordTransactionAbort(); - /* Count transaction abort in statistics collector */ - pgstat_count_xact_rollback(); + /* + * Let others know about no transaction in progress by me. Note that + * this must be done _before_ releasing locks we hold and _after_ + * RecordTransactionAbort. + */ + if (MyProc != (PROC *) NULL) + { + /* Lock SInvalLock because that's what GetSnapshotData uses. */ + SpinAcquire(SInvalLock); + MyProc->xid = InvalidTransactionId; + MyProc->xmin = InvalidTransactionId; + SpinRelease(SInvalLock); + } RelationPurgeLocalRelation(false); AtEOXact_temp_relations(false); @@ -1165,6 +1172,9 @@ AbortTransaction(void) SharedBufferChanged = false;/* safest place to do it */ + /* Count transaction abort in statistics collector */ + pgstat_count_xact_rollback(); + /* * State remains TRANS_ABORT until CleanupTransaction(). */ diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index a9b9046702c..a93e91237f6 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.36 2001/07/12 04:11:13 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.37 2001/07/16 22:43:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -193,8 +193,10 @@ TransactionIdIsInProgress(TransactionId xid) if (pOffset != INVALID_OFFSET) { PROC *proc = (PROC *) MAKE_PTR(pOffset); + /* Fetch xid just once - see GetNewTransactionId */ + TransactionId pxid = proc->xid; - if (TransactionIdEquals(proc->xid, xid)) + if (TransactionIdEquals(pxid, xid)) { result = true; break; @@ -236,9 +238,9 @@ GetXmaxRecent(TransactionId *XmaxRecent) if (pOffset != INVALID_OFFSET) { PROC *proc = (PROC *) MAKE_PTR(pOffset); - TransactionId xid; + /* Fetch xid just once - see GetNewTransactionId */ + TransactionId xid = proc->xid; - xid = proc->xid; if (! TransactionIdIsSpecial(xid)) { if (TransactionIdPrecedes(xid, result)) @@ -256,8 +258,19 @@ GetXmaxRecent(TransactionId *XmaxRecent) *XmaxRecent = result; } -/* +/*---------- * GetSnapshotData -- returns information about running transactions. + * + * The returned snapshot includes xmin (lowest still-running xact ID), + * xmax (next xact ID to be assigned), and a list of running xact IDs + * in the range xmin <= xid < xmax. It is used as follows: + * All xact IDs < xmin are considered finished. + * All xact IDs >= xmax are considered still running. + * For an xact ID xmin <= xid < xmax, consult list to see whether + * it is considered running or not. + * This ensures that the set of transactions seen as "running" by the + * current xact will not change after it takes the snapshot. + *---------- */ Snapshot GetSnapshotData(bool serializable) @@ -287,12 +300,33 @@ GetSnapshotData(bool serializable) elog(ERROR, "Memory exhausted in GetSnapshotData"); } - /* - * Unfortunately, we have to call ReadNewTransactionId() after - * acquiring SInvalLock above. It's not good because - * ReadNewTransactionId() does SpinAcquire(XidGenLockId) but - * _necessary_. + /*-------------------- + * Unfortunately, we have to call ReadNewTransactionId() after acquiring + * SInvalLock above. It's not good because ReadNewTransactionId() does + * SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that + * no transactions exit the set of currently-running transactions + * between the time we fetch xmax and the time we finish building our + * snapshot. Otherwise we could have a situation like this: + * + * 1. Tx Old is running (in Read Committed mode). + * 2. Tx S reads new transaction ID into xmax, then + * is swapped out before acquiring SInvalLock. + * 3. Tx New gets new transaction ID (>= S' xmax), + * makes changes and commits. + * 4. Tx Old changes some row R changed by Tx New and commits. + * 5. Tx S finishes getting its snapshot data. It sees Tx Old as + * done, but sees Tx New as still running (since New >= xmax). + * + * Now S will see R changed by both Tx Old and Tx New, *but* does not + * see other changes made by Tx New. If S is supposed to be in + * Serializable mode, this is wrong. + * + * By locking SInvalLock before we read xmax, we ensure that TX Old + * cannot exit the set of running transactions seen by Tx S. Therefore + * both Old and New will be seen as still running => no inconsistency. + *-------------------- */ + ReadNewTransactionId(&(snapshot->xmax)); for (index = 0; index < segP->lastBackend; index++) @@ -302,6 +336,7 @@ GetSnapshotData(bool serializable) if (pOffset != INVALID_OFFSET) { PROC *proc = (PROC *) MAKE_PTR(pOffset); + /* Fetch xid just once - see GetNewTransactionId */ TransactionId xid = proc->xid; /* @@ -325,11 +360,12 @@ GetSnapshotData(bool serializable) if (serializable) MyProc->xmin = snapshot->xmin; - /* Serializable snapshot must be computed before any other... */ - Assert(MyProc->xmin != InvalidTransactionId); SpinRelease(SInvalLock); + /* Serializable snapshot must be computed before any other... */ + Assert(MyProc->xmin != InvalidTransactionId); + snapshot->xcnt = count; return snapshot; } diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 2dd56b6f08e..35113a36228 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.38 2001/07/12 04:11:13 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.39 2001/07/16 22:43:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -610,6 +610,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) * * If the inserting transaction aborted, then the tuple was never visible * to any other transaction, so we can delete it immediately. + * + * NOTE: must check TransactionIdIsInProgress (which looks in shared mem) + * before TransactionIdDidCommit/TransactionIdDidAbort (which look in + * pg_log). Otherwise we have a race condition where we might decide + * that a just-committed transaction crashed, because none of the tests + * succeed. xact.c is careful to record commit/abort in pg_log before + * it unsets MyProc->xid in shared memory. */ if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) { @@ -636,19 +643,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) } tuple->t_infomask |= HEAP_XMIN_COMMITTED; } + else if (TransactionIdIsInProgress(tuple->t_xmin)) + return HEAPTUPLE_INSERT_IN_PROGRESS; + else if (TransactionIdDidCommit(tuple->t_xmin)) + tuple->t_infomask |= HEAP_XMIN_COMMITTED; else if (TransactionIdDidAbort(tuple->t_xmin)) { tuple->t_infomask |= HEAP_XMIN_INVALID; return HEAPTUPLE_DEAD; } - else if (TransactionIdDidCommit(tuple->t_xmin)) - tuple->t_infomask |= HEAP_XMIN_COMMITTED; - else if (TransactionIdIsInProgress(tuple->t_xmin)) - return HEAPTUPLE_INSERT_IN_PROGRESS; else { /* - * Not Aborted, Not Committed, Not in Progress - + * Not in Progress, Not Committed, Not Aborted - * so it's from crashed process. - vadim 11/26/96 */ tuple->t_infomask |= HEAP_XMIN_INVALID; @@ -667,19 +674,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdDidAbort(tuple->t_xmax)) + if (TransactionIdIsInProgress(tuple->t_xmax)) + return HEAPTUPLE_DELETE_IN_PROGRESS; + else if (TransactionIdDidCommit(tuple->t_xmax)) + tuple->t_infomask |= HEAP_XMAX_COMMITTED; + else if (TransactionIdDidAbort(tuple->t_xmax)) { tuple->t_infomask |= HEAP_XMAX_INVALID; return HEAPTUPLE_LIVE; } - else if (TransactionIdDidCommit(tuple->t_xmax)) - tuple->t_infomask |= HEAP_XMAX_COMMITTED; - else if (TransactionIdIsInProgress(tuple->t_xmax)) - return HEAPTUPLE_DELETE_IN_PROGRESS; else { /* - * Not Aborted, Not Committed, Not in Progress - + * Not in Progress, Not Committed, Not Aborted - * so it's from crashed process. - vadim 06/02/97 */ tuple->t_infomask |= HEAP_XMAX_INVALID; -- GitLab