diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index de7b3fc80cf67a2dd558a30edf95786a5149ab62..6f6611210fe4b8ac722de5032384598871c718ca 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -10,13 +10,14 @@ * the passed-in buffer. The caller must hold not only a pin, but at least * shared buffer content lock on the buffer containing the tuple. * - * NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array) + * NOTE: When using a non-MVCC snapshot, we must check + * TransactionIdIsInProgress (which looks in the PGXACT array) * before TransactionIdDidCommit/TransactionIdDidAbort (which look in * pg_clog). Otherwise we have a race condition: we might decide that a * just-committed transaction crashed, because none of the tests succeed. * xact.c is careful to record commit/abort in pg_clog before it unsets - * MyPgXact->xid in PGXACT array. That fixes that problem, but it also - * means there is a window where TransactionIdIsInProgress and + * MyPgXact->xid in the PGXACT array. That fixes that problem, but it + * also means there is a window where TransactionIdIsInProgress and * TransactionIdDidCommit will both return true. If we check only * TransactionIdDidCommit, we could consider a tuple committed when a * later GetSnapshotData call will still think the originating transaction @@ -26,6 +27,11 @@ * subtransactions of our own main transaction and so there can't be any * race condition. * + * When using an MVCC snapshot, we rely on XidInMVCCSnapshot rather than + * TransactionIdIsInProgress, but the logic is otherwise the same: do not + * check pg_clog until after deciding that the xact is no longer in progress. + * + * * Summary of visibility functions: * * HeapTupleSatisfiesMVCC() @@ -936,9 +942,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, * transactions started after the snapshot was taken * changes made by the current command * - * (Notice, however, that the tuple status hint bits will be updated on the - * basis of the true state of the transaction, even if we then pretend we - * can't see it.) + * Notice that here, we will not update the tuple status hint bits if the + * inserting/deleting transaction is still running according to our snapshot, + * even if in reality it's committed or aborted by now. This is intentional. + * Checking the true transaction state would require access to high-traffic + * shared data structures, creating contention we'd rather do without, and it + * would not change the result of our visibility check anyway. The hint bits + * will be updated by the first visitor that has a snapshot new enough to see + * the inserting/deleting transaction as done. In the meantime, the cost of + * leaving the hint bits unset is basically that each HeapTupleSatisfiesMVCC + * call will need to run TransactionIdIsCurrentTransactionId in addition to + * XidInMVCCSnapshot (but it would have to do the latter anyway). In the old + * coding where we tried to set the hint bits as soon as possible, we instead + * did TransactionIdIsInProgress in each call --- to no avail, as long as the + * inserting/deleting transaction was still running --- which was more cycles + * and more contention on the PGXACT array. */ bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, @@ -961,7 +979,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (TransactionIdIsCurrentTransactionId(xvac)) return false; - if (!TransactionIdIsInProgress(xvac)) + if (!XidInMVCCSnapshot(xvac, snapshot)) { if (TransactionIdDidCommit(xvac)) { @@ -980,7 +998,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (!TransactionIdIsCurrentTransactionId(xvac)) { - if (TransactionIdIsInProgress(xvac)) + if (XidInMVCCSnapshot(xvac, snapshot)) return false; if (TransactionIdDidCommit(xvac)) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, @@ -1035,7 +1053,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, else return false; /* deleted before scan started */ } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) return false; else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, @@ -1048,14 +1066,15 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return false; } } + else + { + /* xmin is committed, but maybe not according to our snapshot */ + if (!HeapTupleHeaderXminFrozen(tuple) && + XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + return false; /* treat as still in progress */ + } - /* - * By here, the inserting transaction has committed - have to check - * when... - */ - if (!HeapTupleHeaderXminFrozen(tuple) - && XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) - return false; /* treat as still in progress */ + /* by here, the inserting transaction has committed */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */ return true; @@ -1082,15 +1101,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, else return false; /* deleted before scan started */ } - if (TransactionIdIsInProgress(xmax)) + if (XidInMVCCSnapshot(xmax, snapshot)) return true; if (TransactionIdDidCommit(xmax)) - { - /* updating transaction committed, but when? */ - if (XidInMVCCSnapshot(xmax, snapshot)) - return true; /* treat as still in progress */ - return false; - } + return false; /* updating transaction committed */ /* it must have aborted or crashed */ return true; } @@ -1105,7 +1119,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return false; /* deleted before scan started */ } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) return true; if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) @@ -1120,12 +1134,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, HeapTupleHeaderGetRawXmax(tuple)); } + else + { + /* xmax is committed, but maybe not according to our snapshot */ + if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + return true; /* treat as still in progress */ + } - /* - * OK, the deleting transaction committed too ... but when? - */ - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) - return true; /* treat as still in progress */ + /* xmax transaction committed */ return false; } @@ -1383,14 +1399,15 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, /* * HeapTupleIsSurelyDead * - * Determine whether a tuple is surely dead. We sometimes use this - * in lieu of HeapTupleSatisifesVacuum when the tuple has just been - * tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that - * can be set should already be set. We assume that if no hint bits - * either for xmin or xmax, the transaction is still running. This is - * therefore faster than HeapTupleSatisfiesVacuum, because we don't - * consult CLOG (and also because we don't need to give an exact answer, - * just whether or not the tuple is surely dead). + * Cheaply determine whether a tuple is surely dead to all onlookers. + * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the + * tuple has just been tested by another visibility routine (usually + * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set + * should already be set. We assume that if no hint bits are set, the xmin + * or xmax transaction is still running. This is therefore faster than + * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG. + * It's okay to return FALSE when in doubt, but we must return TRUE only + * if the tuple is removable. */ bool HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) @@ -1443,8 +1460,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) * * Note: GetSnapshotData never stores either top xid or subxids of our own * backend into a snapshot, so these xids will not be reported as "running" - * by this function. This is OK for current uses, because we actually only - * apply this for known-committed XIDs. + * by this function. This is OK for current uses, because we always check + * TransactionIdIsCurrentTransactionId first, except for known-committed + * XIDs which could not be ours anyway. */ static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) @@ -1481,7 +1499,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) */ if (!snapshot->suboverflowed) { - /* full data, so search subxip */ + /* we have full data, so search subxip */ int32 j; for (j = 0; j < snapshot->subxcnt; j++) @@ -1494,7 +1512,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) } else { - /* overflowed, so convert xid to top-level */ + /* + * Snapshot overflowed, so convert xid to top-level. This is safe + * because we eliminated too-old XIDs above. + */ xid = SubTransGetTopmostTransaction(xid); /* @@ -1525,7 +1546,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) */ if (snapshot->suboverflowed) { - /* overflowed, so convert xid to top-level */ + /* + * Snapshot overflowed, so convert xid to top-level. This is safe + * because we eliminated too-old XIDs above. + */ xid = SubTransGetTopmostTransaction(xid); /*