diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 454ca310f339db34a4cc352899fea1d663abf93b..0f4cea124d7436380c730203e6cfd1518bc5d3b2 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1655,8 +1655,9 @@ CheckPointMultiXact(void) * Set the next-to-be-assigned MultiXactId and offset * * This is used when we can determine the correct next ID/offset exactly - * from a checkpoint record. We need no locking since it is only called - * during bootstrap and XLog replay. + * from a checkpoint record. Although this is only called during bootstrap + * and XLog replay, we take the lock in case any hot-standby backends are + * examining the values. */ void MultiXactSetNextMXact(MultiXactId nextMulti, @@ -1664,8 +1665,10 @@ MultiXactSetNextMXact(MultiXactId nextMulti, { debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u", nextMulti, nextMultiOffset); + LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->nextMXact = nextMulti; MultiXactState->nextOffset = nextMultiOffset; + LWLockRelease(MultiXactGenLock); } /* @@ -1674,12 +1677,14 @@ MultiXactSetNextMXact(MultiXactId nextMulti, * * This is used when we can determine minimum safe values from an XLog * record (either an on-line checkpoint or an mxact creation log entry). - * We need no locking since it is only called during XLog replay. + * Although this is only called during XLog replay, we take the lock in case + * any hot-standby backends are examining the values. */ void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset) { + LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti)) { debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti); @@ -1691,6 +1696,7 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti, minMultiOffset); MultiXactState->nextOffset = minMultiOffset; } + LWLockRelease(MultiXactGenLock); } /* diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 6e84cd0a21671486693e7f94d5fda8efdedf4bb4..3d08e92d3a747cc9426206dc0623ff390f18b09c 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1715,6 +1715,10 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) /* * Examine subtransaction XIDs ... they should all follow main * XID, and they may force us to advance nextXid. + * + * We don't expect anyone else to modify nextXid, hence we don't + * need to hold a lock while examining it. We still acquire the + * lock to modify it, though. */ subxids = (TransactionId *) (buf + MAXALIGN(sizeof(TwoPhaseFileHeader))); @@ -1726,8 +1730,10 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) if (TransactionIdFollowsOrEquals(subxid, ShmemVariableCache->nextXid)) { + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextXid = subxid; TransactionIdAdvance(ShmemVariableCache->nextXid); + LWLockRelease(XidGenLock); } } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b128bfda36dbf00c0ef49a34bd2d48d6cdf42218..5e59c1ab1967a0516ef5e240286c36cab9ae2570 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6625,12 +6625,20 @@ StartupXLOG(void) errcontext.previous = error_context_stack; error_context_stack = &errcontext; - /* nextXid must be beyond record's xid */ + /* + * ShmemVariableCache->nextXid must be beyond record's xid. + * + * We don't expect anyone else to modify nextXid, hence we + * don't need to hold a lock while examining it. We still + * acquire the lock to modify it, though. + */ if (TransactionIdFollowsOrEquals(record->xl_xid, ShmemVariableCache->nextXid)) { + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextXid = record->xl_xid; TransactionIdAdvance(ShmemVariableCache->nextXid); + LWLockRelease(XidGenLock); } /* @@ -6656,6 +6664,7 @@ StartupXLOG(void) TransactionIdIsValid(record->xl_xid)) RecordKnownAssignedTransactionIds(record->xl_xid); + /* Now apply the WAL record itself */ RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record); /* Pop the error context stack */ @@ -6971,8 +6980,10 @@ StartupXLOG(void) XLogCtl->ckptXid = ControlFile->checkPointCopy.nextXid; /* also initialize latestCompletedXid, to nextXid - 1 */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid; TransactionIdRetreat(ShmemVariableCache->latestCompletedXid); + LWLockRelease(ProcArrayLock); /* * Start up the commit log and subtrans, if not already done for @@ -8547,12 +8558,18 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) { Oid nextOid; + /* + * We used to try to take the maximum of ShmemVariableCache->nextOid + * and the recorded nextOid, but that fails if the OID counter wraps + * around. Since no OID allocation should be happening during replay + * anyway, better to just believe the record exactly. We still take + * OidGenLock while setting the variable, just in case. + */ memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid)); - if (ShmemVariableCache->nextOid < nextOid) - { - ShmemVariableCache->nextOid = nextOid; - ShmemVariableCache->oidCount = 0; - } + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); + ShmemVariableCache->nextOid = nextOid; + ShmemVariableCache->oidCount = 0; + LWLockRelease(OidGenLock); } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { @@ -8560,9 +8577,13 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); /* In a SHUTDOWN checkpoint, believe the counters exactly */ + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextXid = checkPoint.nextXid; + LWLockRelease(XidGenLock); + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; + LWLockRelease(OidGenLock); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -8575,7 +8596,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) if (InArchiveRecovery && !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) && XLogRecPtrIsInvalid(ControlFile->backupEndPoint)) - ereport(ERROR, + ereport(PANIC, (errmsg("online backup was canceled, recovery cannot continue"))); /* @@ -8641,15 +8662,17 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) CheckPoint checkPoint; memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); - /* In an ONLINE checkpoint, treat the counters like NEXTOID */ + /* In an ONLINE checkpoint, treat the XID counter as a minimum */ + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); if (TransactionIdPrecedes(ShmemVariableCache->nextXid, checkPoint.nextXid)) ShmemVariableCache->nextXid = checkPoint.nextXid; - if (ShmemVariableCache->nextOid < checkPoint.nextOid) - { - ShmemVariableCache->nextOid = checkPoint.nextOid; - ShmemVariableCache->oidCount = 0; - } + LWLockRelease(XidGenLock); + /* ... but still treat OID counter as exact */ + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); + ShmemVariableCache->nextOid = checkPoint.nextOid; + ShmemVariableCache->oidCount = 0; + LWLockRelease(OidGenLock); MultiXactAdvanceNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); if (TransactionIdPrecedes(ShmemVariableCache->oldestXid, diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 8bda474bc712938ce8619e5ba715c12e7ba45deb..09b7311e7bcd82fbdc16d62ffcd8f3214193ab50 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -654,17 +654,28 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) running->latestCompletedXid)) ShmemVariableCache->latestCompletedXid = running->latestCompletedXid; - /* nextXid must be beyond any observed xid */ + Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); + + LWLockRelease(ProcArrayLock); + + /* + * ShmemVariableCache->nextXid must be beyond any observed xid. + * + * We don't expect anyone else to modify nextXid, hence we don't need to + * hold a lock while examining it. We still acquire the lock to modify + * it, though. + */ nextXid = latestObservedXid; TransactionIdAdvance(nextXid); if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid)) + { + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextXid = nextXid; + LWLockRelease(XidGenLock); + } - Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); Assert(TransactionIdIsValid(ShmemVariableCache->nextXid)); - LWLockRelease(ProcArrayLock); - KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); if (standbyState == STANDBY_SNAPSHOT_READY) elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled"); @@ -1690,6 +1701,13 @@ GetOldestActiveTransactionId(void) LWLockAcquire(ProcArrayLock, LW_SHARED); + /* + * It's okay to read nextXid without acquiring XidGenLock because (1) we + * assume TransactionIds can be read atomically and (2) we don't care if + * we get a slightly stale value. It can't be very stale anyway, because + * the LWLockAcquire above will have done any necessary memory + * interlocking. + */ oldestRunningXid = ShmemVariableCache->nextXid; /* @@ -2609,7 +2627,9 @@ RecordKnownAssignedTransactionIds(TransactionId xid) /* ShmemVariableCache->nextXid must be beyond any observed xid */ next_expected_xid = latestObservedXid; TransactionIdAdvance(next_expected_xid); + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->nextXid = next_expected_xid; + LWLockRelease(XidGenLock); } }