diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index edf33d90b1e72f198d23d50f32151f7b02b07480..6d438a7662e1b46a4ff25e3008ab71058bf79872 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -261,12 +261,14 @@ we need to be sure we don't miss or re-scan any items. A deleted page can only be reclaimed once there is no scan or search that has a reference to it; until then, it must stay in place with its -right-link undisturbed. We implement this by waiting until all -transactions that were running at the time of deletion are dead; which is +right-link undisturbed. We implement this by waiting until all active +snapshots and registered snapshots as of the deletion are gone; which is overly strong, but is simple to implement within Postgres. When marked dead, a deleted page is labeled with the next-transaction counter value. VACUUM can reclaim the page for re-use when this transaction number is -older than the oldest open transaction. +older than RecentGlobalXmin. As collateral damage, this implementation +also waits for running XIDs with no snapshots and for snapshots taken +until the next transaction to allocate an XID commits. Reclaiming a page doesn't actually change its state on disk --- we simply record it in the shared-memory free space map, from which it will be diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c5e147ff435224111942846850e4e193eb247c0d..e6dec618c7781b7a454b899954be345593864f79 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -558,19 +558,9 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) */ if (XLogStandbyInfoActive()) { - TransactionId latestRemovedXid; - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); - /* - * opaque->btpo.xact is the threshold value not the - * value to measure conflicts against. We must retreat - * by one from it to get the correct conflict xid. - */ - latestRemovedXid = opaque->btpo.xact; - TransactionIdRetreat(latestRemovedXid); - - _bt_log_reuse_page(rel, blkno, latestRemovedXid); + _bt_log_reuse_page(rel, blkno, opaque->btpo.xact); } /* Okay to use page. Re-initialize and return it */ @@ -685,7 +675,6 @@ bool _bt_page_recyclable(Page page) { BTPageOpaque opaque; - TransactionId cutoff; /* * It's possible to find an all-zeroes page in an index --- for example, a @@ -698,18 +687,11 @@ _bt_page_recyclable(Page page) /* * Otherwise, recycle if deleted and too old to have any processes - * interested in it. If we are generating records for Hot Standby - * defer page recycling until RecentGlobalXmin to respect user - * controls specified by vacuum_defer_cleanup_age or hot_standby_feedback. + * interested in it. */ - if (XLogStandbyInfoActive()) - cutoff = RecentGlobalXmin; - else - cutoff = RecentXmin; - opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (P_ISDELETED(opaque) && - TransactionIdPrecedesOrEquals(opaque->btpo.xact, cutoff)) + TransactionIdPrecedes(opaque->btpo.xact, RecentGlobalXmin)) return true; return false; } @@ -1376,7 +1358,13 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack) /* * Mark the page itself deleted. It can be recycled when all current - * transactions are gone. + * transactions are gone. Storing GetTopTransactionId() would work, but + * we're in VACUUM and would not otherwise have an XID. Having already + * updated links to the target, ReadNewTransactionId() suffices as an + * upper bound. Any scan having retained a now-stale link is advertising + * in its PGXACT an xmin less than or equal to the value we read here. It + * will continue to do so, holding back RecentGlobalXmin, for the duration + * of that scan. */ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 3b351a8b9641d1cba148e5c84b3ac5779b9ac7a2..deca38c57c2c88921bdae6d750673ce4d2c81534 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -968,7 +968,11 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) /* * Btree reuse page records exist to provide a conflict point * when we reuse pages in the index via the FSM. That's all it - * does though. + * does though. latestRemovedXid was the page's btpo.xact. The + * btpo.xact < RecentGlobalXmin test in _bt_page_recyclable() + * conceptually mirrors the pgxact->xmin > limitXmin test in + * GetConflictingVirtualXIDs(). Consequently, one XID value + * achieves the same exclusion effect on master and standby. */ { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);