From e9816533e39be464227b748ee5eeb3d9f688cd76 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 31 Oct 2008 19:40:27 +0000
Subject: [PATCH] Update FSM on WAL replay. This is a bit limited; the FSM is
 only updated on non-full-page-image WAL records, and quite arbitrarily, only
 if there's less than 20% free space on the page after the insert/update (not
 on HOT updates, though). The 20% cutoff should avoid most of the overhead,
 when replaying a bulk insertion, for example, while ensuring that pages that
 are full are marked as full in the FSM.

This is mostly to avoid the nasty worst case scenario, where you replay
from a PITR archive, and the FSM information in the base backup is really
out of date. If there were a lot of pages that the outdated FSM claims to
have free space, but that don't actually have any, the first unlucky inserter
after the recovery would traverse through all those pages, just to find
out that they're full. We didn't have this problem with the old FSM
implementation, because we simply threw the FSM information away on a
non-clean shutdown.
---
 src/backend/access/heap/heapam.c          | 65 ++++++++++++++++++++---
 src/backend/storage/freespace/freespace.c | 32 ++++++++++-
 src/include/storage/freespace.h           |  4 +-
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 8b0c826aaf2..49bca5b3299 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.267 2008/10/31 15:04:59 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -54,6 +54,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
+#include "storage/freespace.h"
 #include "storage/lmgr.h"
 #include "storage/procarray.h"
 #include "storage/smgr.h"
@@ -4022,6 +4023,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
 	int			nredirected;
 	int			ndead;
 	int			nunused;
+	Size		freespace;
 
 	if (record->xl_info & XLR_BKP_BLOCK_1)
 		return;
@@ -4053,6 +4055,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
 							nowunused, nunused,
 							clean_move);
 
+	freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
 	/*
 	 * Note: we don't worry about updating the page's prunability hints.
 	 * At worst this will cause an extra prune cycle to occur soon.
@@ -4062,6 +4066,15 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
 	PageSetTLI(page, ThisTimeLineID);
 	MarkBufferDirty(buffer);
 	UnlockReleaseBuffer(buffer);
+
+	/*
+	 * Update the FSM as well.
+	 *
+	 * XXX: We don't get here if the page was restored from full page image.
+	 * We don't bother to update the FSM in that case, it doesn't need to be
+	 * totally accurate anyway.
+	 */
+	XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
 }
 
 static void
@@ -4205,15 +4218,17 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
 	HeapTupleHeader htup;
 	xl_heap_header xlhdr;
 	uint32		newlen;
+	Size		freespace;
+	BlockNumber	blkno;
 
 	if (record->xl_info & XLR_BKP_BLOCK_1)
 		return;
 
+	blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+
 	if (record->xl_info & XLOG_HEAP_INIT_PAGE)
 	{
-		buffer = XLogReadBuffer(xlrec->target.node,
-							 ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-								true);
+		buffer = XLogReadBuffer(xlrec->target.node, blkno, true);
 		Assert(BufferIsValid(buffer));
 		page = (Page) BufferGetPage(buffer);
 
@@ -4221,9 +4236,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
 	}
 	else
 	{
-		buffer = XLogReadBuffer(xlrec->target.node,
-							 ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-								false);
+		buffer = XLogReadBuffer(xlrec->target.node, blkno, false);
 		if (!BufferIsValid(buffer))
 			return;
 		page = (Page) BufferGetPage(buffer);
@@ -4261,10 +4274,25 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
 	offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
 	if (offnum == InvalidOffsetNumber)
 		elog(PANIC, "heap_insert_redo: failed to add tuple");
+
+	freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
 	PageSetLSN(page, lsn);
 	PageSetTLI(page, ThisTimeLineID);
 	MarkBufferDirty(buffer);
 	UnlockReleaseBuffer(buffer);
+
+	/*
+	 * If the page is running low on free space, update the FSM as well.
+	 * Arbitrarily, our definition of "low" is less than 20%. We can't do
+	 * much better than that without knowing the fill-factor for the table.
+	 *
+	 * XXX: We don't get here if the page was restored from full page image.
+	 * We don't bother to update the FSM in that case, it doesn't need to be
+	 * totally accurate anyway.
+	 */
+	if (freespace < BLCKSZ / 5)
+		XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
 }
 
 /*
@@ -4289,6 +4317,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
 	xl_heap_header xlhdr;
 	int			hsize;
 	uint32		newlen;
+	Size		freespace;
 
 	if (record->xl_info & XLR_BKP_BLOCK_1)
 	{
@@ -4446,10 +4475,32 @@ newsame:;
 	offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
 	if (offnum == InvalidOffsetNumber)
 		elog(PANIC, "heap_update_redo: failed to add tuple");
+
+	freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
 	PageSetLSN(page, lsn);
 	PageSetTLI(page, ThisTimeLineID);
 	MarkBufferDirty(buffer);
 	UnlockReleaseBuffer(buffer);
+
+	/*
+	 * If the page is running low on free space, update the FSM as well.
+	 * Arbitrarily, our definition of "low" is less than 20%. We can't do
+	 * much better than that without knowing the fill-factor for the table.
+	 *
+	 * However, don't update the FSM on HOT updates, because after crash
+	 * recovery, either the old or the new tuple will certainly be dead and
+	 * prunable. After pruning, the page will have roughly as much free space
+	 * as it did before the update, assuming the new tuple is about the same
+	 * size as the old one.
+	 *
+	 * XXX: We don't get here if the page was restored from full page image.
+	 * We don't bother to update the FSM in that case, it doesn't need to be
+	 * totally accurate anyway.
+	 */
+	if (!hot_update && freespace < BLCKSZ / 5)
+		XLogRecordPageWithFreeSpace(xlrec->target.node,
+					ItemPointerGetBlockNumber(&(xlrec->newtid)), freespace);
 }
 
 static void
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 724e87fa204..10cca029d10 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.65 2008/10/31 15:05:00 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.66 2008/10/31 19:40:27 heikki Exp $
  *
  *
  * NOTES:
@@ -202,6 +202,36 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 	fsm_set_and_search(rel, addr, slot, new_cat, 0);
 }
 
+/*
+ * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in WAL
+ *		replay; the relation is named by a RelFileNode instead of a Relation
+ */
+void
+XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
+							Size spaceAvail)
+{
+	int			new_cat = fsm_space_avail_to_cat(spaceAvail);
+	FSMAddress	addr;			/* logical FSM address covering heapBlk */
+	uint16		slot;			/* slot for heapBlk within that FSM page */
+	BlockNumber blkno;			/* block number of that page in the FSM fork */
+	Buffer		buf;
+	Page		page;
+
+	/* Get the location of the FSM byte representing the heap block */
+	addr = fsm_get_location(heapBlk, &slot);
+	blkno = fsm_logical_to_physical(addr);
+
+	/* If the page doesn't exist already, extend (read from the FSM fork) */
+	buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
+	page = BufferGetPage(buf);
+	if (PageIsNew(page))		/* not initialized yet: set it up first */
+		PageInit(page, BLCKSZ, 0);
+
+	if (fsm_set_avail(page, slot, new_cat)) /* stores new_cat in this page */
+		MarkBufferDirty(buf);	/* skipped when fsm_set_avail returns false */
+	UnlockReleaseBuffer(buf);
+}
+
 /*
  * GetRecordedFreePage - return the amount of free space on a particular page,
  *		according to the FSM.
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index d417e8c9805..858be595284 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.29 2008/09/30 10:52:13 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.30 2008/10/31 19:40:27 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,6 +27,8 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
 							  Size spaceNeeded);
 extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
 									Size spaceAvail);
+extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
+										Size spaceAvail);
 
 extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
 extern void FreeSpaceMapVacuum(Relation rel);
-- 
GitLab