From 9e857436ef7dff8fb8a0bf43cfe62650e2be6be9 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 4 Dec 2013 00:10:47 +0200
Subject: [PATCH] Don't include unused space in XLOG_HEAP_NEWPAGE records.

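This is the same trick we use when taking a full page image of a buffer
passed to XLogInsert: the unused "hole" between pd_lower and pd_upper is
left out of the XLOG_HEAP_NEWPAGE record and zero-filled again at replay.

log_newpage() and log_newpage_buffer() take a new 'page_std' argument;
callers pass true only when the page follows the standard page layout.
The WAL record format changes, so XLOG_PAGE_MAGIC is bumped.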
---
 src/backend/access/gin/gininsert.c    |   4 +-
 src/backend/access/gist/gist.c        |   2 +-
 src/backend/access/heap/heapam.c      | 123 +++++++++++++++++---------
 src/backend/access/heap/rewriteheap.c |   6 +-
 src/backend/access/nbtree/nbtree.c    |   2 +-
 src/backend/access/nbtree/nbtsort.c   |   2 +-
 src/backend/access/spgist/spginsert.c |   6 +-
 src/backend/commands/tablecmds.c      |   8 +-
 src/backend/commands/vacuumlazy.c     |   6 +-
 src/include/access/heapam_xlog.h      |  10 ++-
 src/include/access/xlog_internal.h    |   2 +-
 11 files changed, 109 insertions(+), 62 deletions(-)

diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 556e31854ea..bd6ccd0e09c 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -435,10 +435,10 @@ ginbuildempty(PG_FUNCTION_ARGS)
 	START_CRIT_SECTION();
 	GinInitMetabuffer(MetaBuffer);
 	MarkBufferDirty(MetaBuffer);
-	log_newpage_buffer(MetaBuffer);
+	log_newpage_buffer(MetaBuffer, false);
 	GinInitBuffer(RootBuffer, GIN_LEAF);
 	MarkBufferDirty(RootBuffer);
-	log_newpage_buffer(RootBuffer);
+	log_newpage_buffer(RootBuffer, false);
 	END_CRIT_SECTION();
 
 	/* Unlock and release the buffers. */
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 99cbcaa58ee..815ad5ceee5 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -83,7 +83,7 @@ gistbuildempty(PG_FUNCTION_ARGS)
 	START_CRIT_SECTION();
 	GISTInitBuffer(buffer, F_LEAF);
 	MarkBufferDirty(buffer);
-	log_newpage_buffer(buffer);
+	log_newpage_buffer(buffer, true);
 	END_CRIT_SECTION();
 
 	/* Unlock and release the buffer */
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c13f87c4ced..8d596202ba4 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6207,16 +6207,22 @@ log_heap_update(Relation reln, Buffer oldbuf,
  * memory and writing them directly to smgr.  If you're using buffers, call
  * log_newpage_buffer instead.
  *
- * Note: the NEWPAGE log record is used for both heaps and indexes, so do
- * not do anything that assumes we are touching a heap.
+ * If the page follows the standard page layout, with a PageHeader and unused
+ * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
+ * the unused space to be left out from the WAL record, making it smaller.
  */
 XLogRecPtr
 log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
-			Page page)
+			Page page, bool page_std)
 {
 	xl_heap_newpage xlrec;
 	XLogRecPtr	recptr;
-	XLogRecData rdata[2];
+	XLogRecData rdata[3];
+
+	/*
+	 * Note: the NEWPAGE log record is used for both heaps and indexes, so do
+	 * not do anything that assumes we are touching a heap.
+	 */
 
 	/* NO ELOG(ERROR) from here till newpage op is logged */
 	START_CRIT_SECTION();
@@ -6225,15 +6231,58 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
 	xlrec.forknum = forkNum;
 	xlrec.blkno = blkno;
 
+	if (page_std)
+	{
+		/* Assume we can omit data between pd_lower and pd_upper */
+		uint16		lower = ((PageHeader) page)->pd_lower;
+		uint16		upper = ((PageHeader) page)->pd_upper;
+
+		if (lower >= SizeOfPageHeaderData &&
+			upper > lower &&
+			upper <= BLCKSZ)
+		{
+			xlrec.hole_offset = lower;
+			xlrec.hole_length = upper - lower;
+		}
+		else
+		{
+			/* No "hole" to compress out */
+			xlrec.hole_offset = 0;
+			xlrec.hole_length = 0;
+		}
+	}
+	else
+	{
+		/* Not a standard page header, don't try to eliminate "hole" */
+		xlrec.hole_offset = 0;
+		xlrec.hole_length = 0;
+	}
+
 	rdata[0].data = (char *) &xlrec;
 	rdata[0].len = SizeOfHeapNewpage;
 	rdata[0].buffer = InvalidBuffer;
 	rdata[0].next = &(rdata[1]);
 
-	rdata[1].data = (char *) page;
-	rdata[1].len = BLCKSZ;
-	rdata[1].buffer = InvalidBuffer;
-	rdata[1].next = NULL;
+	if (xlrec.hole_length == 0)
+	{
+		rdata[1].data = (char *) page;
+		rdata[1].len = BLCKSZ;
+		rdata[1].buffer = InvalidBuffer;
+		rdata[1].next = NULL;
+	}
+	else
+	{
+		/* must skip the hole */
+		rdata[1].data = (char *) page;
+		rdata[1].len = xlrec.hole_offset;
+		rdata[1].buffer = InvalidBuffer;
+		rdata[1].next = &rdata[2];
+
+		rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
+		rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
+		rdata[2].buffer = InvalidBuffer;
+		rdata[2].next = NULL;
+	}
 
 	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
 
@@ -6257,44 +6306,24 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
  * Caller should initialize the buffer and mark it dirty before calling this
  * function.  This function will set the page LSN and TLI.
  *
- * Note: the NEWPAGE log record is used for both heaps and indexes, so do
- * not do anything that assumes we are touching a heap.
+ * If the page follows the standard page layout, with a PageHeader and unused
+ * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
+ * the unused space to be left out from the WAL record, making it smaller.
  */
 XLogRecPtr
-log_newpage_buffer(Buffer buffer)
+log_newpage_buffer(Buffer buffer, bool page_std)
 {
-	xl_heap_newpage xlrec;
-	XLogRecPtr	recptr;
-	XLogRecData rdata[2];
 	Page		page = BufferGetPage(buffer);
+	RelFileNode rnode;
+	ForkNumber	forkNum;
+	BlockNumber blkno;
 
-	/* We should be in a critical section. */
+	/* Shared buffers should be modified in a critical section. */
 	Assert(CritSectionCount > 0);
 
-	BufferGetTag(buffer, &xlrec.node, &xlrec.forknum, &xlrec.blkno);
-
-	rdata[0].data = (char *) &xlrec;
-	rdata[0].len = SizeOfHeapNewpage;
-	rdata[0].buffer = InvalidBuffer;
-	rdata[0].next = &(rdata[1]);
-
-	rdata[1].data = page;
-	rdata[1].len = BLCKSZ;
-	rdata[1].buffer = InvalidBuffer;
-	rdata[1].next = NULL;
-
-	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
-
-	/*
-	 * The page may be uninitialized. If so, we can't set the LSN and TLI
-	 * because that would corrupt the page.
-	 */
-	if (!PageIsNew(page))
-	{
-		PageSetLSN(page, recptr);
-	}
+	BufferGetTag(buffer, &rnode, &forkNum, &blkno);
 
-	return recptr;
+	return log_newpage(&rnode, forkNum, blkno, page, page_std);
 }
 
 /*
@@ -6582,12 +6611,15 @@ static void
 heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
 {
 	xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
+	char	   *blk = ((char *) xlrec) + sizeof(xl_heap_newpage);
 	Buffer		buffer;
 	Page		page;
 
 	/* Backup blocks are not used in newpage records */
 	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
 
+	Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ - xlrec->hole_length);
+
 	/*
 	 * Note: the NEWPAGE log record is used for both heaps and indexes, so do
 	 * not do anything that assumes we are touching a heap.
@@ -6598,8 +6630,19 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 	page = (Page) BufferGetPage(buffer);
 
-	Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);
-	memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
+	if (xlrec->hole_length == 0)
+	{
+		memcpy((char *) page, blk, BLCKSZ);
+	}
+	else
+	{
+		memcpy((char *) page, blk, xlrec->hole_offset);
+		/* must zero-fill the hole */
+		MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
+		memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
+			   blk + xlrec->hole_offset,
+			   BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
+	}
 
 	/*
 	 * The page may be uninitialized. If so, we can't set the LSN because that
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 951894ce5ac..deec77ddff2 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -277,7 +277,8 @@ end_heap_rewrite(RewriteState state)
 			log_newpage(&state->rs_new_rel->rd_node,
 						MAIN_FORKNUM,
 						state->rs_blockno,
-						state->rs_buffer);
+						state->rs_buffer,
+						true);
 		RelationOpenSmgr(state->rs_new_rel);
 
 		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
@@ -622,7 +623,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 				log_newpage(&state->rs_new_rel->rd_node,
 							MAIN_FORKNUM,
 							state->rs_blockno,
-							page);
+							page,
+							true);
 
 			/*
 			 * Now write the page. We say isTemp = true even if it's not a
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 073190ffd53..207a4edbc57 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -222,7 +222,7 @@ btbuildempty(PG_FUNCTION_ARGS)
 			  (char *) metapage, true);
 	if (XLogIsNeeded())
 		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-					BTREE_METAPAGE, metapage);
+					BTREE_METAPAGE, metapage, false);
 
 	/*
 	 * An immediate sync is require even if we xlog'd the page, because the
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 52c5a2676ef..366f39522d8 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -274,7 +274,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 	if (wstate->btws_use_wal)
 	{
 		/* We use the heap NEWPAGE record type for this */
-		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
+		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
 	}
 
 	/*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 2a50d87c74b..de8fc4de6d5 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -169,7 +169,7 @@ spgbuildempty(PG_FUNCTION_ARGS)
 			  (char *) page, true);
 	if (XLogIsNeeded())
 		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-					SPGIST_METAPAGE_BLKNO, page);
+					SPGIST_METAPAGE_BLKNO, page, false);
 
 	/* Likewise for the root page. */
 	SpGistInitPage(page, SPGIST_LEAF);
@@ -179,7 +179,7 @@ spgbuildempty(PG_FUNCTION_ARGS)
 			  (char *) page, true);
 	if (XLogIsNeeded())
 		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-					SPGIST_ROOT_BLKNO, page);
+					SPGIST_ROOT_BLKNO, page, true);
 
 	/* Likewise for the null-tuples root page. */
 	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
@@ -189,7 +189,7 @@ spgbuildempty(PG_FUNCTION_ARGS)
 			  (char *) page, true);
 	if (XLogIsNeeded())
 		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-					SPGIST_NULL_BLKNO, page);
+					SPGIST_NULL_BLKNO, page, true);
 
 	/*
 	 * An immediate sync is required even if we xlog'd the pages, because the
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1aa1ad91277..1d9f29a7b7c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -9130,9 +9130,13 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
 										   src->smgr_rnode.backend,
 										   forkNum))));
 
-		/* XLOG stuff */
+		/*
+		 * WAL-log the copied page. Unfortunately we don't know what kind of
+		 * a page this is, so we have to log the full page including any
+		 * unused space.
+		 */
 		if (use_wal)
-			log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page);
+			log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
 
 		PageSetChecksumInplace(page, blkno);
 
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index fe2d9e78fa8..d346772600c 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -699,14 +699,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 				 * which will cause a PANIC.  To prevent that, check whether
 				 * the page has been previously WAL-logged, and if not, do that
 				 * now.
-				 *
-				 * XXX: It would be nice to use a logging method supporting
-				 * standard buffers here since log_newpage_buffer() will write
-				 * the full block instead of omitting the hole.
 				 */
 				if (RelationNeedsWAL(onerel) &&
 					PageGetLSN(page) == InvalidXLogRecPtr)
-					log_newpage_buffer(buf);
+					log_newpage_buffer(buf, true);
 
 				PageSetAllVisible(page);
 				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 4381778308f..63b73d03291 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -198,10 +198,12 @@ typedef struct xl_heap_newpage
 	RelFileNode node;
 	ForkNumber	forknum;
 	BlockNumber blkno;			/* location of new page */
-	/* entire page contents follow at end of record */
+	uint16		hole_offset;	/* number of bytes before "hole" */
+	uint16		hole_length;	/* number of bytes in "hole" */
+	/* entire page contents (minus the hole) follow at end of record */
 } xl_heap_newpage;
 
-#define SizeOfHeapNewpage	(offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber))
+#define SizeOfHeapNewpage	(offsetof(xl_heap_newpage, hole_length) + sizeof(uint16))
 
 /* flags for infobits_set */
 #define XLHL_XMAX_IS_MULTI		0x01
@@ -282,7 +284,7 @@ extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
 extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
 				 Buffer vm_buffer, TransactionId cutoff_xid);
 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
-			BlockNumber blk, Page page);
-extern XLogRecPtr log_newpage_buffer(Buffer buffer);
+			BlockNumber blk, Page page, bool page_std);
+extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
 
 #endif   /* HEAPAM_XLOG_H */
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index b6320eee3cf..9fba8c3db86 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -55,7 +55,7 @@ typedef struct BkpBlock
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD077	/* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD078	/* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
-- 
GitLab