From 877b088785b178c50e7976d86c82dfafa4031792 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 4 Apr 2014 13:12:38 +0300
Subject: [PATCH] Avoid allocations in critical sections.

If a palloc in a critical section fails, it becomes a PANIC.
---
 src/backend/access/nbtree/nbtinsert.c   | 55 +++++++++++++------------
 src/backend/access/spgist/spgdoinsert.c |  8 ++--
 src/backend/access/transam/xlog.c       | 22 ++++------
 src/backend/storage/page/bufpage.c      | 21 ++++------
 4 files changed, 49 insertions(+), 57 deletions(-)

diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index d2ca8d9350e..922412eef15 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -1995,8 +1995,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	BTPageOpaque lopaque;
 	ItemId		itemid;
 	IndexTuple	item;
-	Size		itemsz;
-	IndexTuple	new_item;
+	IndexTuple	left_item;
+	Size		left_item_sz;
+	IndexTuple	right_item;
+	Size		right_item_sz;
 	Buffer		metabuf;
 	Page		metapg;
 	BTMetaPageData *metad;
@@ -2016,6 +2018,26 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	metapg = BufferGetPage(metabuf);
 	metad = BTPageGetMeta(metapg);
 
+	/*
+	 * Create downlink item for left page (old root).  Since this will be the
+	 * first item in a non-leaf page, it implicitly has minus-infinity key
+	 * value, so we need not store any actual key in it.
+	 */
+	left_item_sz = sizeof(IndexTupleData);
+	left_item = (IndexTuple) palloc(left_item_sz);
+	left_item->t_info = left_item_sz;
+	ItemPointerSet(&(left_item->t_tid), lbkno, P_HIKEY);
+
+	/*
+	 * Create downlink item for right page.  The key for it is obtained from
+	 * the "high key" position in the left page.
+	 */
+	itemid = PageGetItemId(lpage, P_HIKEY);
+	right_item_sz = ItemIdGetLength(itemid);
+	item = (IndexTuple) PageGetItem(lpage, itemid);
+	right_item = CopyIndexTuple(item);
+	ItemPointerSet(&(right_item->t_tid), rbkno, P_HIKEY);
+
 	/* NO EREPORT(ERROR) from here till newroot op is logged */
 	START_CRIT_SECTION();
 
@@ -2033,16 +2055,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	metad->btm_fastroot = rootblknum;
 	metad->btm_fastlevel = rootopaque->btpo.level;
 
-	/*
-	 * Create downlink item for left page (old root).  Since this will be the
-	 * first item in a non-leaf page, it implicitly has minus-infinity key
-	 * value, so we need not store any actual key in it.
-	 */
-	itemsz = sizeof(IndexTupleData);
-	new_item = (IndexTuple) palloc(itemsz);
-	new_item->t_info = itemsz;
-	ItemPointerSet(&(new_item->t_tid), lbkno, P_HIKEY);
-
 	/*
 	 * Insert the left page pointer into the new root page.  The root page is
 	 * the rightmost page on its level so there is no "high key" in it; the
@@ -2051,32 +2063,20 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	 * Note: we *must* insert the two items in item-number order, for the
 	 * benefit of _bt_restore_page().
 	 */
-	if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY,
+	if (PageAddItem(rootpage, (Item) left_item, left_item_sz, P_HIKEY,
 					false, false) == InvalidOffsetNumber)
 		elog(PANIC, "failed to add leftkey to new root page"
 			 " while splitting block %u of index \"%s\"",
 			 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
-	pfree(new_item);
-
-	/*
-	 * Create downlink item for right page.  The key for it is obtained from
-	 * the "high key" position in the left page.
-	 */
-	itemid = PageGetItemId(lpage, P_HIKEY);
-	itemsz = ItemIdGetLength(itemid);
-	item = (IndexTuple) PageGetItem(lpage, itemid);
-	new_item = CopyIndexTuple(item);
-	ItemPointerSet(&(new_item->t_tid), rbkno, P_HIKEY);
 
 	/*
 	 * insert the right page pointer into the new root page.
 	 */
-	if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY,
+	if (PageAddItem(rootpage, (Item) right_item, right_item_sz, P_FIRSTKEY,
 					false, false) == InvalidOffsetNumber)
 		elog(PANIC, "failed to add rightkey to new root page"
 			 " while splitting block %u of index \"%s\"",
 			 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
-	pfree(new_item);
 
 	/* Clear the incomplete-split flag in the left child */
 	Assert(P_INCOMPLETE_SPLIT(lopaque));
@@ -2129,6 +2129,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	/* done with metapage */
 	_bt_relbuf(rel, metabuf);
 
+	pfree(left_item);
+	pfree(right_item);
+
 	return rootbuf;
 }
 
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 1f5d97694db..48f32cda241 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -122,7 +122,8 @@ cmpOffsetNumbers(const void *a, const void *b)
  *
  * NB: this is used during WAL replay, so beware of trying to make it too
  * smart.  In particular, it shouldn't use "state" except for calling
- * spgFormDeadTuple().
+ * spgFormDeadTuple().  This is also used in a critical section, so no
+ * pallocs either!
  */
 void
 spgPageIndexMultiDelete(SpGistState *state, Page page,
@@ -131,7 +132,7 @@ spgPageIndexMultiDelete(SpGistState *state, Page page,
 						BlockNumber blkno, OffsetNumber offnum)
 {
 	OffsetNumber firstItem;
-	OffsetNumber *sortednos;
+	OffsetNumber sortednos[MaxIndexTuplesPerPage];
 	SpGistDeadTuple tuple = NULL;
 	int			i;
 
@@ -145,7 +146,6 @@ spgPageIndexMultiDelete(SpGistState *state, Page page,
 	 * replacement tuples.)  However, we must not scribble on the caller's
 	 * array, so we have to make a copy.
 	 */
-	sortednos = (OffsetNumber *) palloc(sizeof(OffsetNumber) * nitems);
 	memcpy(sortednos, itemnos, sizeof(OffsetNumber) * nitems);
 	if (nitems > 1)
 		qsort(sortednos, nitems, sizeof(OffsetNumber), cmpOffsetNumbers);
@@ -173,8 +173,6 @@ spgPageIndexMultiDelete(SpGistState *state, Page page,
 		else if (tupstate == SPGIST_PLACEHOLDER)
 			SpGistPageGetOpaque(page)->nPlaceholder++;
 	}
-
-	pfree(sortednos);
 }
 
 /*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9d6609d44f..e2a14de7212 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -859,9 +859,8 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 
 	if (rechdr == NULL)
 	{
-		rechdr = malloc(SizeOfXLogRecord);
-		if (rechdr == NULL)
-			elog(ERROR, "out of memory");
+		static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
+		rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
 		MemSet(rechdr, 0, SizeOfXLogRecord);
 	}
 
@@ -3080,6 +3079,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 {
 	char		path[MAXPGPATH];
 	char		tmppath[MAXPGPATH];
+	char		zbuffer_raw[BLCKSZ + MAXIMUM_ALIGNOF];
 	char	   *zbuffer;
 	XLogSegNo	installed_segno;
 	int			max_advance;
@@ -3118,16 +3118,6 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 
 	unlink(tmppath);
 
-	/*
-	 * Allocate a buffer full of zeros. This is done before opening the file
-	 * so that we don't leak the file descriptor if palloc fails.
-	 *
-	 * Note: palloc zbuffer, instead of just using a local char array, to
-	 * ensure it is reasonably well-aligned; this may save a few cycles
-	 * transferring data to the kernel.
-	 */
-	zbuffer = (char *) palloc0(XLOG_BLCKSZ);
-
 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
 	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
 					   S_IRUSR | S_IWUSR);
@@ -3144,7 +3134,12 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 	 * fsync below) that all the indirect blocks are down on disk.	Therefore,
 	 * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
 	 * log file.
+	 *
+	 * Note: ensure the buffer is reasonably well-aligned; this may save a few
+	 * cycles transferring data to the kernel.
 	 */
+	zbuffer = (char *) MAXALIGN(zbuffer_raw);
+	memset(zbuffer, 0, BLCKSZ);
 	for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
 	{
 		errno = 0;
@@ -3167,7 +3162,6 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 					 errmsg("could not write to file \"%s\": %m", tmppath)));
 		}
 	}
-	pfree(zbuffer);
 
 	if (pg_fsync(fd) != 0)
 	{
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index ccc92daf1ff..7729fcacf0e 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/itup.h"
 #include "access/xlog.h"
 #include "storage/checksum.h"
 #include "utils/memdebug.h"
@@ -433,8 +434,6 @@ PageRepairFragmentation(Page page)
 	Offset		pd_lower = ((PageHeader) page)->pd_lower;
 	Offset		pd_upper = ((PageHeader) page)->pd_upper;
 	Offset		pd_special = ((PageHeader) page)->pd_special;
-	itemIdSort	itemidbase,
-				itemidptr;
 	ItemId		lp;
 	int			nline,
 				nstorage,
@@ -484,10 +483,11 @@ PageRepairFragmentation(Page page)
 		((PageHeader) page)->pd_upper = pd_special;
 	}
 	else
-	{							/* nstorage != 0 */
+	{
 		/* Need to compact the page the hard way */
-		itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nstorage);
-		itemidptr = itemidbase;
+		itemIdSortData itemidbase[MaxHeapTuplesPerPage];
+		itemIdSort	itemidptr = itemidbase;
+
 		totallen = 0;
 		for (i = 0; i < nline; i++)
 		{
@@ -532,8 +532,6 @@ PageRepairFragmentation(Page page)
 		}
 
 		((PageHeader) page)->pd_upper = upper;
-
-		pfree(itemidbase);
 	}
 
 	/* Set hint bit for PageAddItem */
@@ -782,8 +780,8 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
 	Offset		pd_lower = phdr->pd_lower;
 	Offset		pd_upper = phdr->pd_upper;
 	Offset		pd_special = phdr->pd_special;
-	itemIdSort	itemidbase,
-				itemidptr;
+	itemIdSortData	itemidbase[MaxIndexTuplesPerPage];
+	itemIdSort	itemidptr;
 	ItemId		lp;
 	int			nline,
 				nused;
@@ -795,6 +793,8 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
 	int			nextitm;
 	OffsetNumber offnum;
 
+	Assert(nitems < MaxIndexTuplesPerPage);
+
 	/*
 	 * If there aren't very many items to delete, then retail
 	 * PageIndexTupleDelete is the best way.  Delete the items in reverse
@@ -829,7 +829,6 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
 	 * still validity-checking.
 	 */
 	nline = PageGetMaxOffsetNumber(page);
-	itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);
 	itemidptr = itemidbase;
 	totallen = 0;
 	nused = 0;
@@ -895,8 +894,6 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
 
 	phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
 	phdr->pd_upper = upper;
-
-	pfree(itemidbase);
 }
 
 
-- 
GitLab