From 9e936693a9fc464511b52b14fb681cdea014bf59 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 21 Sep 2006 20:31:22 +0000
Subject: [PATCH] Fix free space map to correctly track the total amount of FSM
 space needed even when a single relation requires more than max_fsm_pages
 pages.  Also, make VACUUM emit a warning in this case, since it likely means
 that VACUUM FULL or other drastic corrective measure is needed.  Per reports
 from Jeff Frost and others of unexpected changes in the claimed max_fsm_pages
 need.

---
 .../pg_freespacemap/README.pg_freespacemap    |  82 ++++++------
 contrib/pg_freespacemap/pg_freespacemap.c     |  10 +-
 .../pg_freespacemap/pg_freespacemap.sql.in    |   2 +-
 src/backend/access/gin/ginvacuum.c            |  14 +-
 src/backend/access/gist/gistvacuum.c          |  20 +--
 src/backend/access/nbtree/nbtree.c            |  16 ++-
 src/backend/commands/vacuum.c                 |   4 +-
 src/backend/commands/vacuumlazy.c             |  27 +++-
 src/backend/storage/freespace/freespace.c     | 121 +++++++++++++-----
 src/include/storage/freespace.h               |  16 ++-
 10 files changed, 199 insertions(+), 113 deletions(-)

diff --git a/contrib/pg_freespacemap/README.pg_freespacemap b/contrib/pg_freespacemap/README.pg_freespacemap
index d66bd203294..9210419cb8c 100644
--- a/contrib/pg_freespacemap/README.pg_freespacemap
+++ b/contrib/pg_freespacemap/README.pg_freespacemap
@@ -36,19 +36,19 @@ Notes
 
    pg_freespacemap_relations
 
-       Column     |  references          | Description
-  ----------------+----------------------+------------------------------------
-   reltablespace  | pg_tablespace.oid    | Tablespace oid of the relation.
-   reldatabase    | pg_database.oid      | Database oid of the relation.
-   relfilenode    | pg_class.relfilenode | Relfilenode of the relation.
-   avgrequest     |                      | Moving average of free space 
-                  |                      | requests (NULL for indexes)
-   lastpagecount  |                      | Count of pages last reported as
-                  |                      | containing useful free space.
-   storedpages    |                      | Count of pages actually stored
-                  |                      | in free space map.
-   nextpage       |                      | Page index (from 0) to start next 
-                  |                      | search at.
+       Column       |  references          | Description
+  ------------------+----------------------+----------------------------------
+   reltablespace    | pg_tablespace.oid    | Tablespace oid of the relation.
+   reldatabase      | pg_database.oid      | Database oid of the relation.
+   relfilenode      | pg_class.relfilenode | Relfilenode of the relation.
+   avgrequest       |                      | Moving average of free space 
+                    |                      | requests (NULL for indexes)
+   interestingpages |                      | Count of pages last reported as
+                    |                      | containing useful free space.
+   storedpages      |                      | Count of pages actually stored
+                    |                      | in free space map.
+   nextpage         |                      | Page index (from 0) to start next 
+                    |                      | search at.
 
 
    pg_freespacemap_pages
@@ -65,11 +65,11 @@ Notes
 
   For pg_freespacemap_relations, there is one row for each relation in the free
   space map.  storedpages is the number of pages actually stored in the map,
-  while lastpagecount is the number of pages VACUUM last tried to store
-  (ie, the number that VACUUM thought had useful amounts of free space).
+  while interestingpages is the number of pages the last VACUUM thought had
+  useful amounts of free space.
 
-  If storedpages is consistently less than lastpagecount then it'd be a good
-  idea to increase max_fsm_pages.  Also, if the number of rows in
+  If storedpages is consistently less than interestingpages then it'd be a
+  good idea to increase max_fsm_pages.  Also, if the number of rows in
   pg_freespacemap_relations is close to max_fsm_relations, then you should
   consider increasing max_fsm_relations.
 
@@ -96,36 +96,36 @@ Sample output - pg_freespacemap_relations
 
 regression=# \d pg_freespacemap_relations
 View "public.pg_freespacemap_relations"
-    Column     |  Type   | Modifiers
----------------+---------+-----------
- reltablespace | oid     |
- reldatabase   | oid     |
- relfilenode   | oid     |
- avgrequest    | integer |
- lastpagecount | integer |
- storedpages   | integer |
- nextpage      | integer |
+      Column      |  Type   | Modifiers
+------------------+---------+-----------
+ reltablespace    | oid     |
+ reldatabase      | oid     |
+ relfilenode      | oid     |
+ avgrequest       | integer |
+ interestingpages | integer |
+ storedpages      | integer |
+ nextpage         | integer |
 View definition:
- SELECT p.reltablespace, p.reldatabase, p.relfilenode, p.avgrequest, p.lastpagecount, p.storedpages, p.nextpage
-   FROM pg_freespacemap_relations() p(reltablespace oid, reldatabase oid, relfilenode oid, avgrequest integer, lastpagecount integer, storedpages integer, nextpage integer);
+ SELECT p.reltablespace, p.reldatabase, p.relfilenode, p.avgrequest, p.interestingpages, p.storedpages, p.nextpage
+   FROM pg_freespacemap_relations() p(reltablespace oid, reldatabase oid, relfilenode oid, avgrequest integer, interestingpages integer, storedpages integer, nextpage integer);
 
-regression=# SELECT c.relname, r.avgrequest, r.lastpagecount, r.storedpages
+regression=# SELECT c.relname, r.avgrequest, r.interestingpages, r.storedpages
              FROM pg_freespacemap_relations r INNER JOIN pg_class c
              ON c.relfilenode = r.relfilenode INNER JOIN pg_database d
              ON r.reldatabase = d.oid AND (d.datname = current_database()) 
              ORDER BY r.storedpages DESC LIMIT 10;
-             relname             | avgrequest | lastpagecount | storedpages
----------------------------------+------------+---------------+-------------
- onek                            |        256 |           109 |         109
- pg_attribute                    |        167 |            93 |          93
- pg_class                        |        191 |            49 |          49
- pg_attribute_relid_attnam_index |            |            48 |          48
- onek2                           |        256 |            37 |          37
- pg_depend                       |         95 |            26 |          26
- pg_type                         |        199 |            16 |          16
- pg_rewrite                      |       1011 |            13 |          13
- pg_class_relname_nsp_index      |            |            10 |          10
- pg_proc                         |        302 |             8 |           8
+             relname             | avgrequest | interestingpages | storedpages
+---------------------------------+------------+------------------+-------------
+ onek                            |        256 |              109 |         109
+ pg_attribute                    |        167 |               93 |          93
+ pg_class                        |        191 |               49 |          49
+ pg_attribute_relid_attnam_index |            |               48 |          48
+ onek2                           |        256 |               37 |          37
+ pg_depend                       |         95 |               26 |          26
+ pg_type                         |        199 |               16 |          16
+ pg_rewrite                      |       1011 |               13 |          13
+ pg_class_relname_nsp_index      |            |               10 |          10
+ pg_proc                         |        302 |                8 |           8
 (10 rows)
 
 
diff --git a/contrib/pg_freespacemap/pg_freespacemap.c b/contrib/pg_freespacemap/pg_freespacemap.c
index 0ab99482ff5..9ce8422157f 100644
--- a/contrib/pg_freespacemap/pg_freespacemap.c
+++ b/contrib/pg_freespacemap/pg_freespacemap.c
@@ -3,7 +3,7 @@
  * pg_freespacemap.c
  *	  display some contents of the free space relation and page maps.
  *
- *	  $PostgreSQL: pgsql/contrib/pg_freespacemap/pg_freespacemap.c,v 1.6 2006/05/30 22:12:13 tgl Exp $
+ *	  $PostgreSQL: pgsql/contrib/pg_freespacemap/pg_freespacemap.c,v 1.7 2006/09/21 20:31:21 tgl Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -53,7 +53,7 @@ typedef struct
 	Oid				reldatabase;
 	Oid				relfilenode;
 	Size			avgrequest;
-	int				lastpagecount;
+	BlockNumber		interestingpages;
 	int				storedpages;
 	int				nextpage;
 	bool			isindex;
@@ -303,7 +303,7 @@ pg_freespacemap_relations(PG_FUNCTION_ARGS)
 						   OIDOID, -1, 0);
 		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "avgrequest",
 						   INT4OID, -1, 0);
-		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "lastpagecount",
+		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "interestingpages",
 						   INT4OID, -1, 0);
 		TupleDescInitEntry(tupledesc, (AttrNumber) 6, "storedpages",
 						   INT4OID, -1, 0);
@@ -334,7 +334,7 @@ pg_freespacemap_relations(PG_FUNCTION_ARGS)
 			fctx->record[i].reldatabase = fsmrel->key.dbNode;
 			fctx->record[i].relfilenode = fsmrel->key.relNode;
 			fctx->record[i].avgrequest = (int64)fsmrel->avgRequest;
-			fctx->record[i].lastpagecount = fsmrel->lastPageCount;
+			fctx->record[i].interestingpages = fsmrel->interestingPages;
 			fctx->record[i].storedpages = fsmrel->storedPages;
 			fctx->record[i].nextpage = fsmrel->nextPage;
 			fctx->record[i].isindex = fsmrel->isIndex;
@@ -380,7 +380,7 @@ pg_freespacemap_relations(PG_FUNCTION_ARGS)
 			values[3] = UInt32GetDatum(record->avgrequest);
 			nulls[3] = false;
 		}
-		values[4] = Int32GetDatum(record->lastpagecount);
+		values[4] = Int32GetDatum(record->interestingpages);
 		nulls[4] = false;
 		values[5] = Int32GetDatum(record->storedpages);
 		nulls[5] = false;
diff --git a/contrib/pg_freespacemap/pg_freespacemap.sql.in b/contrib/pg_freespacemap/pg_freespacemap.sql.in
index c45335e3b1e..a91a991502a 100644
--- a/contrib/pg_freespacemap/pg_freespacemap.sql.in
+++ b/contrib/pg_freespacemap/pg_freespacemap.sql.in
@@ -30,7 +30,7 @@ CREATE VIEW pg_freespacemap_relations AS
 	 reldatabase oid,
 	 relfilenode oid,
 	 avgrequest integer,
-	 lastpagecount integer,
+	 interestingpages integer,
 	 storedpages integer,
 	 nextpage integer);
 
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 2bc80a26433..31e5f647f07 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *          $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.5 2006/07/31 20:08:59 tgl Exp $
+ *          $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.6 2006/09/21 20:31:21 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -575,7 +575,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
 	bool	 needLock;
     BlockNumber npages,
 				blkno;
-	BlockNumber nFreePages,
+	BlockNumber totFreePages,
+				nFreePages,
 			   *freePages,
 			   maxFreePages;
 	BlockNumber lastBlock = GIN_ROOT_BLKNO,
@@ -610,7 +611,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
 	if (maxFreePages > MaxFSMPages)
 		maxFreePages = MaxFSMPages;
 
-	nFreePages = 0;
+	totFreePages = nFreePages = 0;
 	freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
 
 	for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) {
@@ -626,6 +627,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
 		if ( GinPageIsDeleted(page) ) {
 			if (nFreePages < maxFreePages)
 				freePages[nFreePages++] = blkno;
+			totFreePages++;
 		} else
 			lastFilledBlock = blkno;
 
@@ -638,7 +640,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
 		int         i;
 		for (i = 0; i < nFreePages; i++) 
 			if (freePages[i] >= lastFilledBlock) {
-				nFreePages = i;
+				totFreePages = nFreePages = i;
 				break;
 			}
 
@@ -648,8 +650,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
 		stats->pages_removed = lastBlock - lastFilledBlock;
 	}
 
-	RecordIndexFreeSpace(&index->rd_node, nFreePages, freePages);
-	stats->pages_free = nFreePages;
+	RecordIndexFreeSpace(&index->rd_node, totFreePages, nFreePages, freePages);
+	stats->pages_free = totFreePages;
 
 	if (needLock)
 		LockRelationForExtension(index, ExclusiveLock);
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 37b5631b281..e5c73c8c224 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.26 2006/07/31 20:08:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.27 2006/09/21 20:31:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -491,7 +491,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
 	Relation	rel = info->index;
 	BlockNumber npages,
 				blkno;
-	BlockNumber nFreePages,
+	BlockNumber totFreePages,
+				nFreePages,
 			   *freePages,
 				maxFreePages;
 	BlockNumber lastBlock = GIST_ROOT_BLKNO,
@@ -563,8 +564,9 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
 	if (maxFreePages > MaxFSMPages)
 		maxFreePages = MaxFSMPages;
 
-	nFreePages = 0;
+	totFreePages = nFreePages = 0;
 	freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
+
 	for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
 	{
 		Buffer		buffer;
@@ -579,10 +581,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
 		if (PageIsNew(page) || GistPageIsDeleted(page))
 		{
 			if (nFreePages < maxFreePages)
-			{
-				freePages[nFreePages] = blkno;
-				nFreePages++;
-			}
+				freePages[nFreePages++] = blkno;
+			totFreePages++;
 		}
 		else
 			lastFilledBlock = blkno;
@@ -597,7 +597,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
 		for (i = 0; i < nFreePages; i++)
 			if (freePages[i] >= lastFilledBlock)
 			{
-				nFreePages = i;
+				totFreePages = nFreePages = i;
 				break;
 			}
 
@@ -606,11 +606,11 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
 		stats->std.pages_removed = lastBlock - lastFilledBlock;
 	}
 
-	RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);
+	RecordIndexFreeSpace(&rel->rd_node, totFreePages, nFreePages, freePages);
 	pfree(freePages);
 
 	/* return statistics */
-	stats->std.pages_free = nFreePages;
+	stats->std.pages_free = totFreePages;
 	if (needLock)
 		LockRelationForExtension(rel, ExclusiveLock);
 	stats->std.num_pages = RelationGetNumberOfBlocks(rel);
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c58974cca13..fa5b162c908 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.150 2006/08/24 01:18:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.151 2006/09/21 20:31:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,8 +53,9 @@ typedef struct
 	void	   *callback_state;
 	BTCycleId	cycleid;
 	BlockNumber *freePages;
-	int			nFreePages;
-	int			maxFreePages;
+	int			nFreePages;		/* number of entries in freePages[] */
+	int			maxFreePages;	/* allocated size of freePages[] */
+	BlockNumber	totFreePages;	/* true total # of free pages */
 	MemoryContext pagedelcontext;
 } BTVacState;
 
@@ -636,6 +637,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	vstate.freePages = NULL;	/* temporarily */
 	vstate.nFreePages = 0;
 	vstate.maxFreePages = 0;
+	vstate.totFreePages = 0;
 
 	/* Create a temporary memory context to run _bt_pagedel in */
 	vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
@@ -716,6 +718,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 			new_pages--;
 			stats->pages_deleted--;
 			vstate.nFreePages--;
+			vstate.totFreePages = vstate.nFreePages;	/* can't be more */
 		}
 		if (new_pages != num_pages)
 		{
@@ -736,7 +739,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	 * pages in the index, discarding any old info the map may have. We do not
 	 * need to sort the page numbers; they're in order already.
 	 */
-	RecordIndexFreeSpace(&rel->rd_node, vstate.nFreePages, vstate.freePages);
+	RecordIndexFreeSpace(&rel->rd_node, vstate.totFreePages,
+						 vstate.nFreePages, vstate.freePages);
 
 	pfree(vstate.freePages);
 
@@ -744,7 +748,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 
 	/* update statistics */
 	stats->num_pages = num_pages;
-	stats->pages_free = vstate.nFreePages;
+	stats->pages_free = vstate.totFreePages;
 }
 
 /*
@@ -816,6 +820,7 @@ restart:
 		/* Okay to recycle this page */
 		if (vstate->nFreePages < vstate->maxFreePages)
 			vstate->freePages[vstate->nFreePages++] = blkno;
+		vstate->totFreePages++;
 		stats->pages_deleted++;
 	}
 	else if (P_ISDELETED(opaque))
@@ -954,6 +959,7 @@ restart:
 		{
 			if (vstate->nFreePages < vstate->maxFreePages)
 				vstate->freePages[vstate->nFreePages++] = blkno;
+			vstate->totFreePages++;
 		}
 
 		MemoryContextSwitchTo(oldcontext);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 4ba48d34df4..5a6d5a04b02 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.339 2006/09/17 22:16:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.340 2006/09/21 20:31:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3314,7 +3314,7 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
 		}
 	}
 
-	RecordRelationFreeSpace(&onerel->rd_node, outPages, pageSpaces);
+	RecordRelationFreeSpace(&onerel->rd_node, outPages, outPages, pageSpaces);
 
 	pfree(pageSpaces);
 }
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index d3b91807795..c839b951d98 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.78 2006/09/13 17:47:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.79 2006/09/21 20:31:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -90,6 +90,7 @@ typedef struct LVRelStats
 	int			num_free_pages; /* current # of entries */
 	int			max_free_pages; /* # slots allocated in array */
 	PageFreeSpaceInfo *free_pages;		/* array or heap of blkno/avail */
+	BlockNumber	tot_free_pages;	/* total pages with >= threshold space */
 } LVRelStats;
 
 
@@ -523,12 +524,21 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 					tups_vacuumed, num_tuples, nblocks),
 			 errdetail("%.0f dead row versions cannot be removed yet.\n"
 					   "There were %.0f unused item pointers.\n"
+					   "%u pages contain useful free space.\n"
 					   "%u pages are entirely empty.\n"
 					   "%s.",
 					   nkeep,
 					   nunused,
+					   vacrelstats->tot_free_pages,
 					   empty_pages,
 					   pg_rusage_show(&ru0))));
+
+	if (vacrelstats->tot_free_pages > MaxFSMPages)
+		ereport(WARNING,
+				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
+						get_namespace_name(RelationGetNamespace(onerel)),
+						relname),
+				 errhint("Consider compacting this relation or increasing the configuration parameter \"max_fsm_pages\".")));
 }
 
 
@@ -793,6 +803,14 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats,
 		}
 	}
 	vacrelstats->num_free_pages = j;
+	/*
+	 * If tot_free_pages was more than num_free_pages, we can't tell for sure
+	 * what its correct value is now, because we don't know which of the
+	 * forgotten pages are getting truncated.  Conservatively set it equal
+	 * to num_free_pages.
+	 */
+	vacrelstats->tot_free_pages = j;
+
 	/* We destroyed the heap ordering, so mark array unordered */
 	vacrelstats->fs_is_heap = false;
 
@@ -960,6 +978,7 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
 	vacrelstats->max_free_pages = maxpages;
 	vacrelstats->free_pages = (PageFreeSpaceInfo *)
 		palloc(maxpages * sizeof(PageFreeSpaceInfo));
+	vacrelstats->tot_free_pages = 0;
 }
 
 /*
@@ -1009,6 +1028,9 @@ lazy_record_free_space(LVRelStats *vacrelstats,
 	if (avail < vacrelstats->threshold)
 		return;
 
+	/* Count all pages over threshold, even if not enough space in array */
+	vacrelstats->tot_free_pages++;
+
 	/* Copy pointers to local variables for notational simplicity */
 	pageSpaces = vacrelstats->free_pages;
 	n = vacrelstats->max_free_pages;
@@ -1138,7 +1160,8 @@ lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
 		qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
 			  vac_cmp_page_spaces);
 
-	RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
+	RecordRelationFreeSpace(&onerel->rd_node, vacrelstats->tot_free_pages,
+							nPages, pageSpaces);
 }
 
 /*
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 62e5252aac1..3309ba11dac 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.54 2006/07/14 14:52:22 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.55 2006/09/21 20:31:22 tgl Exp $
  *
  *
  * NOTES:
@@ -24,7 +24,7 @@
  * behavior keeps track of which relation is least recently used.
  *
  * For each known relation, we track the average request size given to
- * GetPageWithFreeSpace() as well as the most recent number of pages given
+ * GetPageWithFreeSpace() as well as the most recent number of pages reported
  * to RecordRelationFreeSpace().  The average request size is not directly
  * used in this module, but we expect VACUUM to use it to filter out
  * uninteresting amounts of space before calling RecordRelationFreeSpace().
@@ -82,7 +82,7 @@
  *		relfilenode
  *		isIndex
  *		avgRequest
- *		lastPageCount
+ *		interestingPages
  *		storedPages
  *		arena data		array of storedPages FSMPageData or IndexFSMPageData
  *----------
@@ -111,7 +111,7 @@ typedef struct FsmCacheRelHeader
 	RelFileNode key;			/* hash key (must be first) */
 	bool		isIndex;		/* if true, we store only page numbers */
 	uint32		avgRequest;		/* moving average of space requests */
-	int32		lastPageCount;	/* pages passed to RecordRelationFreeSpace */
+	BlockNumber	interestingPages;	/* # of pages with useful free space */
 	int32		storedPages;	/* # of pages stored in arena */
 } FsmCacheRelHeader;
 
@@ -128,7 +128,8 @@ static void CheckFreeSpaceMapStatistics(int elevel, int numRels,
 static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
 static FSMRelation *create_fsm_rel(RelFileNode *rel);
 static void delete_fsm_rel(FSMRelation *fsmrel);
-static int	realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex);
+static int	realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages,
+							bool isIndex);
 static void link_fsm_rel_usage(FSMRelation *fsmrel);
 static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
 static void link_fsm_rel_storage(FSMRelation *fsmrel);
@@ -146,6 +147,7 @@ static void pack_incoming_pages(FSMPageData *newLocation, int newPages,
 static void pack_existing_pages(FSMPageData *newLocation, int newPages,
 					FSMPageData *oldLocation, int oldPages);
 static int	fsm_calc_request(FSMRelation *fsmrel);
+static int	fsm_calc_request_unclamped(FSMRelation *fsmrel);
 static int	fsm_calc_target_allocation(int myRequest);
 static int	fsm_current_chunks(FSMRelation *fsmrel);
 static int	fsm_current_allocation(FSMRelation *fsmrel);
@@ -361,11 +363,17 @@ GetAvgFSMRequestSize(RelFileNode *rel)
  *
  * Any pre-existing info about the relation is assumed obsolete and discarded.
  *
+ * interestingPages is the total number of pages in the relation that have
+ * at least threshold free space; nPages is the number actually reported in
+ * pageSpaces[] (may be less --- in particular, callers typically clamp their
+ * space usage to MaxFSMPages).
+ *
  * The given pageSpaces[] array must be sorted in order by blkno.  Note that
  * the FSM is at liberty to discard some or all of the data.
  */
 void
 RecordRelationFreeSpace(RelFileNode *rel,
+						BlockNumber interestingPages,
 						int nPages,
 						PageFreeSpaceInfo *pageSpaces)
 {
@@ -392,7 +400,7 @@ RecordRelationFreeSpace(RelFileNode *rel,
 		int			curAllocPages;
 		FSMPageData *newLocation;
 
-		curAlloc = realloc_fsm_rel(fsmrel, nPages, false);
+		curAlloc = realloc_fsm_rel(fsmrel, interestingPages, false);
 		curAllocPages = curAlloc * CHUNKPAGES;
 
 		/*
@@ -455,6 +463,7 @@ GetFreeIndexPage(RelFileNode *rel)
  */
 void
 RecordIndexFreeSpace(RelFileNode *rel,
+					 BlockNumber interestingPages,
 					 int nPages,
 					 BlockNumber *pages)
 {
@@ -481,7 +490,7 @@ RecordIndexFreeSpace(RelFileNode *rel,
 		int			i;
 		IndexFSMPageData *newLocation;
 
-		curAlloc = realloc_fsm_rel(fsmrel, nPages, true);
+		curAlloc = realloc_fsm_rel(fsmrel, interestingPages, true);
 		curAllocPages = curAlloc * INDEXCHUNKPAGES;
 
 		/*
@@ -530,7 +539,7 @@ FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks)
 		(void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex);
 		/* Delete all such entries */
 		fsmrel->storedPages = pageIndex;
-		/* XXX should we adjust rel's lastPageCount and sumRequests? */
+		/* XXX should we adjust rel's interestingPages and sumRequests? */
 	}
 	LWLockRelease(FreeSpaceLock);
 }
@@ -587,20 +596,24 @@ PrintFreeSpaceMapStatistics(int elevel)
 {
 	FSMRelation *fsmrel;
 	int			storedPages = 0;
+	double		sumRequests = 0;
 	int			numRels;
-	double		sumRequests;
 	double		needed;
 
 	LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
-	/* Count total space used --- tedious, but seems useful */
+	/*
+	 * Count total space actually used, as well as the unclamped request total
+	 */
 	for (fsmrel = FreeSpaceMap->firstRel;
 		 fsmrel != NULL;
 		 fsmrel = fsmrel->nextPhysical)
+	{
 		storedPages += fsmrel->storedPages;
+		sumRequests += fsm_calc_request_unclamped(fsmrel);
+	}
 
 	/* Copy other stats before dropping lock */
 	numRels = FreeSpaceMap->numRels;
-	sumRequests = FreeSpaceMap->sumRequests;
 	LWLockRelease(FreeSpaceLock);
 
 	/* Convert stats to actual number of page slots needed */
@@ -613,7 +626,8 @@ PrintFreeSpaceMapStatistics(int elevel)
 			  "%.0f page slots are required to track all free space.\n"
 		  "Current limits are:  %d page slots, %d relations, using %.0f KB.",
 			  Min(needed, MaxFSMPages),
-			  needed, MaxFSMPages, MaxFSMRelations,
+			  needed,
+			  MaxFSMPages, MaxFSMRelations,
 			  (double) FreeSpaceShmemSize() / 1024.0)));
 
 	CheckFreeSpaceMapStatistics(NOTICE, numRels, needed);
@@ -687,7 +701,7 @@ DumpFreeSpaceMap(int code, Datum arg)
 		relheader.key = fsmrel->key;
 		relheader.isIndex = fsmrel->isIndex;
 		relheader.avgRequest = fsmrel->avgRequest;
-		relheader.lastPageCount = fsmrel->lastPageCount;
+		relheader.interestingPages = fsmrel->interestingPages;
 		relheader.storedPages = fsmrel->storedPages;
 		if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader))
 			goto write_failed;
@@ -792,17 +806,12 @@ LoadFreeSpaceMap(void)
 		if (fread(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader) ||
 			(relheader.isIndex != false && relheader.isIndex != true) ||
 			relheader.avgRequest >= BLCKSZ ||
-			relheader.lastPageCount < 0 ||
 			relheader.storedPages < 0)
 		{
 			elog(LOG, "bogus rel header in \"%s\"", FSM_CACHE_FILENAME);
 			goto read_failed;
 		}
 
-		/* Make sure lastPageCount doesn't exceed current MaxFSMPages */
-		if (relheader.lastPageCount > MaxFSMPages)
-			relheader.lastPageCount = MaxFSMPages;
-
 		/* Read the per-page data */
 		nPages = relheader.storedPages;
 		if (relheader.isIndex)
@@ -827,7 +836,7 @@ LoadFreeSpaceMap(void)
 		fsmrel = create_fsm_rel(&relheader.key);
 		fsmrel->avgRequest = relheader.avgRequest;
 
-		curAlloc = realloc_fsm_rel(fsmrel, relheader.lastPageCount,
+		curAlloc = realloc_fsm_rel(fsmrel, relheader.interestingPages,
 								   relheader.isIndex);
 		if (relheader.isIndex)
 		{
@@ -932,7 +941,7 @@ create_fsm_rel(RelFileNode *rel)
 		/* New hashtable entry, initialize it (hash_search set the key) */
 		fsmrel->isIndex = false;	/* until we learn different */
 		fsmrel->avgRequest = INITIAL_AVERAGE;
-		fsmrel->lastPageCount = 0;
+		fsmrel->interestingPages = 0;
 		fsmrel->firstChunk = -1;	/* no space allocated */
 		fsmrel->storedPages = 0;
 		fsmrel->nextPage = 0;
@@ -988,7 +997,8 @@ delete_fsm_rel(FSMRelation *fsmrel)
  * The return value is the actual new allocation, in chunks.
  */
 static int
-realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
+realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages,
+				bool isIndex)
 {
 	int			myRequest;
 	int			myAlloc;
@@ -999,7 +1009,7 @@ realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
 	 */
 	fsmrel->storedPages = 0;
 	FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
-	fsmrel->lastPageCount = nPages;
+	fsmrel->interestingPages = interestingPages;
 	fsmrel->isIndex = isIndex;
 	myRequest = fsm_calc_request(fsmrel);
 	FreeSpaceMap->sumRequests += myRequest;
@@ -1012,7 +1022,7 @@ realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
 	 * new data in-place.
 	 */
 	curAlloc = fsm_current_allocation(fsmrel);
-	if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && nPages > 0)
+	if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && interestingPages > 0)
 	{
 		/* Remove entry from storage list, and compact */
 		unlink_fsm_rel_storage(fsmrel);
@@ -1649,27 +1659,70 @@ pack_existing_pages(FSMPageData *newLocation, int newPages,
 }
 
 /*
- * Calculate number of chunks "requested" by a rel.
+ * Calculate number of chunks "requested" by a rel.  The "request" is
+ * anything beyond the rel's one guaranteed chunk.
  *
- * Rel's lastPageCount and isIndex settings must be up-to-date when called.
+ * Rel's interestingPages and isIndex settings must be up-to-date when called.
  *
  * See notes at top of file for details.
  */
 static int
 fsm_calc_request(FSMRelation *fsmrel)
 {
-	int			chunkCount;
+	int			req;
 
 	/* Convert page count to chunk count */
 	if (fsmrel->isIndex)
-		chunkCount = (fsmrel->lastPageCount - 1) / INDEXCHUNKPAGES + 1;
+	{
+		/* test to avoid unsigned underflow at zero */
+		if (fsmrel->interestingPages <= INDEXCHUNKPAGES)
+			return 0;
+		/* quotient will fit in int, even if interestingPages doesn't */
+		req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES;
+	}
 	else
-		chunkCount = (fsmrel->lastPageCount - 1) / CHUNKPAGES + 1;
-	/* "Request" is anything beyond our one guaranteed chunk */
-	if (chunkCount <= 0)
-		return 0;
+	{
+		if (fsmrel->interestingPages <= CHUNKPAGES)
+			return 0;
+		req = (fsmrel->interestingPages - 1) / CHUNKPAGES;
+	}
+
+	/*
+	 * We clamp the per-relation requests to at most half the arena size;
+	 * this is intended to prevent a single bloated relation from crowding
+	 * out FSM service for every other rel.
+	 */
+	req = Min(req, FreeSpaceMap->totalChunks / 2);
+
+	return req;
+}
+
+/*
+ * Same as above, but without the clamp ... this is just intended for
+ * reporting the total space needed to store all information.
+ */
+static int
+fsm_calc_request_unclamped(FSMRelation *fsmrel)
+{
+	int			req;
+
+	/* Convert page count to chunk count */
+	if (fsmrel->isIndex)
+	{
+		/* test to avoid unsigned underflow at zero */
+		if (fsmrel->interestingPages <= INDEXCHUNKPAGES)
+			return 0;
+		/* quotient will fit in int, even if interestingPages doesn't */
+		req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES;
+	}
 	else
-		return chunkCount - 1;
+	{
+		if (fsmrel->interestingPages <= CHUNKPAGES)
+			return 0;
+		req = (fsmrel->interestingPages - 1) / CHUNKPAGES;
+	}
+
+	return req;
 }
 
 /*
@@ -1769,11 +1822,11 @@ DumpFreeSpace(void)
 	for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage)
 	{
 		relNum++;
-		fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u lastPageCount %d nextPage %d\nMap= ",
+		fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u interestingPages %u nextPage %d\nMap= ",
 				relNum,
 				fsmrel->key.spcNode, fsmrel->key.dbNode, fsmrel->key.relNode,
 				(int) fsmrel->isIndex, fsmrel->avgRequest,
-				fsmrel->lastPageCount, fsmrel->nextPage);
+				fsmrel->interestingPages, fsmrel->nextPage);
 		if (fsmrel->isIndex)
 		{
 			IndexFSMPageData *page;
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index a6801cbbb4b..ff0e0bb7101 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.21 2006/07/13 16:49:20 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.22 2006/09/21 20:31:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -115,7 +115,7 @@ struct FSMRelation
 	FSMRelation *priorPhysical; /* prior rel in arena-storage order */
 	bool		isIndex;		/* if true, we store only page numbers */
 	Size		avgRequest;		/* moving average of space requests */
-	int			lastPageCount;	/* pages passed to RecordRelationFreeSpace */
+	BlockNumber	interestingPages;	/* # of pages with useful free space */
 	int			firstChunk;		/* chunk # of my first chunk in arena */
 	int			storedPages;	/* # of pages stored in arena */
 	int			nextPage;		/* index (from 0) to start next search at */
@@ -133,6 +133,7 @@ extern int	MaxFSMPages;
  */
 extern void InitFreeSpaceMap(void);
 extern Size FreeSpaceShmemSize(void);
+extern FSMHeader *GetFreeSpaceMap(void);
 
 extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded);
 extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
@@ -141,13 +142,15 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
 							  Size spaceNeeded);
 extern Size GetAvgFSMRequestSize(RelFileNode *rel);
 extern void RecordRelationFreeSpace(RelFileNode *rel,
-						int nPages,
-						PageFreeSpaceInfo *pageSpaces);
+									BlockNumber interestingPages,
+									int nPages,
+									PageFreeSpaceInfo *pageSpaces);
 
 extern BlockNumber GetFreeIndexPage(RelFileNode *rel);
 extern void RecordIndexFreeSpace(RelFileNode *rel,
-					 int nPages,
-					 BlockNumber *pages);
+								 BlockNumber interestingPages,
+								 int nPages,
+								 BlockNumber *pages);
 
 extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks);
 extern void FreeSpaceMapForgetRel(RelFileNode *rel);
@@ -157,7 +160,6 @@ extern void PrintFreeSpaceMapStatistics(int elevel);
 
 extern void DumpFreeSpaceMap(int code, Datum arg);
 extern void LoadFreeSpaceMap(void);
-extern FSMHeader *GetFreeSpaceMap(void);
 
 #ifdef FREESPACE_DEBUG
 extern void DumpFreeSpace(void);
-- 
GitLab