From f54106f77e6d71cbb3fa0924095e5142341fde2b Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Thu, 28 Nov 2013 16:52:54 -0300
Subject: [PATCH] Fix full-table-vacuum request mechanism for MultiXactIds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While autovacuum dutifully launched anti-multixact-wraparound vacuums
when the multixact "age" was reached, the vacuum code was not aware that
it needed to make them be full table vacuums.  As the resulting
partial-table vacuums aren't capable of actually increasing relminmxid,
autovacuum continued to launch anti-wraparound vacuums that didn't have
the intended effect, until the age of relfrozenxid caused the vacuum to
finally be a full-table one via vacuum_freeze_table_age.

To fix, introduce logic for multixacts similar to that for plain
TransactionIds, using the same GUCs.

Backpatch to 9.3, where permanent MultiXactIds were introduced.

Andres Freund, some cleanup by Álvaro
---
 src/backend/access/transam/multixact.c | 15 +++++++
 src/backend/commands/cluster.c         | 13 +++---
 src/backend/commands/vacuum.c          | 62 +++++++++++++++++++-------
 src/backend/commands/vacuumlazy.c      | 18 ++++++--
 src/include/access/multixact.h         |  2 +
 src/include/commands/vacuum.h          |  5 ++-
 6 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index de0193aaf64..90fa030caf2 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2374,6 +2374,21 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
 	return (diff < 0);
 }
 
+/*
+ * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
+ *
+ * The answer is the sign test (int32) (multi1 - multi2) <= 0.
+ * XXX InvalidMultiXactId gets no special treatment (doesn't look needed).
+ */
+bool
+MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
+{
+	int32		diff = (int32) (multi1 - multi2);
+
+	return (diff <= 0);
+}
+
+
 /*
  * Decide which of two offsets is earlier.
  */
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index afc6a786508..0b8ac8c8d8e 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -176,7 +176,10 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		/* close relation, keep lock till commit */
 		heap_close(rel, NoLock);
 
-		/* Do the job */
+		/*
+		 * Do the job.  We use a -1 freeze_min_age to avoid having CLUSTER
+		 * freeze tuples earlier than a plain VACUUM would.
+		 */
 		cluster_rel(tableOid, indexOid, false, stmt->verbose, -1, -1);
 	}
 	else
@@ -226,6 +229,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 			StartTransactionCommand();
 			/* functions in indexes may want a snapshot set */
 			PushActiveSnapshot(GetTransactionSnapshot());
+			/* Do the job.  As above, use a -1 freeze_min_age. */
 			cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose,
 						-1, -1);
 			PopActiveSnapshot();
@@ -853,13 +857,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 		*pSwapToastByContent = false;
 
 	/*
-	 * compute xids used to freeze and weed out dead tuples.  We use -1
-	 * freeze_min_age to avoid having CLUSTER freeze tuples earlier than a
-	 * plain VACUUM would.
+	 * compute xids used to freeze and weed out dead tuples.
 	 */
 	vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
 						  OldHeap->rd_rel->relisshared,
-						  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff);
+						  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
+						  NULL);
 
 	/*
 	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 27aea739e6b..7dee79ca5ff 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -376,6 +376,24 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
 
 /*
  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
+ *
+ * The output parameters are:
+ * - oldestXmin is the cutoff value used to distinguish whether tuples are
+ *   DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
+ * - freezeLimit is the Xid below which all Xids are replaced by
+ *   FrozenTransactionId during vacuum.
+ * - xidFullScanLimit (computed from the freeze_table_age parameter)
+ *   represents a minimum Xid value; a table whose relfrozenxid is older than
+ *   this will have a full-table vacuum applied to it, to freeze tuples across
+ *   the whole table.  Vacuuming a table younger than this value can use a
+ *   partial scan.
+ * - multiXactCutoff is the value below which all MultiXactIds are removed from
+ *   Xmax.
+ * - mxactFullScanLimit is a value against which a table's relminmxid value is
+ *   compared to produce a full-table vacuum, as with xidFullScanLimit.
+ *
+ * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
+ * not interested.
  */
 void
 vacuum_set_xid_limits(int freeze_min_age,
@@ -383,12 +401,14 @@ vacuum_set_xid_limits(int freeze_min_age,
 					  bool sharedRel,
 					  TransactionId *oldestXmin,
 					  TransactionId *freezeLimit,
-					  TransactionId *freezeTableLimit,
-					  MultiXactId *multiXactCutoff)
+					  TransactionId *xidFullScanLimit,
+					  MultiXactId *multiXactCutoff,
+					  MultiXactId *mxactFullScanLimit)
 {
 	int			freezemin;
 	TransactionId limit;
 	TransactionId safeLimit;
+	MultiXactId	mxactLimit;
 
 	/*
 	 * We can always ignore processes running lazy vacuum.	This is because we
@@ -441,10 +461,22 @@ vacuum_set_xid_limits(int freeze_min_age,
 
 	*freezeLimit = limit;
 
-	if (freezeTableLimit != NULL)
+	/*
+	 * simplistic MultiXactId removal limit: use the same policy as for
+	 * freezing Xids (except we use the oldest known mxact instead of the
+	 * current next value).
+	 */
+	mxactLimit = GetOldestMultiXactId() - freezemin;
+	if (mxactLimit < FirstMultiXactId)
+		mxactLimit = FirstMultiXactId;
+	*multiXactCutoff = mxactLimit;
+
+	if (xidFullScanLimit != NULL)
 	{
 		int			freezetable;
 
+		Assert(mxactFullScanLimit != NULL);
+
 		/*
 		 * Determine the table freeze age to use: as specified by the caller,
 		 * or vacuum_freeze_table_age, but in any case not more than
@@ -459,29 +491,25 @@ vacuum_set_xid_limits(int freeze_min_age,
 		Assert(freezetable >= 0);
 
 		/*
-		 * Compute the cutoff XID, being careful not to generate a "permanent"
-		 * XID.
+		 * Compute XID limit causing a full-table vacuum, being careful not to
+		 * generate a "permanent" XID.
 		 */
 		limit = ReadNewTransactionId() - freezetable;
 		if (!TransactionIdIsNormal(limit))
 			limit = FirstNormalTransactionId;
 
-		*freezeTableLimit = limit;
-	}
-
-	if (multiXactCutoff != NULL)
-	{
-		MultiXactId mxLimit;
+		*xidFullScanLimit = limit;
 
 		/*
-		 * simplistic multixactid freezing: use the same freezing policy as
-		 * for Xids
+		 * Compute MultiXactId limit to cause a full-table vacuum, being
+		 * careful not to generate an invalid multi. We just copy the logic
+		 * (and limits) from plain XIDs here.
 		 */
-		mxLimit = GetOldestMultiXactId() - freezemin;
-		if (mxLimit < FirstMultiXactId)
-			mxLimit = FirstMultiXactId;
+		mxactLimit = ReadNextMultiXactId() - freezetable;
+		if (mxactLimit < FirstMultiXactId)
+			mxactLimit = FirstMultiXactId;
 
-		*multiXactCutoff = mxLimit;
+		*mxactFullScanLimit = mxactLimit;
 	}
 }
 
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 6688ab38565..fe2d9e78fa8 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -180,7 +180,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
 				write_rate;
 	bool		scan_all;		/* should we scan all pages? */
 	bool		scanned_all;	/* did we actually scan all pages? */
-	TransactionId freezeTableLimit;
+	TransactionId xidFullScanLimit;
+	MultiXactId mxactFullScanLimit;
 	BlockNumber new_rel_pages;
 	double		new_rel_tuples;
 	BlockNumber new_rel_allvisible;
@@ -203,10 +204,19 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
 
 	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
 						  onerel->rd_rel->relisshared,
-						  &OldestXmin, &FreezeLimit, &freezeTableLimit,
-						  &MultiXactCutoff);
+						  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
+						  &MultiXactCutoff, &mxactFullScanLimit);
+
+	/*
+	 * We request a full scan if either the table's frozen Xid is now older
+	 * than or equal to the requested Xid full-table scan limit; or if the
+	 * table's minimum MultiXactId is older than or equal to the requested mxid
+	 * full-table scan limit.
+	 */
 	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
-											 freezeTableLimit);
+											 xidFullScanLimit);
+	scan_all |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
+											mxactFullScanLimit);
 
 	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 8a9eddee387..e6db81a8270 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -87,6 +87,8 @@ extern void MultiXactIdSetOldestMember(void);
 extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids,
 					  bool allow_old);
 extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
+extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
+							MultiXactId multi2);
 
 extern void AtEOXact_MultiXact(void);
 extern void AtPrepare_MultiXact(void);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 08bec256ba8..44a3c3bd52e 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -159,8 +159,9 @@ extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
 					  bool sharedRel,
 					  TransactionId *oldestXmin,
 					  TransactionId *freezeLimit,
-					  TransactionId *freezeTableLimit,
-					  MultiXactId *multiXactCutoff);
+					  TransactionId *xidFullScanLimit,
+					  MultiXactId *multiXactCutoff,
+					  MultiXactId *mxactFullScanLimit);
 extern void vac_update_datfrozenxid(void);
 extern void vacuum_delay_point(void);
 
-- 
GitLab