From 71d05a2c7b82379bb1013a0e338906349c54ed85 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Fri, 17 Jun 2016 17:37:30 -0400
Subject: [PATCH] pg_visibility: Add pg_truncate_visibility_map function.

This requires some core changes as well so that we can properly
WAL-log the truncation.  Specifically, it changes the format of the
XLOG_SMGR_TRUNCATE WAL record, so bump XLOG_PAGE_MAGIC.

Patch by me, reviewed but not fully endorsed by Andres Freund.
---
 .../pg_visibility/pg_visibility--1.0--1.1.sql |  7 ++
 contrib/pg_visibility/pg_visibility--1.1.sql  |  8 +++
 contrib/pg_visibility/pg_visibility.c         | 72 +++++++++++++++++++
 doc/src/sgml/pgvisibility.sgml                | 30 ++++++--
 src/backend/access/rmgrdesc/smgrdesc.c        |  3 +-
 src/backend/catalog/storage.c                 | 16 +++--
 src/include/access/xlog_internal.h            |  2 +-
 src/include/catalog/storage_xlog.h            |  8 +++
 8 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
index 2c97dfd03c2..378824c8bf9 100644
--- a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
+++ b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
@@ -13,5 +13,12 @@ RETURNS SETOF tid
 AS 'MODULE_PATHNAME', 'pg_check_visible'
 LANGUAGE C STRICT;
 
+CREATE FUNCTION pg_truncate_visibility_map(regclass)
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map'
+LANGUAGE C STRICT
+PARALLEL UNSAFE;  -- let's not make this any more dangerous
+
 REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
 REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility--1.1.sql b/contrib/pg_visibility/pg_visibility--1.1.sql
index b49b644996f..0a29967ee6d 100644
--- a/contrib/pg_visibility/pg_visibility--1.1.sql
+++ b/contrib/pg_visibility/pg_visibility--1.1.sql
@@ -57,6 +57,13 @@ RETURNS SETOF tid
 AS 'MODULE_PATHNAME', 'pg_check_visible'
 LANGUAGE C STRICT;
 
+-- Truncate the visibility map fork.
+CREATE FUNCTION pg_truncate_visibility_map(regclass)
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map'
+LANGUAGE C STRICT
+PARALLEL UNSAFE;  -- let's not make this any more dangerous
+
 -- Don't want these to be available to public.
 REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC;
 REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC;
@@ -65,3 +72,4 @@ REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC;
 REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC;
 REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
 REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index abb92f388a3..70340666630 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -11,10 +11,12 @@
 #include "access/htup_details.h"
 #include "access/visibilitymap.h"
 #include "catalog/pg_type.h"
+#include "catalog/storage_xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/procarray.h"
+#include "storage/smgr.h"
 #include "utils/rel.h"
 
 PG_MODULE_MAGIC;
@@ -40,6 +42,7 @@ PG_FUNCTION_INFO_V1(pg_visibility_rel);
 PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
 PG_FUNCTION_INFO_V1(pg_check_frozen);
 PG_FUNCTION_INFO_V1(pg_check_visible);
+PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
 
 static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
 static vbits *collect_visibility_data(Oid relid, bool include_pd);
@@ -335,6 +338,75 @@ pg_check_visible(PG_FUNCTION_ARGS)
 	SRF_RETURN_DONE(funcctx);
 }
 
+/*
+ * Truncate the visibility map fork of a relation to zero blocks.  If there
+ * turn out to be any bugs in the visibility map code that require rebuilding
+ * the VM, this provides users with a way to do it that is cleaner than
+ * shutting down the server and removing files by hand.
+ *
+ * This is a cut-down version of RelationTruncate.
+ */
+Datum
+pg_truncate_visibility_map(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	Relation	rel;
+
+	rel = relation_open(relid, AccessExclusiveLock);
+
+	if (rel->rd_rel->relkind != RELKIND_RELATION &&
+		rel->rd_rel->relkind != RELKIND_MATVIEW &&
+		rel->rd_rel->relkind != RELKIND_TOASTVALUE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+		   errmsg("\"%s\" is not a table, materialized view, or TOAST table",
+				  RelationGetRelationName(rel))));
+
+	RelationOpenSmgr(rel);
+	rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
+
+	visibilitymap_truncate(rel, 0);
+
+	if (RelationNeedsWAL(rel))
+	{
+		xl_smgr_truncate xlrec;
+
+		xlrec.blkno = 0;
+		xlrec.rnode = rel->rd_node;
+		xlrec.flags = SMGR_TRUNCATE_VM;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+
+		XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
+	}
+
+	/*
+	 * Release the lock right away, not at commit time.
+	 *
+	 * It would be a problem to release the lock prior to commit if this
+	 * truncate operation sends any transactional invalidation messages. Other
+	 * backends would potentially be able to lock the relation without
+	 * processing them in the window of time between when we release the lock
+	 * here and when we send the messages at our eventual commit.  However,
+	 * we're currently only sending a non-transactional smgr invalidation,
+	 * which will have been posted to shared memory immediately from within
+	 * visibilitymap_truncate.  Therefore, there should be no race here.
+	 *
+	 * The reason why it's desirable to release the lock early here is because
+	 * of the possibility that someone will need to use this to blow away many
+	 * visibility map forks at once.  If we can't release the lock until
+	 * commit time, the transaction doing this will accumulate
+	 * AccessExclusiveLocks on all of those relations at the same time, which
+	 * is undesirable. However, if this turns out to be unsafe we may have no
+	 * choice...
+	 */
+	relation_close(rel, AccessExclusiveLock);
+
+	/* Nothing to return. */
+	PG_RETURN_VOID();
+}
+
 /*
  * Helper function to construct whichever TupleDesc we need for a particular
  * call.
diff --git a/doc/src/sgml/pgvisibility.sgml b/doc/src/sgml/pgvisibility.sgml
index 4cdca7dada9..44e83de7289 100644
--- a/doc/src/sgml/pgvisibility.sgml
+++ b/doc/src/sgml/pgvisibility.sgml
@@ -9,14 +9,16 @@
 
  <para>
   The <filename>pg_visibility</> module provides a means for examining the
-  visibility map (VM) and page-level visibility information.
+  visibility map (VM) and page-level visibility information.  It also
+  provides functions to check the integrity of the visibility map and to
+  force it to be rebuilt.
  </para>
 
  <para>
-  These routines return information about three different bits.  The
-  all-visible bit in the visibility map indicates that every tuple on
-  a given page of a relation is visible to every current transaction.  The
-  all-frozen bit in the visibility map indicates that every tuple on the
+  Three different bits are used to store information about page-level
+  visibility.  The all-visible bit in the visibility map indicates that every
+  tuple on a given page of a relation is visible to every current transaction.
+  The all-frozen bit in the visibility map indicates that every tuple on the
   page is frozen; that is, no future vacuum will need to modify the page
   until such time as a tuple is inserted, updated, deleted, or locked on
   that page.  The page-level <literal>PD_ALL_VISIBLE</literal> bit has the
@@ -25,7 +27,8 @@
   will normally agree, but the page-level bit can sometimes be set while the
   visibility map bit is clear after a crash recovery; or they can disagree
   because of a change which occurs after <literal>pg_visibility</> examines
-  the visibility map and before it examines the data page.
+  the visibility map and before it examines the data page.  Any event which
+  causes data corruption can also cause these bits to disagree.
  </para>
 
  <para>
@@ -118,6 +121,21 @@
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term><function>pg_truncate_visibility_map(regclass) returns void</function></term>
+
+    <listitem>
+     <para>
+      Truncates the visibility map for the given relation.  This function
+      is only expected to be useful if you suspect that the visibility map
+      for the indicated relation is corrupt and wish to rebuild it.  The first
+      <command>VACUUM</> executed on the given relation after this function
+      is executed will scan every page in the relation and rebuild the
+      visibility map.
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
 
   <para>
diff --git a/src/backend/access/rmgrdesc/smgrdesc.c b/src/backend/access/rmgrdesc/smgrdesc.c
index 0c6e5832a1c..242d79a1366 100644
--- a/src/backend/access/rmgrdesc/smgrdesc.c
+++ b/src/backend/access/rmgrdesc/smgrdesc.c
@@ -37,7 +37,8 @@ smgr_desc(StringInfo buf, XLogReaderState *record)
 		xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec;
 		char	   *path = relpathperm(xlrec->rnode, MAIN_FORKNUM);
 
-		appendStringInfo(buf, "%s to %u blocks", path, xlrec->blkno);
+		appendStringInfo(buf, "%s to %u blocks flags %d", path,
+						 xlrec->blkno, xlrec->flags);
 		pfree(path);
 	}
 }
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 67f19063264..0d8311c4038 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -268,6 +268,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 
 		xlrec.blkno = nblocks;
 		xlrec.rnode = rel->rd_node;
+		xlrec.flags = SMGR_TRUNCATE_ALL;
 
 		XLogBeginInsert();
 		XLogRegisterData((char *) &xlrec, sizeof(xlrec));
@@ -522,17 +523,22 @@ smgr_redo(XLogReaderState *record)
 		 */
 		XLogFlush(lsn);
 
-		smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
+		if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
+		{
+			smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
 
-		/* Also tell xlogutils.c about it */
-		XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
+			/* Also tell xlogutils.c about it */
+			XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
+		}
 
 		/* Truncate FSM and VM too */
 		rel = CreateFakeRelcacheEntry(xlrec->rnode);
 
-		if (smgrexists(reln, FSM_FORKNUM))
+		if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
+			smgrexists(reln, FSM_FORKNUM))
 			FreeSpaceMapTruncateRel(rel, xlrec->blkno);
-		if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
+		if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
+			smgrexists(reln, VISIBILITYMAP_FORKNUM))
 			visibilitymap_truncate(rel, xlrec->blkno);
 
 		FreeFakeRelcacheEntry(rel);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index af2944c1b35..2627519ba08 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -31,7 +31,7 @@
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD091	/* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD092	/* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h
index 7207e8be727..500e663b5fb 100644
--- a/src/include/catalog/storage_xlog.h
+++ b/src/include/catalog/storage_xlog.h
@@ -36,10 +36,18 @@ typedef struct xl_smgr_create
 	ForkNumber	forkNum;
 } xl_smgr_create;
 
+/* flags for xl_smgr_truncate: select which relation forks get truncated */
+#define SMGR_TRUNCATE_HEAP		0x0001
+#define SMGR_TRUNCATE_VM		0x0002
+#define SMGR_TRUNCATE_FSM		0x0004
+#define SMGR_TRUNCATE_ALL		\
+	(SMGR_TRUNCATE_HEAP|SMGR_TRUNCATE_VM|SMGR_TRUNCATE_FSM)
+
 typedef struct xl_smgr_truncate
 {
 	BlockNumber blkno;
 	RelFileNode rnode;
+	int			flags;			/* bitmask of SMGR_TRUNCATE_* values */
 } xl_smgr_truncate;
 
 extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
-- 
GitLab