From 71d05a2c7b82379bb1013a0e338906349c54ed85 Mon Sep 17 00:00:00 2001 From: Robert Haas <rhaas@postgresql.org> Date: Fri, 17 Jun 2016 17:37:30 -0400 Subject: [PATCH] pg_visibility: Add pg_truncate_visibility_map function. This requires some core changes as well so that we can properly WAL-log the truncation. Specifically, it changes the format of the XLOG_SMGR_TRUNCATE WAL record, so bump XLOG_PAGE_MAGIC. Patch by me, reviewed but not fully endorsed by Andres Freund. --- .../pg_visibility/pg_visibility--1.0--1.1.sql | 7 ++ contrib/pg_visibility/pg_visibility--1.1.sql | 8 +++ contrib/pg_visibility/pg_visibility.c | 72 +++++++++++++++++++ doc/src/sgml/pgvisibility.sgml | 30 ++++++-- src/backend/access/rmgrdesc/smgrdesc.c | 3 +- src/backend/catalog/storage.c | 16 +++-- src/include/access/xlog_internal.h | 2 +- src/include/catalog/storage_xlog.h | 8 +++ 8 files changed, 133 insertions(+), 13 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql index 2c97dfd03c2..378824c8bf9 100644 --- a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql +++ b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql @@ -13,5 +13,12 @@ RETURNS SETOF tid AS 'MODULE_PATHNAME', 'pg_check_visible' LANGUAGE C STRICT; +CREATE FUNCTION pg_truncate_visibility_map(regclass) +RETURNS void +AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map' +LANGUAGE C STRICT +PARALLEL UNSAFE; -- let's not make this any more dangerous + REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC; diff --git a/contrib/pg_visibility/pg_visibility--1.1.sql b/contrib/pg_visibility/pg_visibility--1.1.sql index b49b644996f..0a29967ee6d 100644 --- a/contrib/pg_visibility/pg_visibility--1.1.sql +++ b/contrib/pg_visibility/pg_visibility--1.1.sql @@ -57,6 +57,13 @@ RETURNS SETOF tid AS 'MODULE_PATHNAME', 'pg_check_visible' 
LANGUAGE C STRICT; +-- Truncate the visibility map fork. +CREATE FUNCTION pg_truncate_visibility_map(regclass) +RETURNS void +AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map' +LANGUAGE C STRICT +PARALLEL UNSAFE; -- let's not make this any more dangerous + -- Don't want these to be available to public. REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC; @@ -65,3 +72,4 @@ REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC; diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index abb92f388a3..70340666630 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -11,10 +11,12 @@ #include "access/htup_details.h" #include "access/visibilitymap.h" #include "catalog/pg_type.h" +#include "catalog/storage_xlog.h" #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/procarray.h" +#include "storage/smgr.h" #include "utils/rel.h" PG_MODULE_MAGIC; @@ -40,6 +42,7 @@ PG_FUNCTION_INFO_V1(pg_visibility_rel); PG_FUNCTION_INFO_V1(pg_visibility_map_summary); PG_FUNCTION_INFO_V1(pg_check_frozen); PG_FUNCTION_INFO_V1(pg_check_visible); +PG_FUNCTION_INFO_V1(pg_truncate_visibility_map); static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd); static vbits *collect_visibility_data(Oid relid, bool include_pd); @@ -335,6 +338,75 @@ pg_check_visible(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } +/* + * Remove the visibility map fork for a relation. 
If there turn out to be + * any bugs in the visibility map code that require rebuilding the VM, this + * provides users with a way to do it that is cleaner than shutting down the + * server and removing files by hand. + * + * This is a cut-down version of RelationTruncate. + */ +Datum +pg_truncate_visibility_map(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + rel = relation_open(relid, AccessExclusiveLock); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_TOASTVALUE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table, materialized view, or TOAST table", + RelationGetRelationName(rel)))); + + RelationOpenSmgr(rel); + rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; + + visibilitymap_truncate(rel, 0); + + if (RelationNeedsWAL(rel)) + { + xl_smgr_truncate xlrec; + + xlrec.blkno = 0; + xlrec.rnode = rel->rd_node; + xlrec.flags = SMGR_TRUNCATE_VM; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xlrec)); + + XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); + } + + /* + * Release the lock right away, not at commit time. + * + * It would be a problem to release the lock prior to commit if this + * truncate operation sends any transactional invalidation messages. Other + * backends would potentially be able to lock the relation without + * processing them in the window of time between when we release the lock + * here and when we send the messages at our eventual commit. However, + * we're currently only sending a non-transactional smgr invalidation, + * which will have been posted to shared memory immediately from within + * visibilitymap_truncate. Therefore, there should be no race here. + * + * The reason why it's desirable to release the lock early here is because + * of the possibility that someone will need to use this to blow away many + * visibility map forks at once.
If we can't release the lock until + * commit time, the transaction doing this will accumulate + * AccessExclusiveLocks on all of those relations at the same time, which + * is undesirable. However, if this turns out to be unsafe we may have no + * choice... + */ + relation_close(rel, AccessExclusiveLock); + + /* Nothing to return. */ + PG_RETURN_VOID(); +} + /* * Helper function to construct whichever TupleDesc we need for a particular * call. diff --git a/doc/src/sgml/pgvisibility.sgml b/doc/src/sgml/pgvisibility.sgml index 4cdca7dada9..44e83de7289 100644 --- a/doc/src/sgml/pgvisibility.sgml +++ b/doc/src/sgml/pgvisibility.sgml @@ -9,14 +9,16 @@ <para> The <filename>pg_visibility</> module provides a means for examining the - visibility map (VM) and page-level visibility information. + visibility map (VM) and page-level visibility information. It also + provides functions to check the integrity of the visibility map and to + force it to be rebuilt. </para> <para> - These routines return information about three different bits. The - all-visible bit in the visibility map indicates that every tuple on - a given page of a relation is visible to every current transaction. The - all-frozen bit in the visibility map indicates that every tuple on the + Three different bits are used to store information about page-level + visibility. The all-visible bit in the visibility map indicates that every + tuple on a given page of a relation is visible to every current transaction. + The all-frozen bit in the visibility map indicates that every tuple on the page is frozen; that is, no future vacuum will need to modify the page until such time as a tuple is inserted, updated, deleted, or locked on that page. 
The page-level <literal>PD_ALL_VISIBLE</literal> bit has the @@ -25,7 +27,8 @@ will normally agree, but the page-level bit can sometimes be set while the visibility map bit is clear after a crash recovery; or they can disagree because of a change which occurs after <literal>pg_visibility</> examines - the visibility map and before it examines the data page. + the visibility map and before it examines the data page. Any event which + causes data corruption can also cause these bits to disagree. </para> <para> @@ -118,6 +121,21 @@ </para> </listitem> </varlistentry> + + <varlistentry> + <term><function>pg_truncate_visibility_map(regclass) returns void</function></term> + + <listitem> + <para> + Truncates the visibility map for the given relation. This function + is only expected to be useful if you suspect that the visibility map + for the indicated relation is corrupt and wish to rebuild it. The first + <command>VACUUM</> executed on the given relation after this function + is executed will scan every page in the relation and rebuild the + visibility map. 
+ </para> + </listitem> + </varlistentry> </variablelist> <para> diff --git a/src/backend/access/rmgrdesc/smgrdesc.c b/src/backend/access/rmgrdesc/smgrdesc.c index 0c6e5832a1c..242d79a1366 100644 --- a/src/backend/access/rmgrdesc/smgrdesc.c +++ b/src/backend/access/rmgrdesc/smgrdesc.c @@ -37,7 +37,8 @@ smgr_desc(StringInfo buf, XLogReaderState *record) xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); - appendStringInfo(buf, "%s to %u blocks", path, xlrec->blkno); + appendStringInfo(buf, "%s to %u blocks flags %d", path, + xlrec->blkno, xlrec->flags); pfree(path); } } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 67f19063264..0d8311c4038 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -268,6 +268,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) xlrec.blkno = nblocks; xlrec.rnode = rel->rd_node; + xlrec.flags = SMGR_TRUNCATE_ALL; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); @@ -522,17 +523,22 @@ smgr_redo(XLogReaderState *record) */ XLogFlush(lsn); - smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); + if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0) + { + smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); - /* Also tell xlogutils.c about it */ - XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); + /* Also tell xlogutils.c about it */ + XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); + } /* Truncate FSM and VM too */ rel = CreateFakeRelcacheEntry(xlrec->rnode); - if (smgrexists(reln, FSM_FORKNUM)) + if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 && + smgrexists(reln, FSM_FORKNUM)) FreeSpaceMapTruncateRel(rel, xlrec->blkno); - if (smgrexists(reln, VISIBILITYMAP_FORKNUM)) + if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 && + smgrexists(reln, VISIBILITYMAP_FORKNUM)) visibilitymap_truncate(rel, xlrec->blkno); FreeFakeRelcacheEntry(rel); diff --git a/src/include/access/xlog_internal.h 
b/src/include/access/xlog_internal.h index af2944c1b35..2627519ba08 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD091 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD092 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h index 7207e8be727..500e663b5fb 100644 --- a/src/include/catalog/storage_xlog.h +++ b/src/include/catalog/storage_xlog.h @@ -36,10 +36,18 @@ typedef struct xl_smgr_create ForkNumber forkNum; } xl_smgr_create; +/* flags for xl_smgr_truncate */ +#define SMGR_TRUNCATE_HEAP 0x0001 +#define SMGR_TRUNCATE_VM 0x0002 +#define SMGR_TRUNCATE_FSM 0x0004 +#define SMGR_TRUNCATE_ALL \ + (SMGR_TRUNCATE_HEAP|SMGR_TRUNCATE_VM|SMGR_TRUNCATE_FSM) + typedef struct xl_smgr_truncate { BlockNumber blkno; RelFileNode rnode; + int flags; } xl_smgr_truncate; extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); -- GitLab