From 3e701a04febb0c423d36ce0c47721a4c276439a2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 13 Mar 2008 18:00:32 +0000
Subject: [PATCH] Fix heap_page_prune's problem with failing to send cache
 invalidation messages if the calling transaction aborts later on.  Collapsing
 out line pointer redirects is a done deal as soon as we complete the page
 update, so syscache *must* be notified even if the VACUUM FULL as a whole
 doesn't complete.  To fix, add some functionality to inval.c to allow the
 pending inval messages to be sent immediately while heap_page_prune is still
 running.  The implementation is a bit chintzy: it will only work in the
 context of VACUUM FULL.  But that's all we need now, and it can always be
 extended later if needed.  Per my trouble report of a week ago.

---
 src/backend/access/heap/pruneheap.c | 37 +++++++----
 src/backend/utils/cache/inval.c     | 95 ++++++++++++++++++++++++++++-
 src/include/utils/inval.h           |  6 +-
 3 files changed, 124 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 09c55ee7615..3db5d0b82e6 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.7 2008/03/08 21:57:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.8 2008/03/13 18:00:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -158,7 +158,14 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
 	 * much logic as possible out of the critical section, and also ensures
 	 * that WAL replay will work the same as the normal case.
 	 *
-	 * First, initialize the new pd_prune_xid value to zero (indicating no
+	 * First, inform inval.c that upcoming CacheInvalidateHeapTuple calls
+	 * are nontransactional.
+	 */
+	if (redirect_move)
+		BeginNonTransactionalInvalidation();
+
+	/*
+	 * Initialize the new pd_prune_xid value to zero (indicating no
 	 * prunable tuples).  If we find any tuples which may soon become
 	 * prunable, we will save the lowest relevant XID in new_prune_xid.
 	 * Also initialize the rest of our working state.
@@ -191,6 +198,18 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
 									 redirect_move);
 	}
 
+	/*
+	 * Send invalidation messages for any tuples we are about to move.
+	 * It is safe to do this now, even though we could theoretically still
+	 * fail before making the actual page update, because a useless cache
+	 * invalidation doesn't hurt anything.  Also, no one else can reload the
+	 * tuples while we have exclusive buffer lock, so it's not too early to
+	 * send the invals.  This avoids sending the invals while inside the
+	 * critical section, which is a good thing for robustness.
+	 */
+	if (redirect_move)
+		EndNonTransactionalInvalidation();
+
 	/* Any error while applying the changes is critical */
 	START_CRIT_SECTION();
 
@@ -587,16 +606,10 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
 	/*
 	 * If we are going to implement a redirect by moving tuples, we have
 	 * to issue a cache invalidation against the redirection target tuple,
-	 * because its CTID will be effectively changed by the move.  It is
-	 * safe to do this now, even though we might fail before reaching the
-	 * actual page update, because a useless cache invalidation doesn't
-	 * hurt anything.  Furthermore we really really don't want to issue
-	 * the inval inside the critical section, since it has a nonzero
-	 * chance of causing an error (eg, due to running out of memory).
-	 *
-	 * XXX this is not really right because CacheInvalidateHeapTuple
-	 * expects transactional semantics, and our move isn't transactional.
-	 * FIXME!!
+	 * because its CTID will be effectively changed by the move.  Note that
+	 * CacheInvalidateHeapTuple only queues the request, it doesn't send it;
+	 * if we fail before reaching EndNonTransactionalInvalidation, nothing
+	 * happens and no harm is done.
 	 */
 	if (OffsetNumberIsValid(redirect_target))
 	{
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 033ae3a2d62..ccb002f0f5d 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -80,7 +80,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.83 2008/01/01 19:45:53 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.84 2008/03/13 18:00:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -964,6 +964,99 @@ CommandEndInvalidationMessages(void)
 							   &transInvalInfo->CurrentCmdInvalidMsgs);
 }
 
+
+/*
+ * BeginNonTransactionalInvalidation
+ *		Prepare for invalidation messages for nontransactional updates.
+ *
+ * A nontransactional invalidation is one that must be sent whether or not
+ * the current transaction eventually commits.  We arrange for all invals
+ * queued between this call and EndNonTransactionalInvalidation() to be sent
+ * immediately when the latter is called.
+ *
+ * Currently, this is only used by heap_page_prune(), and only when it is
+ * invoked during VACUUM FULL's first pass over a table.  We expect therefore
+ * that we are not inside a subtransaction and there are no already-pending
+ * invalidations.  This could be relaxed by setting up a new nesting level of
+ * invalidation data, but for now there's no need.  Note that heap_page_prune
+ * knows that this function does not change any state, and therefore there's
+ * no need to worry about cleaning up if there's an elog(ERROR) before
+ * reaching EndNonTransactionalInvalidation (the invals will just be thrown
+ * away if that happens).
+ */
+void
+BeginNonTransactionalInvalidation(void)
+{
+	/* Must be at top of stack */
+	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
+
+	/* Must not have any previously-queued activity */
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
+	Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL);
+	Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL);
+	Assert(transInvalInfo->RelcacheInitFileInval == false);
+}
+
+/*
+ * EndNonTransactionalInvalidation
+ *		Process queued-up invalidation messages for nontransactional updates.
+ *
+ * We expect to find messages in CurrentCmdInvalidMsgs only (else there
+ * was a CommandCounterIncrement within the "nontransactional" update).
+ * We must process them locally and send them out to the shared invalidation
+ * message queue.
+ *
+ * We must also reset the lists to empty and explicitly free memory (we can't
+ * rely on end-of-transaction cleanup for that).
+ */
+void
+EndNonTransactionalInvalidation(void)
+{
+	InvalidationChunk *chunk;
+	InvalidationChunk *next;
+
+	/* Must be at top of stack */
+	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
+
+	/* Must not have any prior-command messages */
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
+
+	/*
+	 * At present, this function is only used for CTID-changing updates;
+	 * since the relcache init file doesn't store any tuple CTIDs, we
+	 * don't have to invalidate it.  That might not be true forever
+	 * though, in which case we'd need code similar to AtEOXact_Inval.
+	 */
+
+	/* Send out the invals */
+	ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
+								LocalExecuteInvalidationMessage);
+	ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
+								SendSharedInvalidMessage);
+
+	/* Clean up and release memory */
+	for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.cclist;
+		 chunk != NULL;
+		 chunk = next)
+	{
+		next = chunk->next;
+		pfree(chunk);
+	}
+	for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.rclist;
+		 chunk != NULL;
+		 chunk = next)
+	{
+		next = chunk->next;
+		pfree(chunk);
+	}
+	transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL;
+	transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL;
+	transInvalInfo->RelcacheInitFileInval = false;
+}
+
+
 /*
  * CacheInvalidateHeapTuple
  *		Register the given tuple for invalidation at end of command
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index df0e5f397b3..80c8c2ab331 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.41 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.42 2008/03/13 18:00:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,6 +37,10 @@ extern void PostPrepare_Inval(void);
 
 extern void CommandEndInvalidationMessages(void);
 
+extern void BeginNonTransactionalInvalidation(void);
+
+extern void EndNonTransactionalInvalidation(void);
+
 extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple);
 
 extern void CacheInvalidateRelcache(Relation relation);
-- 
GitLab