From 52ca7572c3642ccbb46a619c03efe1928811ceae Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Mon, 30 Oct 2017 15:52:13 +0100
Subject: [PATCH] Fix autovacuum work item error handling

In autovacuum's "work item" processing, a few strings were allocated in
the current transaction's memory context, which goes away during error
handling; if an error happened during execution of the work item, the
pfree() calls to clean up afterwards would try to release already-released
memory, possibly leading to a crash.  In branch master, this was already
fixed by commit 335f3d04e4c8, so backpatch that to REL_10_STABLE to fix
the problem there too.

As a secondary problem, verify that the autovacuum worker is connected
to the right database for each work item; otherwise some items would be
discarded by workers in other databases.

Reported-by: Justin Pryzby
Discussion: https://postgr.es/m/20171014035732.GB31726@telsasoft.com
---
 src/backend/postmaster/autovacuum.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 776b1c0a9d5..58f1714b03f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2444,8 +2444,10 @@ do_autovacuum(void)
 		 */
 		PG_TRY();
 		{
+			/* Use PortalContext for any per-table allocations */
+			MemoryContextSwitchTo(PortalContext);
+
 			/* have at it */
-			MemoryContextSwitchTo(TopTransactionContext);
 			autovacuum_do_vac_analyze(tab, bstrategy);
 
 			/*
@@ -2482,6 +2484,9 @@ do_autovacuum(void)
 		}
 		PG_END_TRY();
 
+		/* Make sure we're back in AutovacMemCxt */
+		MemoryContextSwitchTo(AutovacMemCxt);
+
 		did_vacuum = true;
 
 		/* the PGXACT flags are reset at the next end of transaction */
@@ -2525,6 +2530,8 @@ deleted:
 			continue;
 		if (workitem->avw_active)
 			continue;
+		if (workitem->avw_database != MyDatabaseId)
+			continue;
 
 		/* claim this one, and release lock while performing it */
 		workitem->avw_active = true;
@@ -2533,8 +2540,7 @@ deleted:
 		perform_work_item(workitem);
 
 		/*
-		 * Check for config changes before acquiring lock for further
-		 * jobs.
+		 * Check for config changes before acquiring lock for further jobs.
 		 */
 		CHECK_FOR_INTERRUPTS();
 		if (got_SIGHUP)
@@ -2601,10 +2607,9 @@ perform_work_item(AutoVacuumWorkItem *workitem)
 	/*
 	 * Save the relation name for a possible error message, to avoid a catalog
 	 * lookup in case of an error.  If any of these return NULL, then the
-	 * relation has been dropped since last we checked; skip it. Note: they
-	 * must live in a long-lived memory context because we call vacuum and
-	 * analyze in different transactions.
+	 * relation has been dropped since last we checked; skip it.
 	 */
+	Assert(CurrentMemoryContext == AutovacMemCxt);
 
 	cur_relname = get_rel_name(workitem->avw_relation);
 	cur_nspname = get_namespace_name(get_rel_namespace(workitem->avw_relation));
@@ -2614,6 +2619,9 @@ perform_work_item(AutoVacuumWorkItem *workitem)
 
 	autovac_report_workitem(workitem, cur_nspname, cur_datname);
 
+	/* clean up memory before each work item */
+	MemoryContextResetAndDeleteChildren(PortalContext);
+
 	/*
 	 * We will abort the current work item if something errors out, and
 	 * continue with the next one; in particular, this happens if we are
@@ -2622,9 +2630,10 @@ perform_work_item(AutoVacuumWorkItem *workitem)
 	 */
 	PG_TRY();
 	{
-		/* have at it */
-		MemoryContextSwitchTo(TopTransactionContext);
+		/* Use PortalContext for any per-work-item allocations */
+		MemoryContextSwitchTo(PortalContext);
 
+		/* have at it */
 		switch (workitem->avw_type)
 		{
 			case AVW_BRINSummarizeRange:
@@ -2668,6 +2677,9 @@ perform_work_item(AutoVacuumWorkItem *workitem)
 	}
 	PG_END_TRY();
 
+	/* Make sure we're back in AutovacMemCxt */
+	MemoryContextSwitchTo(AutovacMemCxt);
+
 	/* We intentionally do not set did_vacuum here */
 
 	/* be tidy */
-- 
GitLab