From 5dff230eb1c3c72dd9f0e8fbf2040d65b96e1fba Mon Sep 17 00:00:00 2001 From: Robert Haas <rhaas@postgresql.org> Date: Fri, 20 Jan 2017 15:55:45 -0500 Subject: [PATCH] Avoid useless respawining the autovacuum launcher at high speed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When (1) autovacuum = off and (2) there's at least one database with an XID age greater than autovacuum_freeze_max_age and (3) all tables in that database that need vacuuming are already being processed by a worker and (4) the autovacuum launcher is started, a kind of infinite loop occurs. The launcher starts a worker and immediately exits. The worker, finding no worker to do, immediately starts the launcher, supposedly so that the next database can be processed. But because datfrozenxid for that database hasn't been advanced yet, the new worker gets put right back into the same database as the old one, where it once again starts the launcher and exits. High-speed ping pong ensues. There are several possible ways to break the cycle; this seems like the safest one. Amit Khandekar (code) and Robert Haas (comments), reviewed by Ãlvaro Herrera. Discussion: http://postgr.es/m/CAJ3gD9eWejf72HKquKSzax0r+epS=nAbQKNnykkMA0E8c+rMDg@mail.gmail.com --- src/backend/postmaster/autovacuum.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 7a75d828f77..85fd22e53cb 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -1927,6 +1927,8 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + bool did_vacuum = false; + bool found_concurrent_worker = false; /* * StartTransactionCommand and CommitTransactionCommand will automatically @@ -2243,6 +2245,7 @@ do_autovacuum(void) if (worker->wi_tableoid == relid) { skipit = true; + found_concurrent_worker = true; break; } @@ -2373,6 +2376,8 @@ do_autovacuum(void) } PG_END_TRY(); + did_vacuum = true; + /* the PGXACT flags are reset at the next end of transaction */ /* be tidy */ @@ -2410,8 +2415,25 @@ deleted: /* * Update pg_database.datfrozenxid, and truncate pg_clog if possible. We * only need to do this once, not after each table. + * + * Even if we didn't vacuum anything, it may still be important to do + * this, because one indirect effect of vac_update_datfrozenxid() is to + * update ShmemVariableCache->xidVacLimit. That might need to be done + * even if we haven't vacuumed anything, because relations with older + * relfrozenxid values or other databases with older datfrozenxid values + * might have been dropped, allowing xidVacLimit to advance. + * + * However, it's also important not to do this blindly in all cases, + * because when autovacuum=off this will restart the autovacuum launcher. + * If we're not careful, an infinite loop can result, where workers find + * no work to do and restart the launcher, which starts another worker in + * the same database that finds no work to do. To prevent that, we skip + * this if (1) we found no work to do and (2) we skipped at least one + * table due to concurrent autovacuum activity. In that case, the other + * worker has already done it, or will do so when it finishes. */ - vac_update_datfrozenxid(); + if (did_vacuum || !found_concurrent_worker) + vac_update_datfrozenxid(); /* Finally close out the last transaction. */ CommitTransactionCommand(); -- GitLab