Newer
Older
/*-------------------------------------------------------------------------
*
* autovacuum.c
*
* PostgreSQL Integrated Autovacuum Daemon
*
Alvaro Herrera
committed
* The autovacuum system is structured in two different kinds of processes: the
* autovacuum launcher and the autovacuum worker. The launcher is an
* always-running process, started by the postmaster when the autovacuum GUC
* parameter is set. The launcher schedules autovacuum workers to be started
* when appropriate. The workers are the processes which execute the actual
* vacuuming; they connect to a database as determined in the launcher, and
* once connected they examine the catalogs to select the tables to vacuum.
*
* The autovacuum launcher cannot start the worker processes by itself,
* because doing so would cause robustness issues (namely, failure to shut
* them down on exceptional conditions, and also, since the launcher is
* connected to shared memory and is thus subject to corruption there, it is
* not as robust as the postmaster). So it leaves that task to the postmaster.
*
* There is an autovacuum shared memory area, where the launcher stores
* information about the database it wants vacuumed. When it wants a new
* worker to start, it sets a flag in shared memory and sends a signal to the
* postmaster. Then postmaster knows nothing more than it must start a worker;
* so it forks a new child, which turns into a worker. This new process
Alvaro Herrera
committed
* connects to shared memory, and there it can inspect the information that the
* launcher has set up.
*
* If the fork() call fails in the postmaster, it sets a flag in the shared
* memory area, and sends a signal to the launcher. The launcher, upon
* noticing the flag, can try starting the worker again by resending the
* signal. Note that the failure can only be transient (fork failure due to
Alvaro Herrera
committed
* high load, memory pressure, too many processes, etc); more permanent
* problems, like failure to connect to a database, are detected later in the
* worker and dealt with just by having the worker exit normally. The launcher
* will launch a new worker again later, per schedule.
*
* When the worker is done vacuuming it sends SIGUSR2 to the launcher. The
Alvaro Herrera
committed
* launcher then wakes up and is able to launch another worker, if the schedule
* is so tight that a new worker is needed immediately. At this time the
* launcher can also balance the settings for the various remaining workers'
* cost-based vacuum delay feature.
*
* Note that there can be more than one worker in a database concurrently.
* They will store the table they are currently vacuuming in shared memory, so
* that other workers avoid being blocked waiting for the vacuum lock for that
* table. They will also reload the pgstats data just before vacuuming each
* table, to avoid vacuuming a table that was just finished being vacuumed by
* another worker and thus is no longer noted in shared memory. However,
* there is a window (caused by pgstat delay) on which a worker may choose a
* table that was already vacuumed; this is a bug in the current design.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/postmaster/autovacuum.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <signal.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/pg_database.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
Alvaro Herrera
committed
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/sinvaladt.h"
#include "tcop/tcopprot.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
/*
* GUC parameters
*/
bool autovacuum_start_daemon = false;
int autovacuum_max_workers;
int autovacuum_naptime;
int autovacuum_vac_thresh;
double autovacuum_vac_scale;
int autovacuum_anl_thresh;
double autovacuum_anl_scale;
int autovacuum_freeze_max_age;
int autovacuum_vac_cost_delay;
int autovacuum_vac_cost_limit;
int Log_autovacuum_min_duration = -1;
Alvaro Herrera
committed
/* how long to keep pgstat data in the launcher, in milliseconds */
#define STATS_READ_DELAY 1000
/* the minimum allowed time between two awakenings of the launcher */
#define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
#define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
/* Flags to tell if we are in an autovacuum process */
static bool am_autovacuum_launcher = false;
static bool am_autovacuum_worker = false;
/* Flags set by signal handlers */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t got_SIGUSR2 = false;
static volatile sig_atomic_t got_SIGTERM = false;
/* Comparison point for determining whether freeze_max_age is exceeded */
static TransactionId recentXid;
/* Default freeze ages to use for autovacuum (varies by database) */
static int default_freeze_min_age;
static int default_freeze_table_age;
/* Memory context for long-lived data */
/* struct to keep track of databases in launcher */
typedef struct avl_dbase
Oid adl_datid; /* hash key -- must be first */
TimestampTz adl_next_worker;
int adl_score;
} avl_dbase;
/* struct to keep track of databases in worker */
typedef struct avw_dbase
{
Oid adw_datid;
char *adw_name;
TransactionId adw_frozenxid;
PgStat_StatDBEntry *adw_entry;
} avw_dbase;
/* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
typedef struct av_relation
{
Oid ar_toastrelid; /* hash key - must be first */
bool ar_hasrelopts;
AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's
* reloptions, or NULL if none */
} av_relation;
Alvaro Herrera
committed
/* struct to keep track of tables to vacuum and/or analyze, after rechecking */
typedef struct autovac_table
{
Alvaro Herrera
committed
Oid at_relid;
bool at_dovacuum;
bool at_doanalyze;
int at_freeze_min_age;
int at_freeze_table_age;
Alvaro Herrera
committed
int at_vacuum_cost_delay;
int at_vacuum_cost_limit;
bool at_dobalance;
bool at_wraparound;
Alvaro Herrera
committed
char *at_relname;
char *at_nspname;
char *at_datname;
} autovac_table;
/*-------------
* This struct holds information about a single worker's whereabouts. We keep
* an array of these in shared memory, sized according to
* autovacuum_max_workers.
*
* wi_links entry into free list or running list
* wi_dboid OID of the database this worker is supposed to work on
* wi_tableoid OID of the table currently being vacuumed, if any
* wi_sharedrel flag indicating whether table is marked relisshared
Alvaro Herrera
committed
* wi_proc pointer to PGPROC of the running worker, NULL if not started
* wi_launchtime Time at which this worker was launched
* wi_cost_* Vacuum cost-based delay parameters current in this worker
*
* All fields are protected by AutovacuumLock, except for wi_tableoid which is
* protected by AutovacuumScheduleLock (which is read-only for everyone except
* that worker itself).
*-------------
*/
typedef struct WorkerInfoData
{
SHM_QUEUE wi_links;
Oid wi_dboid;
Oid wi_tableoid;
Alvaro Herrera
committed
PGPROC *wi_proc;
bool wi_dobalance;
int wi_cost_delay;
int wi_cost_limit;
int wi_cost_limit_base;
typedef struct WorkerInfoData *WorkerInfo;
Alvaro Herrera
committed
/*
* Possible signals received by the launcher from remote processes. These are
* stored atomically in shared memory so that other processes can set them
* without locking.
*/
Alvaro Herrera
committed
{
AutoVacForkFailed, /* failed trying to start a worker */
AutoVacRebalance, /* rebalance the cost limits */
Heikki Linnakangas
committed
AutoVacNumSignals /* must be last */
Alvaro Herrera
committed
/*-------------
* The main autovacuum shmem struct. On shared memory we store this main
* struct and the array of WorkerInfo structs. This struct keeps:
*
Alvaro Herrera
committed
* av_signal set by other processes to indicate various conditions
* av_launcherpid the PID of the autovacuum launcher
* av_freeWorkers the WorkerInfo freelist
* av_runningWorkers the WorkerInfo non-free queue
* av_startingWorker pointer to WorkerInfo currently being started (cleared by
* the worker itself as soon as it's up and running)
*
Alvaro Herrera
committed
* This struct is protected by AutovacuumLock, except for av_signal and parts
* of the worker list (see above).
*-------------
*/
typedef struct
{
sig_atomic_t av_signal[AutoVacNumSignals];
pid_t av_launcherpid;
WorkerInfo av_freeWorkers;
WorkerInfo av_startingWorker;
} AutoVacuumShmemStruct;
static AutoVacuumShmemStruct *AutoVacuumShmem;
/* the database list in the launcher, and the context that contains it */
static Dllist *DatabaseList = NULL;
static MemoryContext DatabaseListCxt = NULL;
/* Pointer to my own WorkerInfo, valid on each worker */
/* PID of launcher, valid only in worker while shutting down */
#ifdef EXEC_BACKEND
static pid_t avlauncher_forkexec(void);
static pid_t avworker_forkexec(void);
NON_EXEC_STATIC void AutoVacWorkerMain(int argc, char *argv[]);
NON_EXEC_STATIC void AutoVacLauncherMain(int argc, char *argv[]);
static void launcher_determine_sleep(bool canlaunch, bool recursing,
static void launch_worker(TimestampTz now);
static List *get_database_list(void);
static void rebuild_database_list(Oid newdb);
static int db_comparator(const void *a, const void *b);
static void autovac_balance_cost(void);
static void do_autovacuum(void);
static void FreeWorkerInfo(int code, Datum arg);
static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc);
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
bool *dovacuum, bool *doanalyze, bool *wraparound);
Alvaro Herrera
committed
static void autovacuum_do_vac_analyze(autovac_table *tab,
BufferAccessStrategy bstrategy);
static AutoVacOpts *extract_autovac_opts(HeapTuple tup,
TupleDesc pg_class_desc);
Alvaro Herrera
committed
static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
PgStat_StatDBEntry *shared,
PgStat_StatDBEntry *dbentry);
Alvaro Herrera
committed
static void autovac_report_activity(autovac_table *tab);
static void avl_sighup_handler(SIGNAL_ARGS);
static void avl_sigusr2_handler(SIGNAL_ARGS);
static void avl_sigterm_handler(SIGNAL_ARGS);
Alvaro Herrera
committed
static void autovac_refresh_stats(void);
/********************************************************************
********************************************************************/
#ifdef EXEC_BACKEND
* forkexec routine for the autovacuum launcher process.
* Format up the arglist, then fork and exec.
static pid_t
avlauncher_forkexec(void)
char *av[10];
int ac = 0;
av[ac++] = "postgres";
av[ac++] = "--forkavlauncher";
av[ac++] = NULL; /* filled in by postmaster_forkexec */
av[ac] = NULL;
Alvaro Herrera
committed
Assert(ac < lengthof(av));
return postmaster_forkexec(ac, av);
}
/*
* We need this set from the outside, before InitProcess is called
*/
void
AutovacuumLauncherIAm(void)
{
am_autovacuum_launcher = true;
}
#endif
/*
* Main entry point for autovacuum launcher process, to be called from the
* postmaster.
*/
int
StartAutoVacLauncher(void)
{
pid_t AutoVacPID;
#ifdef EXEC_BACKEND
switch ((AutoVacPID = avlauncher_forkexec()))
#endif
{
case -1:
ereport(LOG,
(errmsg("could not fork autovacuum launcher process: %m")));
return 0;
#ifndef EXEC_BACKEND
case 0:
/* in postmaster child ... */
/* Close the postmaster's sockets */
ClosePostmasterPorts(false);
/* Lose the postmaster's on-exit routines */
on_exit_reset();
AutoVacLauncherMain(0, NULL);
break;
#endif
default:
return (int) AutoVacPID;
}
/* shouldn't get here */
return 0;
}
/*
* Main loop for the autovacuum launcher process.
NON_EXEC_STATIC void
AutoVacLauncherMain(int argc, char *argv[])
sigjmp_buf local_sigjmp_buf;
/* we are a postmaster subprocess now */
IsUnderPostmaster = true;
am_autovacuum_launcher = true;
/* reset MyProcPid */
MyProcPid = getpid();
Andrew Dunstan
committed
/* record Start Time for logging */
MyStartTime = time(NULL);
/* Identify myself via ps */
init_ps_display("autovacuum launcher process", "", "", "");
ereport(LOG,
(errmsg("autovacuum launcher started")));
Alvaro Herrera
committed
if (PostAuthDelay)
pg_usleep(PostAuthDelay * 1000000L);
SetProcessingMode(InitProcessing);
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (autovacuum probably never has any
* child processes, but for consistency we make all postmaster child
* processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Set up signal handlers. We operate on databases much like a regular
* backend, so we use the same signal handling. See equivalent code in
* tcop/postgres.c.
*/
pqsignal(SIGHUP, avl_sighup_handler);
pqsignal(SIGINT, StatementCancelHandler);
pqsignal(SIGTERM, avl_sigterm_handler);
pqsignal(SIGQUIT, quickdie);
pqsignal(SIGALRM, handle_sig_alarm);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, procsignal_sigusr1_handler);
pqsignal(SIGUSR2, avl_sigusr2_handler);
pqsignal(SIGFPE, FloatExceptionHandler);
pqsignal(SIGCHLD, SIG_DFL);
/* Early initialization */
BaseInit();
/*
* Create a per-backend PGPROC struct in shared memory, except in the
* EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
* this before we can use LWLocks (and in the EXEC_BACKEND case we already
* had to do some stuff with LWLocks).
*/
#ifndef EXEC_BACKEND
InitProcess();
#endif
InitPostgres(NULL, InvalidOid, NULL, NULL);
SetProcessingMode(NormalProcessing);
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks.
*/
AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
"Autovacuum Launcher",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(AutovacMemCxt);
/*
* If an exception is encountered, processing resumes here.
*
* This code is a stripped down version of PostgresMain error recovery.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevents interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Forget any pending QueryCancel request */
QueryCancelPending = false;
disable_sig_alarm(true);
QueryCancelPending = false; /* again in case timeout occurred */
/* Report the error to the server log */
EmitErrorReport();
/* Abort the current transaction in order to recover */
AbortCurrentTransaction();
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
*/
MemoryContextSwitchTo(AutovacMemCxt);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(AutovacMemCxt);
/* don't leave dangling pointers to freed memory */
DatabaseListCxt = NULL;
DatabaseList = NULL;
Alvaro Herrera
committed
/*
* Make sure pgstat also considers our stat data as gone. Note: we
* mustn't use autovac_refresh_stats here.
*/
pgstat_clear_snapshot();
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/* if in shutdown mode, no need for anything further; just go away */
if (got_SIGTERM)
goto shutdown;
/*
* Sleep at least 1 second after any error. We don't want to be
* filling the error logs as fast as we can.
*/
pg_usleep(1000000L);
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
/* must unblock signals before calling rebuild_database_list */
PG_SETMASK(&UnBlockSig);
/*
* Force zero_damaged_pages OFF in the autovac process, even if it is set
* in postgresql.conf. We don't really want such a dangerous option being
* applied non-interactively.
*/
SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
/*
* Force statement_timeout to zero to avoid a timeout setting from
* preventing regular maintenance from being executed.
*/
SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
/*
* Force default_transaction_isolation to READ COMMITTED. We don't want
* to pay the overhead of serializable mode, nor add any risk of causing
* deadlocks or delaying other transactions.
*/
SetConfigOption("default_transaction_isolation", "read committed",
PGC_SUSET, PGC_S_OVERRIDE);
/*
* In emergency mode, just start a worker (unless shutdown was requested)
* and go away.
*/
if (!AutoVacuumingActive())
{
if (!got_SIGTERM)
do_start_worker();
}
AutoVacuumShmem->av_launcherpid = MyProcPid;
/*
* Create the initial database list. The invariant we want this list to
* keep is that it's ordered by decreasing next_time. As soon as an entry
* is updated to a higher time, it will be moved to the front (which is
* correct because the only operation is to add autovacuum_naptime to the
* entry, and time always increases).
*/
rebuild_database_list(InvalidOid);
/* loop until shutdown request */
while (!got_SIGTERM)
TimestampTz current_time = 0;
int rc;
/*
* This loop is a bit different from the normal use of WaitLatch,
* because we'd like to sleep before the first launch of a child
* process. So it's WaitLatch, then ResetLatch, then check for
* wakening conditions.
*/
launcher_determine_sleep((AutoVacuumShmem->av_freeWorkers != NULL),
false, &nap);
/* Allow sinval catchup interrupts while sleeping */
EnableCatchupInterrupt();
* Wait until naptime expires or we get some type of signal (all the
* signal handlers will wake us by calling SetLatch).
rc = WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
(nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
ResetLatch(&MyProc->procLatch);
DisableCatchupInterrupt();
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
*/
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
/* the normal shutdown case */
if (got_SIGTERM)
break;
if (got_SIGHUP)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
/* shutdown requested in config file? */
if (!AutoVacuumingActive())
Alvaro Herrera
committed
break;
/* rebalance in case the default cost parameters changed */
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
autovac_balance_cost();
LWLockRelease(AutovacuumLock);
/* rebuild the list in case the naptime changed */
rebuild_database_list(InvalidOid);
}
Alvaro Herrera
committed
/*
* a worker finished, or postmaster signalled failure to start a
* worker
*/
if (got_SIGUSR2)
{
got_SIGUSR2 = false;
/* rebalance cost limits, if needed */
Alvaro Herrera
committed
if (AutoVacuumShmem->av_signal[AutoVacRebalance])
{
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
Alvaro Herrera
committed
AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
autovac_balance_cost();
LWLockRelease(AutovacuumLock);
}
Alvaro Herrera
committed
if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
{
/*
* If the postmaster failed to start a new worker, we sleep
* for a little while and resend the signal. The new worker's
* state is still in memory, so this is sufficient. After
* that, we restart the main loop.
*
* XXX should we put a limit to the number of times we retry?
* I don't think it makes much sense, because a future start
* of a worker will continue to fail in the same way.
*/
AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
Alvaro Herrera
committed
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
continue;
}
}
/*
* There are some conditions that we need to check before trying to
* start a worker. First, we need to make sure that there is a
* worker slot available. Second, we need to make sure that no
*/
Alvaro Herrera
committed
current_time = GetCurrentTimestamp();
LWLockAcquire(AutovacuumLock, LW_SHARED);
can_launch = (AutoVacuumShmem->av_freeWorkers != NULL);
if (AutoVacuumShmem->av_startingWorker != NULL)
{
WorkerInfo worker = AutoVacuumShmem->av_startingWorker;
/*
* We can't launch another worker when another one is still
Alvaro Herrera
committed
* starting up (or failed while doing so), so just sleep for a bit
* more; that worker will wake us up again as soon as it's ready.
* We will only wait autovacuum_naptime seconds (up to a maximum
* of 60 seconds) for this to happen however. Note that failure
* to connect to a particular database is not a problem here,
* because the worker removes itself from the startingWorker
* pointer before trying to connect. Problems detected by the
* postmaster (like fork() failure) are also reported and handled
* differently. The only problems that may cause this code to
* fire are errors in the earlier sections of AutoVacWorkerMain,
* before the worker removes the WorkerInfo from the
* startingWorker pointer.
*/
Alvaro Herrera
committed
waittime = Min(autovacuum_naptime, 60) * 1000;
Alvaro Herrera
committed
if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
Alvaro Herrera
committed
waittime))
{
LWLockRelease(AutovacuumLock);
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/*
* No other process can put a worker in starting mode, so if
* startingWorker is still INVALID after exchanging our lock,
* we assume it's the same one we saw above (so we don't
* recheck the launch time).
*/
if (AutoVacuumShmem->av_startingWorker != NULL)
{
worker = AutoVacuumShmem->av_startingWorker;
worker->wi_dboid = InvalidOid;
worker->wi_tableoid = InvalidOid;
Alvaro Herrera
committed
worker->wi_proc = NULL;
worker->wi_launchtime = 0;
worker->wi_links.next = (SHM_QUEUE *) AutoVacuumShmem->av_freeWorkers;
AutoVacuumShmem->av_freeWorkers = worker;
AutoVacuumShmem->av_startingWorker = NULL;
elog(WARNING, "worker took too long to start; canceled");
}
}
else
can_launch = false;
LWLockRelease(AutovacuumLock); /* either shared or exclusive */
Alvaro Herrera
committed
/* if we can't do anything, just go back to sleep */
if (!can_launch)
continue;
Alvaro Herrera
committed
/* We're OK to start a new worker */
Alvaro Herrera
committed
elem = DLGetTail(DatabaseList);
if (elem != NULL)
{
Alvaro Herrera
committed
/*
* launch a worker if next_worker is right now or it is in the
* past
Alvaro Herrera
committed
*/
if (TimestampDifferenceExceeds(avdb->adl_next_worker,
current_time, 0))
launch_worker(current_time);
Alvaro Herrera
committed
}
else
{
/*
* Special case when the list is empty: start a worker right away.
* This covers the initial case, when no database is in pgstats
* (thus the list is empty). Note that the constraints in
* launcher_determine_sleep keep us from starting workers too
* quickly (at most once every autovacuum_naptime when the list is
* empty).
*/
launch_worker(current_time);
}
}
/* Normal exit from the autovac launcher is here */
ereport(LOG,
(errmsg("autovacuum launcher shutting down")));
AutoVacuumShmem->av_launcherpid = 0;
}
/*
* Determine the time to sleep, based on the database list.
*
* The "canlaunch" parameter indicates whether we can start a worker right now,
* for example due to the workers being all busy. If this is false, we will
* cause a long sleep, which will be interrupted when a worker exits.
*/
launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval * nap)
{
/*
* We sleep until the next scheduled vacuum. We trust that when the
* database list was built, care was taken so that no entries have times
* in the past; if the first entry has too close a next_worker value, or a
* time in the past, we will sleep a small nominal time.
*/
if (!canlaunch)
{
nap->tv_sec = autovacuum_naptime;
nap->tv_usec = 0;
}
else if ((elem = DLGetTail(DatabaseList)) != NULL)
{
avl_dbase *avdb = DLE_VAL(elem);
TimestampTz current_time = GetCurrentTimestamp();
TimestampTz next_wakeup;
long secs;
int usecs;
next_wakeup = avdb->adl_next_worker;
TimestampDifference(current_time, next_wakeup, &secs, &usecs);
nap->tv_sec = secs;
nap->tv_usec = usecs;
}
else
{
/* list is empty, sleep for whole autovacuum_naptime seconds */
nap->tv_sec = autovacuum_naptime;
nap->tv_usec = 0;
}
/*
* If the result is exactly zero, it means a database had an entry with
* time in the past. Rebuild the list so that the databases are evenly
* distributed again, and recalculate the time to sleep. This can happen
* if there are more tables needing vacuum than workers, and they all take
* longer to vacuum than autovacuum_naptime.
*
* We only recurse once. rebuild_database_list should always return times
* in the future, but it seems best not to trust too much on that.
*/
if (nap->tv_sec == 0 && nap->tv_usec == 0 && !recursing)
{
rebuild_database_list(InvalidOid);
launcher_determine_sleep(canlaunch, true, nap);
return;
}
Alvaro Herrera
committed
/* The smallest time we'll allow the launcher to sleep. */
if (nap->tv_sec <= 0 && nap->tv_usec <= MIN_AUTOVAC_SLEEPTIME * 1000)
{
nap->tv_sec = 0;
Alvaro Herrera
committed
nap->tv_usec = MIN_AUTOVAC_SLEEPTIME * 1000;
}
/*
* If the sleep time is too large, clamp it to an arbitrary maximum (plus
* any fractional seconds, for simplicity). This avoids an essentially
* infinite sleep in strange cases like the system clock going backwards a
* few years.
*/
if (nap->tv_sec > MAX_AUTOVAC_SLEEPTIME)
nap->tv_sec = MAX_AUTOVAC_SLEEPTIME;
}
/*
* Build an updated DatabaseList. It must only contain databases that appear
* in pgstats, and must be sorted by next_worker from highest to lowest,
* distributed regularly across the next autovacuum_naptime interval.
*
* Receives the Oid of the database that made this list be generated (we call
* this the "new" database, because when the database was already present on
* the list, we expect that this function is not called at all). The
* preexisting list, if any, will be used to preserve the order of the
* databases in the autovacuum_naptime period. The new database is put at the
* end of the interval. The actual values are not saved, which should not be
* much of a problem.
*/
static void
rebuild_database_list(Oid newdb)
{
List *dblist;
ListCell *cell;
MemoryContext newcxt;
MemoryContext oldcxt;
MemoryContext tmpcxt;
HASHCTL hctl;
int score;
int nelems;
HTAB *dbhash;
/* use fresh stats */
Alvaro Herrera
committed
autovac_refresh_stats();
newcxt = AllocSetContextCreate(AutovacMemCxt,
"AV dblist",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
tmpcxt = AllocSetContextCreate(newcxt,
"tmp AV dblist",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcxt = MemoryContextSwitchTo(tmpcxt);
/*
* Implementing this is not as simple as it sounds, because we need to put
* the new database at the end of the list; next the databases that were
* already on the list, and finally (at the tail of the list) all the
* other databases that are not on the existing list.
*
* To do this, we build an empty hash table of scored databases. We will
* start with the lowest score (zero) for the new database, then
* increasing scores for the databases in the existing list, in order, and
* lastly increasing scores for all databases gotten via
* get_database_list() that are not already on the hash.
*
* Then we will put all the hash elements into an array, sort the array by
* score, and finally put the array elements into the new doubly linked
* list.
*/
hctl.keysize = sizeof(Oid);
hctl.entrysize = sizeof(avl_dbase);
hctl.hash = oid_hash;
hctl.hcxt = tmpcxt;
dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
/* start by inserting the new database */
score = 0;
if (OidIsValid(newdb))
{
PgStat_StatDBEntry *entry;
/* only consider this database if it has a pgstat entry */
entry = pgstat_fetch_stat_dbentry(newdb);
if (entry != NULL)
{
/* we assume it isn't found because the hash was just created */
db = hash_search(dbhash, &newdb, HASH_ENTER, NULL);
/* hash_search already filled in the key */
db->adl_score = score++;
/* next_worker is filled in later */
}
}
/* Now insert the databases from the existing list */
if (DatabaseList != NULL)
{
elem = DLGetHead(DatabaseList);
while (elem != NULL)
{
avl_dbase *avdb = DLE_VAL(elem);
avl_dbase *db;
bool found;
PgStat_StatDBEntry *entry;
elem = DLGetSucc(elem);
/*
* skip databases with no stat entries -- in particular, this gets
* rid of dropped databases
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
*/
entry = pgstat_fetch_stat_dbentry(avdb->adl_datid);
if (entry == NULL)
continue;
db = hash_search(dbhash, &(avdb->adl_datid), HASH_ENTER, &found);
if (!found)
{
/* hash_search already filled in the key */
db->adl_score = score++;
/* next_worker is filled in later */
}
}
}
/* finally, insert all qualifying databases not previously inserted */
dblist = get_database_list();
foreach(cell, dblist)
{
avw_dbase *avdb = lfirst(cell);
avl_dbase *db;
bool found;
PgStat_StatDBEntry *entry;
/* only consider databases with a pgstat entry */
entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
if (entry == NULL)
continue;
db = hash_search(dbhash, &(avdb->adw_datid), HASH_ENTER, &found);