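/*
 * NOTE: the four lines below are navigation residue from the web code
 * viewer this file was copied from; they are not part of autovacuum.c.
 *
 *		Skip to content
 *		Snippets Groups Projects
 *		autovacuum.c 20.4 KiB
 *		Newer Older
 */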
/*-------------------------------------------------------------------------
 *
 * autovacuum.c
 *
 * PostgreSQL Integrated Autovacuum Daemon
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.2 2005/07/29 19:30:04 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <time.h>
#include <sys/types.h>
#include <unistd.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_autovacuum.h"
#include "commands/vacuum.h"
#include "libpq/hba.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"


/*
 * GUC parameters
 *
 * autovacuum_start_daemon	master switch; see AutoVacuumingActive()
 * autovacuum_naptime		minimum delay between daemon runs; compared
 *							against differences of time() values in
 *							autovac_start(), so it is in seconds
 * autovacuum_vac_thresh	default base threshold for VACUUM
 * autovacuum_vac_scale		default reltuples scale factor for VACUUM
 * autovacuum_anl_thresh	default base threshold for ANALYZE
 * autovacuum_anl_scale		default reltuples scale factor for ANALYZE
 *
 * The four threshold/scale values are the fallbacks used by
 * test_rel_for_autovac() when pg_autovacuum has no (or a negative)
 * per-table setting.
 */
bool		autovacuum_start_daemon = false;
int			autovacuum_naptime;
int			autovacuum_vac_thresh;
double		autovacuum_vac_scale;
int			autovacuum_anl_thresh;
double		autovacuum_anl_scale;

/*
 * Flag to tell if we are in the autovacuum daemon process.  Set by
 * AutoVacMain() and reported by IsAutoVacuumProcess().
 */
static bool am_autovacuum = false;

/*
 * Last time autovac daemon started/stopped (only valid in postmaster).
 * Updated by autovac_start() and autovac_stopped() respectively, and used
 * by autovac_start() to rate-limit launches.
 */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;

/* struct to keep list of candidate databases for vacuum */
typedef struct autovac_dbase
{
	Oid				oid;
	char		   *name;
} autovac_dbase;


/*
 * Forward declarations of the routines defined later in this file.
 * autovac_forkexec() exists only in EXEC_BACKEND builds, where
 * autovac_start() uses it in place of fork_process().
 */
#ifdef EXEC_BACKEND
static pid_t autovac_forkexec(void);
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
static void do_autovacuum(bool whole_db, PgStat_StatDBEntry *dbentry);
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
			 Form_pg_class classForm, Form_pg_autovacuum avForm,
			 List **vacuum_tables, List **analyze_tables);
static void autovacuum_do_vac_analyze(List *relids, bool dovacuum);


/*
 * autovac_start
 *		Main entry point for the autovacuum controller: launch one
 *		autovacuum subprocess if the configuration and timing allow it.
 *
 * Returns the PID of the new child on success, or 0 if nothing was
 * started (autovacuum inactive, too soon since the last run, or fork
 * failure, which is logged).
 *
 * This code is heavily based on pgarch.c, q.v.
 */
int
autovac_start(void)
{
	time_t		now;
	pid_t		child_pid;
	unsigned int naptime;

	/* Nothing to do when autovacuum is not configured to run */
	if (!AutoVacuumingActive())
		return 0;

	/*
	 * Rate-limit launches.  We refuse to start if less than
	 * autovacuum_naptime has elapsed since the previous subprocess exited;
	 * iteration time is quite variable, so time-since-exit is the more
	 * useful thing to control.  We additionally refuse if less than
	 * autovacuum_naptime has elapsed since the previous launch attempt,
	 * which keeps us from thrashing when fork keeps failing.
	 *
	 * The postmaster main loop calls us again, so declining now merely
	 * postpones the launch.
	 *
	 * XXX todo: implement sleep scale factor that existed in contrib code.
	 */
	now = time(NULL);
	naptime = (unsigned int) autovacuum_naptime;

	if ((unsigned int) (now - last_autovac_stop_time) < naptime ||
		(unsigned int) (now - last_autovac_start_time) < naptime)
		return 0;

	last_autovac_start_time = now;

#ifdef EXEC_BACKEND
	child_pid = autovac_forkexec();
#else
	child_pid = fork_process();
#endif

	if (child_pid == -1)
	{
		ereport(LOG,
				(errmsg("could not fork autovacuum process: %m")));
		return 0;
	}

#ifndef EXEC_BACKEND
	if (child_pid == 0)
	{
		/* in postmaster child ... */
		/* Close the postmaster's sockets */
		ClosePostmasterPorts(false);

		AutoVacMain(0, NULL);

		/* shouldn't get here */
		return 0;
	}
#endif

	/* in postmaster: hand the child's PID back to the caller */
	return (int) child_pid;
}

/*
 * autovac_stopped
 *		Called by the postmaster when it detects that the autovacuum
 *		subprocess has exited.
 *
 * Records the exit time so that autovac_start() can enforce the naptime
 * measured from the end of the previous run.
 */
void
autovac_stopped(void)
{
	time_t		exit_time = time(NULL);

	last_autovac_stop_time = exit_time;
}

#ifdef EXEC_BACKEND
/*
 * autovac_forkexec()
 *
 * EXEC_BACKEND only: build the argument vector for the autovacuum
 * subprocess and hand it to postmaster_forkexec(), whose result (the
 * child's PID, or its failure indication) is returned unchanged.
 */
static pid_t
autovac_forkexec(void)
{
	char	   *argv_buf[10];
	int			nargs;

	argv_buf[0] = "postgres";
	argv_buf[1] = "-forkautovac";
	argv_buf[2] = NULL;			/* filled in by postmaster_forkexec */
	nargs = 3;
	argv_buf[nargs] = NULL;		/* argv terminator, not counted in nargs */

	Assert(nargs < lengthof(argv_buf));

	return postmaster_forkexec(nargs, argv_buf);
}
#endif	/* EXEC_BACKEND */

/*
 * AutoVacMain
 */
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
	ListCell	   *cell;
	List		   *dblist;
	sigjmp_buf		local_sigjmp_buf;

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_autovacuum = true;

	/* reset MyProcPid */
	MyProcPid = getpid();

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/*
	 * Set up signal handlers.  We operate on databases much like a
	 * regular backend, so we use the same signal handling.  See
	 * equivalent code in tcop/postgres.c.
	 *
	 * Currently, we don't pay attention to postgresql.conf changes
	 * that happen during a single daemon iteration, so we can ignore
	 * SIGHUP.
	 */
	pqsignal(SIGHUP, SIG_IGN);
	/*
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's
	 * how we handle ereport(ERROR).  It could be improved however.
	 */
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, CatchupInterruptHandler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGCHLD, SIG_DFL);

	/* Identify myself via ps */
	init_ps_display("autovacuum process", "", "");
	set_ps_display("");

	/* Early initialization */
	BaseInit();

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * We can now go away.  Note that because we'll call InitProcess,
		 * a callback will be registered to do ProcKill, which will clean
		 * up necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/* Get a list of databases */
	dblist = autovac_get_database_list();

	/*
	 * Get the next Xid that was current as of the last checkpoint.
	 * We need it to determine whether databases are about to need
	 * database-wide vacuums.
	 */
	nextXid = GetRecentNextXid();

	/*
	 * Choose a database to connect to.  We pick the database that was least
	 * recently auto-vacuumed, or one that needs database-wide vacuum (to
	 * prevent Xid wraparound-related data loss).
	 *
	 * Note that a database with no stats entry is not considered, except
	 * for Xid wraparound purposes.  The theory is that if no one has ever
	 * connected to it since the stats were last initialized, it doesn't
	 * need vacuuming.
	 *
	 * XXX This could be improved if we had more info about whether it needs
	 * vacuuming before connecting to it.  Perhaps look through the pgstats
	 * data for the database's tables?  One idea is to keep track of the
	 * number of new and dead tuples per database in pgstats.  However it
	 * isn't clear how to construct a metric that measures that and not
	 * cause starvation for less busy databases.
		autovac_dbase  *tmp = lfirst(cell);
		bool			this_whole_db;

		/*
		 * We look for the database that most urgently needs a database-wide
		 * vacuum.  We decide that a database-wide vacuum is needed 100000
		 * transactions sooner than vacuum.c's vac_truncate_clog() would
		 * decide to start giving warnings.  If any such db is found, we
		 * ignore all other dbs.
		 */
		tmp->age = (int32) (nextXid - tmp->frozenxid);
		this_whole_db = (tmp->age > (int32) ((MaxTransactionId >> 3) * 3 - 100000));
		if (whole_db || this_whole_db)
		{
			if (!this_whole_db)
				continue;
			if (db == NULL || tmp->age > db->age)
			{
				db = tmp;
				whole_db = true;
			}
			continue;
		}
		/*
		 * Otherwise, skip a database with no pgstat entry; it means it hasn't
		 * seen any activity.
		 */
		tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);
		if (!tmp->entry)
			continue;

		/*
		 * Don't try to access a database that was dropped.  This could only
		 * happen if we read the pg_database flat file right before it was
		 * modified, after the database was dropped from the pg_database
		 * table.  (This is of course a not-very-bulletproof test, but it's
		 * cheap to make.  If we do mistakenly choose a recently dropped
		 * database, InitPostgres will fail and we'll drop out until the
		 * next autovac run.)
		 */
		if (tmp->entry->destroy != 0)
			continue;

		/*
		 * Else remember the db with oldest autovac time.
		 */
		if (db == NULL ||
			tmp->entry->last_autovac_time < db->entry->last_autovac_time)
			db = tmp;
	}

	if (db)
	{
		/*
		 * Connect to the selected database
		 */
		InitPostgres(db->name, NULL);
		SetProcessingMode(NormalProcessing);
		pgstat_report_autovac();
		set_ps_display(db->name);
		ereport(LOG,
				(errmsg("autovacuum: processing database \"%s\"", db->name)));
		/*
		 * And do an appropriate amount of work on it
		 */
		do_autovacuum(whole_db, db->entry);
	}

	/* One iteration done, go away */
	proc_exit(0);
}

/*
 * autovac_get_database_list
 *
 * 		Return a list of all databases.  Note we cannot use pg_database,
 *		because we aren't connected yet; we use the flat database file.
 */
static List *
autovac_get_database_list(void)
{
	char   *filename;
	List   *dblist = NIL;
	char	thisname[NAMEDATALEN];
	FILE   *db_file;
	Oid		db_id;
	Oid		db_tablespace;

	filename = database_getflatfilename();
	db_file = AllocateFile(filename, "r");
	if (db_file == NULL)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", filename)));

	while (read_pg_database_line(db_file, thisname, &db_id,
								 &db_tablespace, &db_frozenxid))
	{
		autovac_dbase	*db;

		db = (autovac_dbase *) palloc(sizeof(autovac_dbase));

		db->oid = db_id;
		db->name = pstrdup(thisname);
		db->frozenxid = db_frozenxid;
		/* these get set later: */

		dblist = lappend(dblist, db);
	}

	FreeFile(db_file);
	pfree(filename);

	return dblist;
}

/*
 * Process a database.
 *
 * If whole_db is true, the database is processed as a whole, and the
 * dbentry parameter is ignored.  If it's false, dbentry must be a valid
 * pointer to the database entry in the stats databases' hash table, and
 * it will be used to determine whether vacuum or analyze is needed on a
 * per-table basis.
 *
 * Note that test_rel_for_autovac generates two separate lists, one for
 * vacuum and other for analyze.  This is to facilitate processing all
 * analyzes first, and then all vacuums.
 *
 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
 * order not to ignore shutdown commands for too long.
 */
static void
do_autovacuum(bool whole_db, PgStat_StatDBEntry *dbentry)
{
	Relation		classRel,
					avRel;
	HeapTuple		tuple;
	HeapScanDesc	relScan;
	List		   *vacuum_tables = NIL,
				   *analyze_tables = NIL;
	MemoryContext	AutovacMemCxt;

	Assert(whole_db || PointerIsValid(dbentry));

	/* Memory context where cross-transaction state is stored */
	AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
										  "Autovacuum context",
										  ALLOCSET_DEFAULT_MINSIZE,
										  ALLOCSET_DEFAULT_INITSIZE,
										  ALLOCSET_DEFAULT_MAXSIZE);

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

	/*
	 * StartTransactionCommand and CommitTransactionCommand will
	 * automatically switch to other contexts.  We need this one
	 * to keep the list of relations to vacuum/analyze across
	 * transactions.
	 */
	MemoryContextSwitchTo(AutovacMemCxt);

	if (whole_db)
	{
		elog(DEBUG2, "autovacuum: VACUUM ANALYZE whole database");
		autovacuum_do_vac_analyze(NIL, true);
	}
	else
	{
		/* the hash entry where pgstat stores shared relations */
		PgStat_StatDBEntry *shared = pgstat_fetch_stat_dbentry(InvalidOid);

		classRel = heap_open(RelationRelationId, AccessShareLock);
		avRel = heap_open(AutovacuumRelationId, AccessShareLock);

		relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);

		/* Scan pg_class looking for tables to vacuum */
		while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
		{
			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
			Form_pg_autovacuum avForm = NULL;
			PgStat_StatTabEntry *tabentry;
			SysScanDesc	avScan;
			HeapTuple	avTup;
			ScanKeyData	entry[1];
			Oid			relid;

			/* Skip non-table entries. */
			/* XXX possibly allow RELKIND_TOASTVALUE entries here too? */
			if (classForm->relkind != RELKIND_RELATION)
				continue;

			/*
			 * Skip temp tables (i.e. those in temp namespaces).  We cannot
			 * safely process other backends' temp tables.
			 */
			if (isTempNamespace(classForm->relnamespace))
				continue;

			relid = HeapTupleGetOid(tuple);

			/* See if we have a pg_autovacuum entry for this relation. */
			ScanKeyInit(&entry[0],
						Anum_pg_autovacuum_vacrelid,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(relid));

			avScan = systable_beginscan(avRel, AutovacuumRelidIndexId, true,
										SnapshotNow, 1, entry);

			avTup = systable_getnext(avScan);

			if (HeapTupleIsValid(avTup))
				avForm = (Form_pg_autovacuum) GETSTRUCT(avTup);

			/*
			 * Stats for shared relations live in the InvalidOid entry when
			 * one exists; everything else is looked up in this database's
			 * own entry.
			 */
			if (classForm->relisshared && PointerIsValid(shared))
				tabentry = hash_search(shared->tables, &relid,
									   HASH_FIND, NULL);
			else
				tabentry = hash_search(dbentry->tables, &relid,
									   HASH_FIND, NULL);

			test_rel_for_autovac(relid, tabentry, classForm, avForm,
								 &vacuum_tables, &analyze_tables);

			/*
			 * avForm points into the scan's current tuple, so the scan can
			 * only be ended once test_rel_for_autovac is done with it.
			 */
			systable_endscan(avScan);
		}

		heap_endscan(relScan);
		heap_close(avRel, AccessShareLock);
		heap_close(classRel, AccessShareLock);

		CHECK_FOR_INTERRUPTS();

		/*
		 * Perform operations on collected tables.
		 */
		if (analyze_tables)
			autovacuum_do_vac_analyze(analyze_tables, false);

		/* don't sit on a pending shutdown request for the whole vacuum pass */
		CHECK_FOR_INTERRUPTS();

		/* get back to proper context */
		MemoryContextSwitchTo(AutovacMemCxt);

		if (vacuum_tables)
			autovacuum_do_vac_analyze(vacuum_tables, true);
	}

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * test_rel_for_autovac
 *
 * Check whether a table needs to be vacuumed or analyzed.  Add it to the
 * respective list if so.
 *
 * A table needs to be vacuumed if the number of dead tuples exceeds a
 * threshold.  This threshold is calculated as
 *
 * threshold = vac_base_thresh + vac_scale_factor * reltuples
 *
 * For analyze, the analysis done is that the number of tuples inserted,
 * deleted and updated since the last analyze exceeds a threshold calculated
 * in the same fashion as above.  Note that the collector actually stores
 * the number of tuples (both live and dead) that there were as of the last
 * analyze.  This is asymmetric to the VACUUM case.
 *
 * A table whose pg_autovacuum.enabled value is false, is automatically
 * skipped.  Thus autovacuum can be disabled for specific tables.  Also,
 * when the stats collector does not have data about a table, it will be
 * skipped.
 *
 * A table whose vac_base_thresh value is <0 takes the base value from the
 * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
 * value <0 is substituted with the value of
 * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
 *
 * Parameters:
 *	relid			OID of the table being examined
 *	tabentry		its pgstat entry, or NULL if the collector has none
 *	classForm		its pg_class row (not otherwise examined here)
 *	avForm			its pg_autovacuum row, or NULL if it has none
 *	vacuum_tables	in/out: OID list of tables to VACUUM ANALYZE
 *	analyze_tables	in/out: OID list of tables to ANALYZE only
 *
 * A table is added to at most one of the two lists; vacuum takes
 * precedence over analyze.
 */
static void
test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables, List **analyze_tables)
{
	Relation		rel;
	float4			reltuples;	/* pg_class.reltuples */
	/* constants from pg_autovacuum or GUC variables */
	int				vac_base_thresh,
					anl_base_thresh;
	float4			vac_scale_factor,
					anl_scale_factor;
	/* thresholds calculated from above constants */
	float4			vacthresh,
					anlthresh;
	/* number of vacuum (resp. analyze) tuples at this time */
	float4			vactuples,
					anltuples;

	/* User disabled it in pg_autovacuum? */
	if (avForm && !avForm->enabled)
		return;

	/*
	 * Skip a table not found in stat hash.  If it's not acted upon,
	 * there's no need to vacuum it.  (Note that database-level check
	 * will take care of Xid wraparound.)
	 */
	if (!PointerIsValid(tabentry))
		return;

	rel = RelationIdGetRelation(relid);
	/* The table was recently dropped? */
	if (!PointerIsValid(rel))
		return;

	reltuples = rel->rd_rel->reltuples;
	vactuples = tabentry->n_dead_tuples;
	anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
		tabentry->last_anl_tuples;

	/*
	 * If there is a tuple in pg_autovacuum, use it; else, use the GUC
	 * defaults.  Note that the fields may contain "-1" (or indeed any
	 * negative value), which means use the GUC defaults for each setting.
	 */
	if (avForm != NULL)
	{
		vac_scale_factor = (avForm->vac_scale_factor < 0) ?
			autovacuum_vac_scale : avForm->vac_scale_factor;
		vac_base_thresh = (avForm->vac_base_thresh < 0) ?
			autovacuum_vac_thresh : avForm->vac_base_thresh;

		anl_scale_factor = (avForm->anl_scale_factor < 0) ?
			autovacuum_anl_scale : avForm->anl_scale_factor;
		anl_base_thresh = (avForm->anl_base_thresh < 0) ?
			autovacuum_anl_thresh : avForm->anl_base_thresh;
	}
	else
	{
		vac_scale_factor = autovacuum_vac_scale;
		vac_base_thresh = autovacuum_vac_thresh;

		anl_scale_factor = autovacuum_anl_scale;
		anl_base_thresh = autovacuum_anl_thresh;
	}

	vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
	anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;

	/*
	 * Note that we don't need to take special consideration for stat
	 * reset, because if that happens, the last vacuum and analyze counts
	 * will be reset too.
	 */

	elog(DEBUG2, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
		 RelationGetRelationName(rel),
		 vactuples, vacthresh, anltuples, anlthresh);

	/* Determine if this table needs vacuum or analyze. */
	if (vactuples > vacthresh)
	{
		elog(DEBUG2, "will VACUUM ANALYZE %s",
			 RelationGetRelationName(rel));
		*vacuum_tables = lappend_oid(*vacuum_tables, relid);
	}
	else if (anltuples > anlthresh)
	{
		/* ANALYZE refuses to work with pg_statistics */
		if (relid != StatisticRelationId)
		{
			elog(DEBUG2, "will ANALYZE %s",
					RelationGetRelationName(rel));
			*analyze_tables = lappend_oid(*analyze_tables, relid);
		}
	}

	/* balance the reference taken by RelationIdGetRelation above */
	RelationClose(rel);
}

/*
 * autovacuum_do_vac_analyze
 * 		Run ANALYZE, or VACUUM ANALYZE when dovacuum is true, on the tables
 * 		named in relids; relids = NIL means every table.
 *
 * We must be in AutovacMemCxt when this routine is called.
 */
static void
autovacuum_do_vac_analyze(List *relids, bool dovacuum)
{
	VacuumStmt		*stmt;

	/*
	 * The statement node is palloc'd rather than kept in a local variable;
	 * see the QueryContext note below.
	 */
	stmt = makeNode(VacuumStmt);

	/*
	 * Point QueryContext to the autovac memory context to fake out the
	 * PreventTransactionChain check inside vacuum().
	 */
	QueryContext = CurrentMemoryContext;

	/* Fill in the command: always analyze, never FULL/FREEZE/VERBOSE */
	stmt->analyze = true;
	stmt->vacuum = dovacuum;
	stmt->full = false;
	stmt->freeze = false;
	stmt->verbose = false;
	stmt->va_cols = NIL;
	stmt->relation = NULL;		/* all tables, or not used if relids != NIL */

	vacuum(stmt, relids);
}

/*
 * AutoVacuumingActive
 * 		Report whether the autovacuum process should be running.
 *
 * That is the case only when autovacuum itself is enabled and both the
 * stats collector and row-level stats collection are turned on.
 */
bool
AutoVacuumingActive(void)
{
	return autovacuum_start_daemon &&
		pgstat_collect_startcollector &&
		pgstat_collect_tuplelevel;
}

/*
 * autovac_init
 * 		This is called at postmaster initialization.
 *
 * If autovacuum was requested but the statistics settings it depends on
 * are off, warn the user and switch autovacuum off.
 */
void
autovac_init(void)
{
	bool		stats_ok;

	/* Not requested: nothing to validate */
	if (!autovacuum_start_daemon)
		return;

	stats_ok = pgstat_collect_startcollector && pgstat_collect_tuplelevel;
	if (stats_ok)
		return;

	ereport(WARNING,
			(errmsg("autovacuum not started because of misconfiguration"),
			 errhint("Enable options \"stats_start_collector\" and \"stats_row_level\".")));

	/*
	 * Clear the GUC var so we don't fork autovacuum uselessly, and also to
	 * help debugging.
	 */
	autovacuum_start_daemon = false;
}

/*
 * IsAutoVacuumProcess
 * 		Return whether this process is an autovacuum process.
 *
 * The underlying flag starts out false and is set to true at the top of
 * AutoVacMain(); nothing in this file ever clears it again.
 */
bool
IsAutoVacuumProcess(void)
{
	return am_autovacuum;
}