From 0914ae1c1481c2721d57d1c4c3da95a1e0582f7a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 6 Apr 2006 20:38:00 +0000
Subject: [PATCH] Remove the pgstats logic for delaying destruction of stats
 table entries. Per recent discussion, this seems to be making the stats less
 accurate rather than more so, particularly on Windows where PID values may be
 reused very quickly.  Patch by Peter Brant.

---
 src/backend/postmaster/autovacuum.c |  19 +--
 src/backend/postmaster/pgstat.c     | 185 ++++------------------------
 src/include/pgstat.h                |  27 +---
 3 files changed, 33 insertions(+), 198 deletions(-)

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index d98f47f9b73..e9e7ae3691f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.15 2006/03/07 17:32:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.16 2006/04/06 20:38:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -384,19 +384,7 @@ AutoVacMain(int argc, char *argv[])
 			continue;
 
 		/*
-		 * Don't try to access a database that was dropped.  This could only
-		 * happen if we read the pg_database flat file right before it was
-		 * modified, after the database was dropped from the pg_database
-		 * table.  (This is of course a not-very-bulletproof test, but it's
-		 * cheap to make.  If we do mistakenly choose a recently dropped
-		 * database, InitPostgres will fail and we'll drop out until the next
-		 * autovac run.)
-		 */
-		if (tmp->entry->destroy != 0)
-			continue;
-
-		/*
-		 * Else remember the db with oldest autovac time.
+		 * Remember the db with oldest autovac time.
 		 */
 		if (db == NULL ||
 			tmp->entry->last_autovac_time < db->entry->last_autovac_time)
@@ -417,6 +405,9 @@ AutoVacMain(int argc, char *argv[])
 
 		/*
 		 * Connect to the selected database
+		 *
+		 * Note: if we have selected a just-deleted database (due to using
+		 * stale stats info), we'll fail and exit here.
 		 */
 		InitPostgres(db->name, NULL);
 		SetProcessingMode(NormalProcessing);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index d8c0e5e20f5..cfd3997409d 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -13,7 +13,7 @@
  *
  *	Copyright (c) 2001-2006, PostgreSQL Global Development Group
  *
- *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.121 2006/03/05 15:58:36 momjian Exp $
+ *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.122 2006/04/06 20:38:00 tgl Exp $
  * ----------
  */
 #include "postgres.h"
@@ -69,12 +69,6 @@
 #define PGSTAT_STAT_INTERVAL	500		/* How often to write the status file;
 										 * in milliseconds. */
 
-#define PGSTAT_DESTROY_DELAY	10000	/* How long to keep destroyed objects
-										 * known, to give delayed UDP packets
-										 * time to arrive; in milliseconds. */
-
-#define PGSTAT_DESTROY_COUNT	(PGSTAT_DESTROY_DELAY / PGSTAT_STAT_INTERVAL)
-
 #define PGSTAT_RESTART_INTERVAL 60		/* How often to attempt to restart a
 										 * failed statistics collector; in
 										 * seconds. */
@@ -141,7 +135,6 @@ static int	pgStatXactRollback = 0;
 
 static TransactionId pgStatDBHashXact = InvalidTransactionId;
 static HTAB *pgStatDBHash = NULL;
-static HTAB *pgStatBeDead = NULL;
 static PgStat_StatBeEntry *pgStatBeTable = NULL;
 static int	pgStatNumBackends = 0;
 
@@ -1552,7 +1545,6 @@ PgstatCollectorMain(int argc, char *argv[])
 	int			readPipe;
 	int			len = 0;
 	struct itimerval timeout;
-	HASHCTL		hash_ctl;
 	bool		need_timer = false;
 
 	MyProcPid = getpid();		/* reset MyProcPid */
@@ -1614,16 +1606,6 @@ PgstatCollectorMain(int argc, char *argv[])
 	pgStatRunningInCollector = true;
 	pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
 
-	/*
-	 * Create the dead backend hashtable
-	 */
-	memset(&hash_ctl, 0, sizeof(hash_ctl));
-	hash_ctl.keysize = sizeof(int);
-	hash_ctl.entrysize = sizeof(PgStat_StatBeDead);
-	hash_ctl.hash = tag_hash;
-	pgStatBeDead = hash_create("Dead Backends", PGSTAT_BE_HASH_SIZE,
-							   &hash_ctl, HASH_ELEM | HASH_FUNCTION);
-
 	/*
 	 * Create the known backends table
 	 */
@@ -2085,7 +2067,6 @@ static int
 pgstat_add_backend(PgStat_MsgHdr *msg)
 {
 	PgStat_StatBeEntry *beentry;
-	PgStat_StatBeDead *deadbe;
 
 	/*
 	 * Check that the backend ID is valid
@@ -2111,32 +2092,13 @@ pgstat_add_backend(PgStat_MsgHdr *msg)
 	if (beentry->procpid > 0 && beentry->procpid == msg->m_procpid)
 		return 0;
 
-	/*
-	 * Lookup if this backend is known to be dead. This can be caused due to
-	 * messages arriving in the wrong order - e.g. postmaster's BETERM message
-	 * might have arrived before we received all the backends stats messages,
-	 * or even a new backend with the same backendid was faster in sending his
-	 * BESTART.
-	 *
-	 * If the backend is known to be dead, we ignore this add.
-	 */
-	deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
-											   (void *) &(msg->m_procpid),
-											   HASH_FIND, NULL);
-	if (deadbe)
-		return 1;
-
-	/*
-	 * Backend isn't known to be dead. If it's slot is currently used, we have
-	 * to kick out the old backend.
-	 */
-	if (beentry->procpid > 0)
-		pgstat_sub_backend(beentry->procpid);
-
 	/* Must be able to distinguish between empty and non-empty slots */
 	Assert(msg->m_procpid > 0);
 
-	/* Put this new backend into the slot */
+	/*
+	 * Put this new backend into the slot (possibly overwriting an old entry,
+	 * if we missed its BETERM or the BETERM hasn't arrived yet).
+	 */
 	beentry->procpid = msg->m_procpid;
 	beentry->start_timestamp = GetCurrentTimestamp();
 	beentry->activity_start_timestamp = 0;
@@ -2185,7 +2147,6 @@ pgstat_get_db_entry(Oid databaseid, bool create)
 		result->n_xact_rollback = 0;
 		result->n_blocks_fetched = 0;
 		result->n_blocks_hit = 0;
-		result->destroy = 0;
 		result->last_autovac_time = 0;
 
 		memset(&hash_ctl, 0, sizeof(hash_ctl));
@@ -2211,8 +2172,6 @@ static void
 pgstat_sub_backend(int procpid)
 {
 	int			i;
-	PgStat_StatBeDead *deadbe;
-	bool		found;
 
 	/*
 	 * Search in the known-backends table for the slot containing this PID.
@@ -2222,28 +2181,7 @@ pgstat_sub_backend(int procpid)
 		if (pgStatBeTable[i].procpid == procpid)
 		{
 			/*
-			 * That's him. Add an entry to the known to be dead backends. Due
-			 * to possible misorder in the arrival of UDP packets it's
-			 * possible that even if we know the backend is dead, there could
-			 * still be messages queued that arrive later. Those messages must
-			 * not cause our number of backends statistics to get screwed up,
-			 * so we remember for a couple of seconds that this PID is dead
-			 * and ignore them (only the counting of backends, not the table
-			 * access stats they sent).
-			 */
-			deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
-													   (void *) &procpid,
-													   HASH_ENTER,
-													   &found);
-
-			if (!found)
-			{
-				deadbe->backendid = i + 1;
-				deadbe->destroy = PGSTAT_DESTROY_COUNT;
-			}
-
-			/*
-			 * Declare the backend slot empty.
+			 * That's him.  Mark the backend slot empty.
 			 */
 			pgStatBeTable[i].procpid = 0;
 			return;
@@ -2270,7 +2208,6 @@ pgstat_write_statsfile(void)
 	HASH_SEQ_STATUS tstat;
 	PgStat_StatDBEntry *dbentry;
 	PgStat_StatTabEntry *tabentry;
-	PgStat_StatBeDead *deadbe;
 	FILE	   *fpout;
 	int			i;
 	int32		format_id;
@@ -2300,31 +2237,6 @@ pgstat_write_statsfile(void)
 	hash_seq_init(&hstat, pgStatDBHash);
 	while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
 	{
-		/*
-		 * If this database is marked destroyed, count down and do so if it
-		 * reaches 0.
-		 */
-		if (dbentry->destroy > 0)
-		{
-			if (--(dbentry->destroy) == 0)
-			{
-				if (dbentry->tables != NULL)
-					hash_destroy(dbentry->tables);
-
-				if (hash_search(pgStatDBHash,
-								(void *) &(dbentry->databaseid),
-								HASH_REMOVE, NULL) == NULL)
-					ereport(ERROR,
-							(errmsg("database hash table corrupted "
-									"during cleanup --- abort")));
-			}
-
-			/*
-			 * Don't include statistics for it.
-			 */
-			continue;
-		}
-
 		/*
 		 * Write out the DB entry including the number of live backends.
 		 * We don't write the tables pointer since it's of no use to any
@@ -2339,30 +2251,6 @@ pgstat_write_statsfile(void)
 		hash_seq_init(&tstat, dbentry->tables);
 		while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
 		{
-			/*
-			 * If table entry marked for destruction, same as above for the
-			 * database entry.
-			 */
-			if (tabentry->destroy > 0)
-			{
-				if (--(tabentry->destroy) == 0)
-				{
-					if (hash_search(dbentry->tables,
-									(void *) &(tabentry->tableid),
-									HASH_REMOVE, NULL) == NULL)
-						ereport(ERROR,
-								(errmsg("tables hash table for "
-										"database %u corrupted during "
-										"cleanup --- abort",
-										dbentry->databaseid)));
-				}
-				continue;
-			}
-
-			/*
-			 * At least we think this is still a live table.  Emit its access
-			 * stats.
-			 */
 			fputc('T', fpout);
 			fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
 		}
@@ -2428,26 +2316,6 @@ pgstat_write_statsfile(void)
 						PGSTAT_STAT_TMPFILE, PGSTAT_STAT_FILENAME)));
 		unlink(PGSTAT_STAT_TMPFILE);
 	}
-
-	/*
-	 * Clear out the dead backends table
-	 */
-	hash_seq_init(&hstat, pgStatBeDead);
-	while ((deadbe = (PgStat_StatBeDead *) hash_seq_search(&hstat)) != NULL)
-	{
-		/*
-		 * Count down the destroy delay and remove entries where it reaches 0.
-		 */
-		if (--(deadbe->destroy) <= 0)
-		{
-			if (hash_search(pgStatBeDead,
-							(void *) &(deadbe->procpid),
-							HASH_REMOVE, NULL) == NULL)
-				ereport(ERROR,
-						(errmsg("dead-server-process hash table corrupted "
-								"during cleanup --- abort")));
-		}
-	}
 }
 
 /*
@@ -2595,7 +2463,6 @@ pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
 
 				memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
 				dbentry->tables = NULL;
-				dbentry->destroy = 0;
 				dbentry->n_backends = 0;
 
 				/*
@@ -3005,12 +2872,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
 
 	/*
-	 * If the database is marked for destroy, this is a delayed UDP packet and
-	 * not worth being counted.
+	 * Update database-wide stats.
 	 */
-	if (dbentry->destroy > 0)
-		return;
-
 	dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
 	dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
 
@@ -3043,8 +2906,6 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 
 			tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
 			tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
-
-			tabentry->destroy = 0;
 		}
 		else
 		{
@@ -3085,7 +2946,6 @@ static void
 pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
 {
 	PgStat_StatDBEntry *dbentry;
-	PgStat_StatTabEntry *tabentry;
 	int			i;
 
 	/*
@@ -3102,23 +2962,15 @@ pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
 	if (!dbentry || !dbentry->tables)
 		return;
 
-	/*
-	 * If the database is marked for destroy, this is a delayed UDP packet and
-	 * the tables will go away at DB destruction.
-	 */
-	if (dbentry->destroy > 0)
-		return;
-
 	/*
 	 * Process all table entries in the message.
 	 */
 	for (i = 0; i < msg->m_nentries; i++)
 	{
-		tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
-											   (void *) &(msg->m_tableid[i]),
-													   HASH_FIND, NULL);
-		if (tabentry)
-			tabentry->destroy = PGSTAT_DESTROY_COUNT;
+		/* Remove from hashtable if present; we don't care if it's not. */
+		(void) hash_search(dbentry->tables,
+						   (void *) &(msg->m_tableid[i]),
+						   HASH_REMOVE, NULL);
 	}
 }
 
@@ -3146,10 +2998,20 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
 	dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
 
 	/*
-	 * Mark the database for destruction.
+	 * If found, remove it.
 	 */
 	if (dbentry)
-		dbentry->destroy = PGSTAT_DESTROY_COUNT;
+	{
+		if (dbentry->tables != NULL)
+			hash_destroy(dbentry->tables);
+
+		if (hash_search(pgStatDBHash,
+						(void *) &(dbentry->databaseid),
+						HASH_REMOVE, NULL) == NULL)
+			ereport(ERROR,
+					(errmsg("database hash table corrupted "
+							"during cleanup --- abort")));
+	}
 }
 
 
@@ -3191,7 +3053,6 @@ pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
 	dbentry->n_xact_rollback = 0;
 	dbentry->n_blocks_fetched = 0;
 	dbentry->n_blocks_hit = 0;
-	dbentry->destroy = 0;
 
 	memset(&hash_ctl, 0, sizeof(hash_ctl));
 	hash_ctl.keysize = sizeof(Oid);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 2442559b21f..00631b3e12b 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -5,7 +5,7 @@
  *
  *	Copyright (c) 2001-2006, PostgreSQL Global Development Group
  *
- *	$PostgreSQL: pgsql/src/include/pgstat.h,v 1.42 2006/03/05 15:58:53 momjian Exp $
+ *	$PostgreSQL: pgsql/src/include/pgstat.h,v 1.43 2006/04/06 20:38:00 tgl Exp $
  * ----------
  */
 #ifndef PGSTAT_H
@@ -271,16 +271,18 @@ typedef union PgStat_Msg
  * ------------------------------------------------------------
  */
 
-#define PGSTAT_FILE_FORMAT_ID	0x01A5BC94
+#define PGSTAT_FILE_FORMAT_ID	0x01A5BC95
 
 /* ----------
  * PgStat_StatDBEntry			The collector's data per database
+ *
+ * Note: n_backends is not maintained within the collector.  It's computed
+ * when a backend reads the stats file for use.
  * ----------
  */
 typedef struct PgStat_StatDBEntry
 {
 	Oid			databaseid;
-	int			destroy;
 	int			n_backends;
 	PgStat_Counter n_xact_commit;
 	PgStat_Counter n_xact_rollback;
@@ -325,24 +327,6 @@ typedef struct PgStat_StatBeEntry
 } PgStat_StatBeEntry;
 
 
-/* ----------
- * PgStat_StatBeDead			Because UDP packets can arrive out of
- *								order, we need to keep some information
- *								about backends that are known to be
- *								dead for some seconds. This info is held
- *								in a hash table of these structs.
- *
- * (This struct is not used in the stats file.)
- * ----------
- */
-typedef struct PgStat_StatBeDead
-{
-	int			procpid;
-	int			backendid;
-	int			destroy;
-} PgStat_StatBeDead;
-
-
 /* ----------
  * PgStat_StatTabEntry			The collector's data per table (or index)
  * ----------
@@ -350,7 +334,6 @@ typedef struct PgStat_StatBeDead
 typedef struct PgStat_StatTabEntry
 {
 	Oid			tableid;
-	int			destroy;
 
 	PgStat_Counter numscans;
 
-- 
GitLab