From 97903c3d94b6d6ce089c90c34c1000653007b020 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 11 Jan 2007 23:06:03 +0000
Subject: [PATCH] Fix a performance problem in databases with large numbers of
 tables (or other types of pg_class entry): the function
 pgstat_vacuum_tabstat, invoked during VACUUM startup, had runtime
 proportional to the number of stats table entries times the number of
 pg_class rows; in other words O(N^2) if the stats collector's information is
 reasonably complete. Replace list searching with a hash table to bring it
 back to O(N) behavior.  Per report from kim at myemma.com.

Back-patch as far as 8.1; 8.0 and before use different coding here.
---
 src/backend/postmaster/pgstat.c | 87 ++++++++++++++++++++++-----------
 1 file changed, 59 insertions(+), 28 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8cdc5f6aa91..97c11d0fb00 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -13,7 +13,7 @@
  *
  *	Copyright (c) 2001-2007, PostgreSQL Global Development Group
  *
- *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.142 2007/01/05 22:19:36 momjian Exp $
+ *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.143 2007/01/11 23:06:03 tgl Exp $
  * ----------
  */
 #include "postgres.h"
@@ -159,6 +159,7 @@ static void pgstat_write_statsfile(void);
 static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb);
 static void backend_read_statsfile(void);
 static void pgstat_read_current_status(void);
+static HTAB *pgstat_collect_oids(Oid catalogid);
 
 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
 static void pgstat_send(void *msg, int len);
@@ -657,10 +658,7 @@ pgstat_report_tabstat(void)
 void
 pgstat_vacuum_tabstat(void)
 {
-	List	   *oidlist;
-	Relation	rel;
-	HeapScanDesc scan;
-	HeapTuple	tup;
+	HTAB	   *htab;
 	PgStat_MsgTabpurge msg;
 	HASH_SEQ_STATUS hstat;
 	PgStat_StatDBEntry *dbentry;
@@ -679,15 +677,7 @@ pgstat_vacuum_tabstat(void)
 	/*
 	 * Read pg_database and make a list of OIDs of all existing databases
 	 */
-	oidlist = NIL;
-	rel = heap_open(DatabaseRelationId, AccessShareLock);
-	scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
-	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
-	{
-		oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
-	}
-	heap_endscan(scan);
-	heap_close(rel, AccessShareLock);
+	htab = pgstat_collect_oids(DatabaseRelationId);
 
 	/*
 	 * Search the database hash table for dead databases and tell the
@@ -698,12 +688,14 @@ pgstat_vacuum_tabstat(void)
 	{
 		Oid			dbid = dbentry->databaseid;
 
-		if (!list_member_oid(oidlist, dbid))
+		CHECK_FOR_INTERRUPTS();
+
+		if (hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
 			pgstat_drop_database(dbid);
 	}
 
 	/* Clean up */
-	list_free(oidlist);
+	hash_destroy(htab);
 
 	/*
 	 * Lookup our own database entry; if not found, nothing more to do.
@@ -717,15 +709,7 @@ pgstat_vacuum_tabstat(void)
 	/*
 	 * Similarly to above, make a list of all known relations in this DB.
 	 */
-	oidlist = NIL;
-	rel = heap_open(RelationRelationId, AccessShareLock);
-	scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
-	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
-	{
-		oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
-	}
-	heap_endscan(scan);
-	heap_close(rel, AccessShareLock);
+	htab = pgstat_collect_oids(RelationRelationId);
 
 	/*
 	 * Initialize our messages table counter to zero
@@ -738,13 +722,17 @@ pgstat_vacuum_tabstat(void)
 	hash_seq_init(&hstat, dbentry->tables);
 	while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
 	{
-		if (list_member_oid(oidlist, tabentry->tableid))
+		Oid			tabid = tabentry->tableid;
+
+		CHECK_FOR_INTERRUPTS();
+
+		if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
 			continue;
 
 		/*
 		 * Not there, so add this table's Oid to the message
 		 */
-		msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
+		msg.m_tableid[msg.m_nentries++] = tabid;
 
 		/*
 		 * If the message is full, send it out and reinitialize to empty
@@ -776,7 +764,50 @@ pgstat_vacuum_tabstat(void)
 	}
 
 	/* Clean up */
-	list_free(oidlist);
+	hash_destroy(htab);
+}
+
+
+/* ----------
+ * pgstat_collect_oids() -
+ *
+ *	Build a temporary hash table holding the OID of every tuple returned by
+ *	a scan of catalog "catalogid" (e.g. pg_database or pg_class).  Caller
+ *	should hash_destroy the result when done with it.
+ * ----------
+ */
+static HTAB *
+pgstat_collect_oids(Oid catalogid)
+{
+	HTAB	   *htab;
+	HASHCTL		hash_ctl;
+	Relation	rel;
+	HeapScanDesc scan;
+	HeapTuple	tup;
+
+	memset(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(Oid);
+	hash_ctl.entrysize = sizeof(Oid);	/* entry is just the key, no payload */
+	hash_ctl.hash = oid_hash;
+	htab = hash_create("Temporary table of OIDs",
+					   PGSTAT_TAB_HASH_SIZE,
+					   &hash_ctl,
+					   HASH_ELEM | HASH_FUNCTION);	/* NOTE(review): no HASH_CONTEXT; confirm owning memory context */
+
+	rel = heap_open(catalogid, AccessShareLock);
+	scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
+	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Oid		thisoid = HeapTupleGetOid(tup);
+
+		CHECK_FOR_INTERRUPTS();	/* NOTE(review): if this exits early, is htab left behind? confirm */
+
+		(void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);	/* result unused; callers only test membership */
+	}
+	heap_endscan(scan);
+	heap_close(rel, AccessShareLock);
+
+	return htab;
 }
 
 
-- 
GitLab