From c973051ae69228129aeb8eb413d451ba4b326cad Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 4 Apr 2009 17:40:36 +0000
Subject: [PATCH] A session that does not have any live snapshots does not have
 to be waited for when we are waiting for old snapshots to go away during a
 concurrent index build.  In particular, this rule lets us avoid waiting for
 idle-in-transaction sessions.

This logic could be improved further if we had some way to wake up when
the session we are currently waiting for goes idle-in-transaction.  However
that would be a significantly more complex/invasive patch, so it'll have to
wait for some other day.

Simon Riggs, with some improvements by Tom.
---
 src/backend/commands/indexcmds.c    | 67 ++++++++++++++++++++++++-----
 src/backend/storage/ipc/procarray.c | 53 +++++++++++++++--------
 src/include/storage/lock.h          |  8 +++-
 src/include/storage/procarray.h     |  5 ++-
 4 files changed, 101 insertions(+), 32 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index ab075090819..f2ff5b6da21 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.183 2009/03/31 22:12:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.184 2009/04/04 17:40:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -130,12 +130,14 @@ DefineIndex(RangeVar *heapRelation,
 	int			numberOfAttributes;
 	VirtualTransactionId *old_lockholders;
 	VirtualTransactionId *old_snapshots;
+	int			n_old_snapshots;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	Snapshot	snapshot;
 	Relation	pg_index;
 	HeapTuple	indexTuple;
 	Form_pg_index indexForm;
+	int			i;
 
 	/*
 	 * count attributes in index
@@ -611,7 +613,7 @@ DefineIndex(RangeVar *heapRelation,
 	 * snapshot treats as committed.  If such a recently-committed transaction
 	 * deleted tuples in the table, we will not include them in the index; yet
 	 * those transactions which see the deleting one as still-in-progress will
-	 * expect them to be there once we mark the index as valid.
+	 * expect such tuples to be there once we mark the index as valid.
 	 *
 	 * We solve this by waiting for all endangered transactions to exit before
 	 * we mark the index as valid.
@@ -634,10 +636,13 @@ DefineIndex(RangeVar *heapRelation,
 	 * transactions that might have older snapshots.  Obtain a list of VXIDs
 	 * of such transactions, and wait for them individually.
 	 *
-	 * We can exclude any running transactions that have xmin >= the xmax of
-	 * our reference snapshot, since they are clearly not interested in any
-	 * missing older tuples.  Transactions in other DBs aren't a problem
-	 * either, since they'll never even be able to see this index.
+	 * We can exclude any running transactions that have xmin > the xmin of
+	 * our reference snapshot; their oldest snapshot must be newer than ours.
+	 * We can also exclude any transactions that have xmin = zero, since they
+	 * evidently have no live snapshot at all (and any one they might be
+	 * in process of taking is certainly newer than ours).  Transactions in
+	 * other DBs can be ignored too, since they'll never even be able to see
+	 * this index.
 	 *
 	 * We can also exclude autovacuum processes and processes running manual
 	 * lazy VACUUMs, because they won't be fazed by missing index entries
@@ -647,14 +652,54 @@ DefineIndex(RangeVar *heapRelation,
 	 *
 	 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
 	 * check for that.
+	 *
+	 * If a process goes idle-in-transaction with xmin zero, we do not need
+	 * to wait for it anymore, per the above argument.  We do not have the
+	 * infrastructure right now to stop waiting if that happens, but we can
+	 * at least avoid the folly of waiting when it is idle at the time we
+	 * would begin to wait.  We do this by repeatedly rechecking the output of
+	 * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
+	 * doesn't show up in the output, we know we can forget about it.
 	 */
-	old_snapshots = GetCurrentVirtualXIDs(snapshot->xmax, false,
-										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM);
+	old_snapshots = GetCurrentVirtualXIDs(snapshot->xmin, true, false,
+										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
+										  &n_old_snapshots);
 
-	while (VirtualTransactionIdIsValid(*old_snapshots))
+	for (i = 0; i < n_old_snapshots; i++)
 	{
-		VirtualXactLockTableWait(*old_snapshots);
-		old_snapshots++;
+		if (!VirtualTransactionIdIsValid(old_snapshots[i]))
+			continue;			/* found uninteresting in previous cycle */
+
+		if (i > 0)
+		{
+			/* see if anything's changed ... */
+			VirtualTransactionId *newer_snapshots;
+			int			n_newer_snapshots;
+			int			j;
+			int			k;
+
+			newer_snapshots = GetCurrentVirtualXIDs(snapshot->xmin,
+													true, false,
+										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
+													&n_newer_snapshots);
+			for (j = i; j < n_old_snapshots; j++)
+			{
+				if (!VirtualTransactionIdIsValid(old_snapshots[j]))
+					continue;		/* found uninteresting in previous cycle */
+				for (k = 0; k < n_newer_snapshots; k++)
+				{
+					if (VirtualTransactionIdEquals(old_snapshots[j],
+												   newer_snapshots[k]))
+						break;
+				}
+				if (k >= n_newer_snapshots)		/* not there anymore */
+					SetInvalidVirtualTransactionId(old_snapshots[j]);
+			}
+			pfree(newer_snapshots);
+		}
+
+		if (VirtualTransactionIdIsValid(old_snapshots[i]))
+			VirtualXactLockTableWait(old_snapshots[i]);
 	}
 
 	/*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 8acd2014a0d..30f66c089c6 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -23,7 +23,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.48 2009/03/31 05:18:33 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.49 2009/04/04 17:40:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1022,25 +1022,42 @@ IsBackendPid(int pid)
 /*
  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
  *
- * The array is palloc'd and is terminated with an invalid VXID.
+ * The array is palloc'd. The number of valid entries is returned into *nvxids.
  *
- * If limitXmin is not InvalidTransactionId, we skip any backends
- * with xmin >= limitXmin.	If allDbs is false, we skip backends attached
- * to other databases.  If excludeVacuum isn't zero, we skip processes for
- * which (excludeVacuum & vacuumFlags) is not zero.  Also, our own process
- * is always skipped.
+ * The arguments allow filtering the set of VXIDs returned.  Our own process
+ * is always skipped.  In addition:
+ *	If limitXmin is not InvalidTransactionId, skip processes with
+ *		xmin > limitXmin.
+ *	If excludeXmin0 is true, skip processes with xmin = 0.
+ *	If allDbs is false, skip processes attached to other databases.
+ *	If excludeVacuum isn't zero, skip processes for which
+ *		(vacuumFlags & excludeVacuum) is not zero.
+ *
+ * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
+ * allow skipping backends whose oldest live snapshot is no older than
+ * some snapshot we have.  Since we examine the procarray with only shared
+ * lock, there are race conditions: a backend could set its xmin just after
+ * we look.  Indeed, on multiprocessors with weak memory ordering, the
+ * other backend could have set its xmin *before* we look.  We know however
+ * that such a backend must have held shared ProcArrayLock overlapping our
+ * own hold of ProcArrayLock, else we would see its xmin update.  Therefore,
+ * any snapshot the other backend is taking concurrently with our scan cannot
+ * consider any transactions as still running that we think are committed
+ * (since backends must hold ProcArrayLock exclusive to commit).
  */
 VirtualTransactionId *
-GetCurrentVirtualXIDs(TransactionId limitXmin, bool allDbs, int excludeVacuum)
+GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
+					  bool allDbs, int excludeVacuum,
+					  int *nvxids)
 {
 	VirtualTransactionId *vxids;
 	ProcArrayStruct *arrayP = procArray;
 	int			count = 0;
 	int			index;
 
-	/* allocate result space with room for a terminator */
+	/* allocate what's certainly enough result space */
 	vxids = (VirtualTransactionId *)
-		palloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
+		palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
 
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
@@ -1056,15 +1073,18 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool allDbs, int excludeVacuum)
 
 		if (allDbs || proc->databaseId == MyDatabaseId)
 		{
-			/* Fetch xmin just once - might change on us? */
+			/* Fetch xmin just once - might change on us */
 			TransactionId pxmin = proc->xmin;
 
+			if (excludeXmin0 && !TransactionIdIsValid(pxmin))
+				continue;
+
 			/*
-			 * Note that InvalidTransactionId precedes all other XIDs, so a
-			 * proc that hasn't set xmin yet will always be included.
+			 * InvalidTransactionId precedes all other XIDs, so a proc that
+			 * hasn't set xmin yet will not be rejected by this test.
 			 */
 			if (!TransactionIdIsValid(limitXmin) ||
-				TransactionIdPrecedes(pxmin, limitXmin))
+				TransactionIdPrecedesOrEquals(pxmin, limitXmin))
 			{
 				VirtualTransactionId vxid;
 
@@ -1077,10 +1097,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool allDbs, int excludeVacuum)
 
 	LWLockRelease(ProcArrayLock);
 
-	/* add the terminator */
-	vxids[count].backendId = InvalidBackendId;
-	vxids[count].localTransactionId = InvalidLocalTransactionId;
-
+	*nvxids = count;
 	return vxids;
 }
 
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 7ec3a24cb6e..e2b27ccb98b 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.115 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.116 2009/04/04 17:40:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,6 +67,12 @@ typedef struct
 #define VirtualTransactionIdIsValid(vxid) \
 	(((vxid).backendId != InvalidBackendId) && \
 	 LocalTransactionIdIsValid((vxid).localTransactionId))
+#define VirtualTransactionIdEquals(vxid1, vxid2) \
+	((vxid1).backendId == (vxid2).backendId && \
+	 (vxid1).localTransactionId == (vxid2).localTransactionId)
+#define SetInvalidVirtualTransactionId(vxid) \
+	((vxid).backendId = InvalidBackendId, \
+	 (vxid).localTransactionId = InvalidLocalTransactionId)
 #define GET_VXID_FROM_PGPROC(vxid, proc) \
 	((vxid).backendId = (proc).backendId, \
 	 (vxid).localTransactionId = (proc).lxid)
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 8e4ad63586b..bd49e096522 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.24 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.25 2009/04/04 17:40:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,7 +40,8 @@ extern int	BackendXidGetPid(TransactionId xid);
 extern bool IsBackendPid(int pid);
 
 extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
-					  bool allDbs, int excludeVacuum);
+					  bool excludeXmin0, bool allDbs, int excludeVacuum,
+					  int *nvxids);
 extern int	CountActiveBackends(void);
 extern int	CountDBBackends(Oid databaseid);
 extern int	CountUserBackends(Oid roleid);
-- 
GitLab