From b95a720a487b5027af1b9e4a12542800598ff5de Mon Sep 17 00:00:00 2001
From: Simon Riggs <simon@2ndQuadrant.com>
Date: Sat, 13 Feb 2010 01:32:20 +0000
Subject: [PATCH] Re-enable max_standby_delay = -1 using deadlock detection on
 startup process. If startup waits on a buffer pin we send a request to all
 backends to cancel themselves if they are holding the buffer pin required and
 they are also waiting on a lock. If not, startup waits until
 max_standby_delay before cancelling any backend waiting for the requested
 buffer pin.

---
 src/backend/storage/ipc/procsignal.c |  5 ++-
 src/backend/storage/ipc/standby.c    | 62 +++++++++++++++++++++-------
 src/backend/storage/lmgr/proc.c      | 14 ++++++-
 src/backend/tcop/postgres.c          | 16 ++++++-
 src/backend/utils/misc/guc.c         |  4 +-
 src/include/storage/proc.h           |  3 +-
 src/include/storage/procsignal.h     |  3 +-
 src/include/storage/standby.h        |  5 ++-
 8 files changed, 86 insertions(+), 26 deletions(-)

diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index 6c38d423f23..03f61b20eee 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.4 2010/01/23 16:37:12 sriggs Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.5 2010/02/13 01:32:19 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -272,6 +272,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
 
+	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
+		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 3b3534886c9..fc4295465a1 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.11 2010/02/11 19:35:22 sriggs Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.12 2010/02/13 01:32:19 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -127,6 +127,9 @@ WaitExceedsMaxStandbyDelay(void)
 	long	delay_secs;
 	int		delay_usecs;
 
+	if (MaxStandbyDelay == -1)
+		return false;
+
 	/* Are we past max_standby_delay? */
 	TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
 						&delay_secs, &delay_usecs);
@@ -351,15 +354,15 @@ ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
  * they hold one of the buffer pins that is blocking Startup process. If so,
  * backends will take an appropriate error action, ERROR or FATAL.
  *
- * A secondary purpose of this is to avoid deadlocks that might occur between
- * the Startup process and lock waiters. Deadlocks occur because if queries
+ * We also check for deadlocks before we wait, though applications that cause
+ * these will be extremely rare.  Deadlocks occur because if queries
  * wait on a lock, that must be behind an AccessExclusiveLock, which can only
- * be clared if the Startup process replays a transaction completion record.
- * If Startup process is waiting then that is a deadlock. If we allowed a
- * setting of max_standby_delay that meant "wait forever" we would then need
- * special code to protect against deadlock. Such deadlocks are rare, so the
- * code would be almost certainly buggy, so we avoid both long waits and
- * deadlocks using the same mechanism.
+ * be cleared if the Startup process replays a transaction completion record.
+ * If Startup process is also waiting then that is a deadlock. The deadlock
+ * can occur if the query is waiting and then the Startup sleeps, or if
+ * Startup is sleeping and the the query waits on a lock. We protect against
+ * only the former sequence here, the latter sequence is checked prior to
+ * the query sleeping, in CheckRecoveryConflictDeadlock().
  */
 void
 ResolveRecoveryConflictWithBufferPin(void)
@@ -368,11 +371,23 @@ ResolveRecoveryConflictWithBufferPin(void)
 
 	Assert(InHotStandby);
 
-	/*
-	 * Signal immediately or set alarm for later.
-	 */
 	if (MaxStandbyDelay == 0)
-		SendRecoveryConflictWithBufferPin();
+	{
+		/*
+		 * We don't want to wait, so just tell everybody holding the pin to 
+		 * get out of town.
+		 */
+		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+	}
+	else if (MaxStandbyDelay == -1)
+	{
+		/*
+		 * Send out a request to check for buffer pin deadlocks before we wait.
+		 * This is fairly cheap, so no need to wait for deadlock timeout before
+		 * trying to send it out.
+		 */
+		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+	}
 	else
 	{
 		TimestampTz now;
@@ -386,13 +401,25 @@ ResolveRecoveryConflictWithBufferPin(void)
 							&standby_delay_secs, &standby_delay_usecs);
 
 		if (standby_delay_secs >= MaxStandbyDelay)
-			SendRecoveryConflictWithBufferPin();
+		{
+			/*
+			 * We're already behind, so clear a path as quickly as possible.
+			 */
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+		}
 		else
 		{
 			TimestampTz fin_time;			/* Expected wake-up time by timer */
 			long	timer_delay_secs;		/* Amount of time we set timer for */
 			int		timer_delay_usecs = 0;
 
+			/*
+			 * Send out a request to check for buffer pin deadlocks before we wait.
+			 * This is fairly cheap, so no need to wait for deadlock timeout before
+			 * trying to send it out.
+			 */
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+
 			/*
 			 * How much longer we should wait?
 			 */
@@ -435,15 +462,18 @@ ResolveRecoveryConflictWithBufferPin(void)
 }
 
 void
-SendRecoveryConflictWithBufferPin(void)
+SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
 {
+	Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
+		   reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+
 	/*
 	 * We send signal to all backends to ask them if they are holding
 	 * the buffer pin which is delaying the Startup process. We must
 	 * not set the conflict flag yet, since most backends will be innocent.
 	 * Let the SIGUSR1 handling in each backend decide their own fate.
 	 */
-	CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
+	CancelDBBackends(InvalidOid, reason, false);
 }
 
 /*
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index a96a558da32..1e103743b2e 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.215 2010/02/08 04:33:54 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.216 2010/02/13 01:32:19 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,6 +45,7 @@
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
+#include "storage/procsignal.h"
 #include "storage/spin.h"
 
 
@@ -556,6 +557,15 @@ HaveNFreeProcs(int n)
 	return (n <= 0);
 }
 
+bool
+IsWaitingForLock(void)
+{
+	if (lockAwaited == NULL)
+		return false;
+
+	return true;
+}
+
 /*
  * Cancel any pending wait for lock, when aborting a transaction.
  *
@@ -1670,7 +1680,7 @@ CheckStandbyTimeout(void)
 	now = GetCurrentTimestamp();
 
 	if (now >= statement_fin_time)
-		SendRecoveryConflictWithBufferPin();
+		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 	else
 	{
 		/* Not time yet, so (re)schedule the interrupt */
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 35a4d087ce7..8d33000e616 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.587 2010/01/23 17:04:05 sriggs Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.588 2010/02/13 01:32:19 sriggs Exp $
  *
  * NOTES
  *	  this is the "main" module of the postgres backend and
@@ -2278,6 +2278,9 @@ errdetail_recovery_conflict(void)
 		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
 				errdetail("User query might have needed to see row versions that must be removed.");
 				break;
+		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+				errdetail("User transaction caused buffer deadlock with recovery.");
+				break;
 		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
 				errdetail("User was connected to a database that must be dropped.");
 				break;
@@ -2754,6 +2757,15 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
 		RecoveryConflictReason = reason;
 		switch (reason)
 		{
+			case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+					/*
+					 * If we aren't waiting for a lock we can never deadlock.
+					 */
+					if (!IsWaitingForLock())
+						return;
+
+					/* Intentional drop through to check wait for pin */
+
 			case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
 					/*
 					 * If we aren't blocking the Startup process there is
@@ -2819,6 +2831,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
 					elog(FATAL, "Unknown conflict mode");
 		}
 
+		Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
+
 		/*
 		 * If it's safe to interrupt, and we're waiting for input or a lock,
 		 * service the interrupt immediately
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 642fc6796a3..fd533a1e586 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.538 2010/02/01 13:40:28 sriggs Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.539 2010/02/13 01:32:20 sriggs Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1381,7 +1381,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&MaxStandbyDelay,
-		30, 0, INT_MAX, NULL, NULL
+		30, -1, INT_MAX, NULL, NULL
 	},
 
 	{
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 5a4421bfa5c..b1fc78d3ede 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.119 2010/01/23 16:37:12 sriggs Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.120 2010/02/13 01:32:20 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -189,6 +189,7 @@ extern void ProcQueueInit(PROC_QUEUE *queue);
 extern int	ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
 extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
+extern bool IsWaitingForLock(void);
 extern void LockWaitCancel(void);
 
 extern void ProcWaitForSignal(void);
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 2a67a62a922..0b5316e691f 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.4 2010/01/23 16:37:12 sriggs Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.5 2010/02/13 01:32:20 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,7 @@ typedef enum
 	PROCSIG_RECOVERY_CONFLICT_LOCK,
 	PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
 	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
 
 	NUM_PROCSIGNALS				/* Must be last! */
 } ProcSignalReason;
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 2a0b56ff0b7..081fa51ba00 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.7 2010/01/31 19:01:11 sriggs Exp $
+ * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.8 2010/02/13 01:32:20 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,6 +16,7 @@
 
 #include "access/xlog.h"
 #include "storage/lock.h"
+#include "storage/procsignal.h"
 #include "storage/relfilenode.h"
 
 extern int	vacuum_defer_cleanup_age;
@@ -30,7 +31,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
 extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
 
 extern void ResolveRecoveryConflictWithBufferPin(void);
-extern void SendRecoveryConflictWithBufferPin(void);
+extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
 extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
 
 /*
-- 
GitLab