diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 95d4b37bef3ac6ae4a9ed7d18ffecf71cf043fa2..5e1ce1775cb7f867ae1a7ebe1e9798c8a42185ff 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -95,6 +95,7 @@ static int	counts_for_pid = 0;
 static int *sh_acquire_counts;
 static int *ex_acquire_counts;
 static int *block_counts;
+static int *spin_delay_counts;
 #endif
 
 #ifdef LOCK_DEBUG
@@ -134,6 +135,7 @@ init_lwlock_stats(void)
 
 	sh_acquire_counts = calloc(numLocks, sizeof(int));
 	ex_acquire_counts = calloc(numLocks, sizeof(int));
+	spin_delay_counts = calloc(numLocks, sizeof(int));
 	block_counts = calloc(numLocks, sizeof(int));
 	counts_for_pid = MyProcPid;
 	on_shmem_exit(print_lwlock_stats, 0);
@@ -151,10 +153,10 @@ print_lwlock_stats(int code, Datum arg)
 
 	for (i = 0; i < numLocks; i++)
 	{
-		if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i])
-			fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u\n",
+		if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i] || spin_delay_counts[i])
+			fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u spindelay %u\n",
 					MyProcPid, i, sh_acquire_counts[i], ex_acquire_counts[i],
-					block_counts[i]);
+					block_counts[i], spin_delay_counts[i]);
 	}
 
 	LWLockRelease(0);
@@ -395,7 +397,11 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 		bool		mustwait;
 
 		/* Acquire mutex.  Time spent holding mutex should be short! */
+#ifdef LWLOCK_STATS
+		spin_delay_counts[lockid] += SpinLockAcquire(&lock->mutex);
+#else
 		SpinLockAcquire(&lock->mutex);
+#endif
 
 		/* If retrying, allow LWLockRelease to release waiters again */
 		if (retry)
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index bc8d89f8c1791f628000719648466507f6d8540a..13f1daeb7078df3587b488464b25de2221169d80 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -46,7 +46,7 @@ s_lock_stuck(volatile slock_t *lock, const char *file, int line)
 /*
  * s_lock(lock) - platform-independent portion of waiting for a spinlock.
  */
-void
+int
 s_lock(volatile slock_t *lock, const char *file, int line)
 {
 	/*
@@ -155,6 +155,7 @@ s_lock(volatile slock_t *lock, const char *file, int line)
 		if (spins_per_delay > MIN_SPINS_PER_DELAY)
 			spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);
 	}
+	return delays;
 }
 
 
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index d4a783f63d440b8b92eb13d1b1d3360cfaebca9d..48dc4de6350ac550083d530cd93efb068c3254e3 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -12,10 +12,12 @@
  *	void S_INIT_LOCK(slock_t *lock)
  *		Initialize a spinlock (to the unlocked state).
  *
- *	void S_LOCK(slock_t *lock)
+ *	int S_LOCK(slock_t *lock)
  *		Acquire a spinlock, waiting if necessary.
  *		Time out and abort() if unable to acquire the lock in a
  *		"reasonable" amount of time --- typically ~ 1 minute.
+ *		Should return the number of "delays" incurred while waiting
+ *		(zero if the lock was obtained without delaying); see s_lock.c
  *
  *	void S_UNLOCK(slock_t *lock)
  *		Unlock a previously acquired lock.
@@ -978,10 +979,7 @@ extern int	tas_sema(volatile slock_t *lock);
 
 #if !defined(S_LOCK)
 #define S_LOCK(lock) \
-	do { \
-		if (TAS(lock)) \
-			s_lock((lock), __FILE__, __LINE__); \
-	} while (0)
+	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__) : 0)
 #endif	 /* S_LOCK */
 
 #if !defined(S_LOCK_FREE)
@@ -1015,7 +1013,7 @@ extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
 /*
  * Platform-independent out-of-line support routines
  */
-extern void s_lock(volatile slock_t *lock, const char *file, int line);
+extern int s_lock(volatile slock_t *lock, const char *file, int line);
 
 /* Support for dynamic adjustment of spins_per_delay */
 #define DEFAULT_SPINS_PER_DELAY  100