From b03d196be055450c7260749f17347c2d066b4254 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 29 Aug 2013 13:48:59 +0300
Subject: [PATCH] Use a non-locking initial test in TAS_SPIN on x86_64.

Testing done in 2011 by Tom Lane concluded that this is a win on Intel Xeons
and AMD Opterons, but it was not changed back then, because of an old
comment in tas() that suggested that it's a huge loss on older Opterons.
However, didn't have separate TAS() and TAS_SPIN() macros back then, so the
comment referred to doing a non-locked initial test even on the first
access, in uncontended case. I don't have access to older Opterons, but I'm
pretty sure that doing an initial unlocked test is unlikely to be a loss
while spinning, even though it might be for the first access.

We probably should do the same on 32-bit x86, but I'm afraid of changing it
without any testing. Hence just add a note to the x86 implementation
suggesting that we probably should do the same there.
---
 src/include/storage/s_lock.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 180c013a8ba..38e09ce8d9d 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -145,6 +145,12 @@ tas(volatile slock_t *lock)
 	 * Use a non-locking test before asserting the bus lock.  Note that the
 	 * extra test appears to be a small loss on some x86 platforms and a small
 	 * win on others; it's by no means clear that we should keep it.
+	 *
+	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
+	 * macros.  Nowadays it probably would be better to do a non-locking test
+	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
+	 * testing to verify that.  Without some empirical evidence, better to
+	 * leave it alone.
 	 */
 	__asm__ __volatile__(
 		"	cmpb	$0,%1	\n"
@@ -200,16 +206,22 @@ typedef unsigned char slock_t;
 
 #define TAS(lock) tas(lock)
 
+/*
+ * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
+ * but only when spinning.
+ *
+ * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
+ * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
+ * available at:
+ * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
+ */
+#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
+
 static __inline__ int
 tas(volatile slock_t *lock)
 {
 	register slock_t _res = 1;
 
-	/*
-	 * On Opteron, using a non-locking test before the locking instruction
-	 * is a huge loss.  On EM64T, it appears to be a wash or small loss,
-	 * so we needn't bother to try to distinguish the sub-architectures.
-	 */
 	__asm__ __volatile__(
 		"	lock			\n"
 		"	xchgb	%0,%1	\n"
-- 
GitLab