diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index af2dea1c2abcba9847372a535fdf88daebc96a48..71b645839db053bdf9bcec61c4257977c51cea00 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -273,60 +273,6 @@ AC_DEFINE(HAVE__BUILTIN_TYPES_COMPATIBLE_P, 1,
 fi])# PGAC_C_TYPES_COMPATIBLE
 
 
-# PGAC_C_BUILTIN_BSWAP16
-# -------------------------
-# Check if the C compiler understands __builtin_bswap16(),
-# and define HAVE__BUILTIN_BSWAP16 if so.
-AC_DEFUN([PGAC_C_BUILTIN_BSWAP16],
-[AC_CACHE_CHECK(for __builtin_bswap16, pgac_cv__builtin_bswap16,
-[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-[static unsigned long int x = __builtin_bswap16(0xaabb);]
-)],
-[pgac_cv__builtin_bswap16=yes],
-[pgac_cv__builtin_bswap16=no])])
-if test x"$pgac_cv__builtin_bswap16" = xyes ; then
-AC_DEFINE(HAVE__BUILTIN_BSWAP16, 1,
-          [Define to 1 if your compiler understands __builtin_bswap16.])
-fi])# PGAC_C_BUILTIN_BSWAP16
-
-
-
-# PGAC_C_BUILTIN_BSWAP32
-# -------------------------
-# Check if the C compiler understands __builtin_bswap32(),
-# and define HAVE__BUILTIN_BSWAP32 if so.
-AC_DEFUN([PGAC_C_BUILTIN_BSWAP32],
-[AC_CACHE_CHECK(for __builtin_bswap32, pgac_cv__builtin_bswap32,
-[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-[static unsigned long int x = __builtin_bswap32(0xaabbccdd);]
-)],
-[pgac_cv__builtin_bswap32=yes],
-[pgac_cv__builtin_bswap32=no])])
-if test x"$pgac_cv__builtin_bswap32" = xyes ; then
-AC_DEFINE(HAVE__BUILTIN_BSWAP32, 1,
-          [Define to 1 if your compiler understands __builtin_bswap32.])
-fi])# PGAC_C_BUILTIN_BSWAP32
-
-
-
-# PGAC_C_BUILTIN_BSWAP64
-# -------------------------
-# Check if the C compiler understands __builtin_bswap64(),
-# and define HAVE__BUILTIN_BSWAP64 if so.
-AC_DEFUN([PGAC_C_BUILTIN_BSWAP64],
-[AC_CACHE_CHECK(for __builtin_bswap64, pgac_cv__builtin_bswap64,
-[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-[static unsigned long int x = __builtin_bswap64(0xaabbccddeeff0011);]
-)],
-[pgac_cv__builtin_bswap64=yes],
-[pgac_cv__builtin_bswap64=no])])
-if test x"$pgac_cv__builtin_bswap64" = xyes ; then
-AC_DEFINE(HAVE__BUILTIN_BSWAP64, 1,
-          [Define to 1 if your compiler understands __builtin_bswap64.])
-fi])# PGAC_C_BUILTIN_BSWAP64
-
-
-
 # PGAC_C_BUILTIN_CONSTANT_P
 # -------------------------
 # Check if the C compiler understands __builtin_constant_p(),
@@ -423,6 +369,33 @@ fi])# PGAC_C_COMPUTED_GOTO
 
 
 
+# PGAC_CHECK_BUILTIN_FUNC
+# -----------------------
+# This is similar to AC_CHECK_FUNCS(), except that it will work for compiler
+# builtin functions, as that usually fails to.
+# The first argument is the function name, eg [__builtin_clzl], and the
+# second is its argument list, eg [unsigned long x].  The current coding
+# works only for a single argument named x; we might generalize that later.
+# It's assumed that the function's result type is coercible to int.
+# On success, we define "HAVEfuncname" (there's usually more than enough
+# underscores already, so we don't add another one).
+AC_DEFUN([PGAC_CHECK_BUILTIN_FUNC],
+[AC_CACHE_CHECK(for $1, pgac_cv$1,
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([
+int
+call$1($2)
+{
+    return $1(x);
+}], [])],
+[pgac_cv$1=yes],
+[pgac_cv$1=no])])
+if test x"${pgac_cv$1}" = xyes ; then
+AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1,
+                   [Define to 1 if your compiler understands $1.])
+fi])# PGAC_CHECK_BUILTIN_FUNC
+
+
+
 # PGAC_PROG_VARCC_VARFLAGS_OPT
 # -----------------------
 # Given a compiler, variable name and a string, check if the compiler
diff --git a/configure b/configure
index 849a7ebdf3de0c987379a8f159726c4e7c0d2d6e..de3153f2ebc9f5c73af212c75254e83c11ec2737 100755
--- a/configure
+++ b/configure
@@ -13960,78 +13960,6 @@ if test x"$pgac_cv__types_compatible" = xyes ; then
 
 $as_echo "#define HAVE__BUILTIN_TYPES_COMPATIBLE_P 1" >>confdefs.h
 
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap16" >&5
-$as_echo_n "checking for __builtin_bswap16... " >&6; }
-if ${pgac_cv__builtin_bswap16+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-static unsigned long int x = __builtin_bswap16(0xaabb);
-
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  pgac_cv__builtin_bswap16=yes
-else
-  pgac_cv__builtin_bswap16=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap16" >&5
-$as_echo "$pgac_cv__builtin_bswap16" >&6; }
-if test x"$pgac_cv__builtin_bswap16" = xyes ; then
-
-$as_echo "#define HAVE__BUILTIN_BSWAP16 1" >>confdefs.h
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
-$as_echo_n "checking for __builtin_bswap32... " >&6; }
-if ${pgac_cv__builtin_bswap32+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-static unsigned long int x = __builtin_bswap32(0xaabbccdd);
-
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  pgac_cv__builtin_bswap32=yes
-else
-  pgac_cv__builtin_bswap32=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
-$as_echo "$pgac_cv__builtin_bswap32" >&6; }
-if test x"$pgac_cv__builtin_bswap32" = xyes ; then
-
-$as_echo "#define HAVE__BUILTIN_BSWAP32 1" >>confdefs.h
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap64" >&5
-$as_echo_n "checking for __builtin_bswap64... " >&6; }
-if ${pgac_cv__builtin_bswap64+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-static unsigned long int x = __builtin_bswap64(0xaabbccddeeff0011);
-
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  pgac_cv__builtin_bswap64=yes
-else
-  pgac_cv__builtin_bswap64=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap64" >&5
-$as_echo "$pgac_cv__builtin_bswap64" >&6; }
-if test x"$pgac_cv__builtin_bswap64" = xyes ; then
-
-$as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h
-
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
 $as_echo_n "checking for __builtin_constant_p... " >&6; }
@@ -14687,12 +14615,49 @@ fi
 
 fi
 
-# On PPC, check if assembler supports LWARX instruction's mutex hint bit
 case $host_cpu in
+  x86_64)
+    # On x86_64, check if we can compile a popcntq instruction
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports x86_64 popcntq" >&5
+$as_echo_n "checking whether assembler supports x86_64 popcntq... " >&6; }
+if ${pgac_cv_have_x86_64_popcntq+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+long long x = 1; long long r;
+	 __asm__ __volatile__ (" popcntq %1,%0\n" : "=q"(r) : "rm"(x));
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_x86_64_popcntq=yes
+else
+  pgac_cv_have_x86_64_popcntq=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_x86_64_popcntq" >&5
+$as_echo "$pgac_cv_have_x86_64_popcntq" >&6; }
+    if test x"$pgac_cv_have_x86_64_popcntq" = xyes ; then
+
+$as_echo "#define HAVE_X86_64_POPCNTQ 1" >>confdefs.h
+
+    fi
+  ;;
   ppc*|powerpc*)
+    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports lwarx hint bit" >&5
 $as_echo_n "checking whether assembler supports lwarx hint bit... " >&6; }
-    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+if ${pgac_cv_have_ppc_mutex_hint+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 int
@@ -14710,7 +14675,8 @@ else
   pgac_cv_have_ppc_mutex_hint=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
 $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
 
@@ -15223,6 +15189,237 @@ fi
 done
 
 
+# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap16" >&5
+$as_echo_n "checking for __builtin_bswap16... " >&6; }
+if ${pgac_cv__builtin_bswap16+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_bswap16(int x)
+{
+    return __builtin_bswap16(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_bswap16=yes
+else
+  pgac_cv__builtin_bswap16=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap16" >&5
+$as_echo "$pgac_cv__builtin_bswap16" >&6; }
+if test x"${pgac_cv__builtin_bswap16}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_BSWAP16 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
+$as_echo_n "checking for __builtin_bswap32... " >&6; }
+if ${pgac_cv__builtin_bswap32+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_bswap32(int x)
+{
+    return __builtin_bswap32(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_bswap32=yes
+else
+  pgac_cv__builtin_bswap32=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
+$as_echo "$pgac_cv__builtin_bswap32" >&6; }
+if test x"${pgac_cv__builtin_bswap32}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_BSWAP32 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap64" >&5
+$as_echo_n "checking for __builtin_bswap64... " >&6; }
+if ${pgac_cv__builtin_bswap64+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_bswap64(long int x)
+{
+    return __builtin_bswap64(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_bswap64=yes
+else
+  pgac_cv__builtin_bswap64=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap64" >&5
+$as_echo "$pgac_cv__builtin_bswap64" >&6; }
+if test x"${pgac_cv__builtin_bswap64}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_BSWAP64 1
+_ACEOF
+
+fi
+# We assume that we needn't test all widths of these explicitly:
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
+$as_echo_n "checking for __builtin_clz... " >&6; }
+if ${pgac_cv__builtin_clz+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_clz(unsigned int x)
+{
+    return __builtin_clz(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_clz=yes
+else
+  pgac_cv__builtin_clz=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
+$as_echo "$pgac_cv__builtin_clz" >&6; }
+if test x"${pgac_cv__builtin_clz}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_CLZ 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
+$as_echo_n "checking for __builtin_ctz... " >&6; }
+if ${pgac_cv__builtin_ctz+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_ctz(unsigned int x)
+{
+    return __builtin_ctz(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_ctz=yes
+else
+  pgac_cv__builtin_ctz=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
+$as_echo "$pgac_cv__builtin_ctz" >&6; }
+if test x"${pgac_cv__builtin_ctz}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_CTZ 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
+$as_echo_n "checking for __builtin_popcount... " >&6; }
+if ${pgac_cv__builtin_popcount+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+call__builtin_popcount(unsigned int x)
+{
+    return __builtin_popcount(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_popcount=yes
+else
+  pgac_cv__builtin_popcount=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
+$as_echo "$pgac_cv__builtin_popcount" >&6; }
+if test x"${pgac_cv__builtin_popcount}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_POPCOUNT 1
+_ACEOF
+
+fi
+
 ac_fn_c_check_func "$LINENO" "fseeko" "ac_cv_func_fseeko"
 if test "x$ac_cv_func_fseeko" = xyes; then :
   $as_echo "#define HAVE_FSEEKO 1" >>confdefs.h
diff --git a/configure.in b/configure.in
index 71f8885fe2248d4794f4dc9127d2b6c75ac62952..0373c155f0ba19d403a3468bf948e3dac2f29ae9 100644
--- a/configure.in
+++ b/configure.in
@@ -1485,9 +1485,6 @@ PGAC_C_FUNCNAME_SUPPORT
 PGAC_C_STATIC_ASSERT
 PGAC_C_TYPEOF
 PGAC_C_TYPES_COMPATIBLE
-PGAC_C_BUILTIN_BSWAP16
-PGAC_C_BUILTIN_BSWAP32
-PGAC_C_BUILTIN_BSWAP64
 PGAC_C_BUILTIN_CONSTANT_P
 PGAC_C_BUILTIN_UNREACHABLE
 PGAC_C_COMPUTED_GOTO
@@ -1542,16 +1539,29 @@ Use --without-zlib to disable zlib support.])],
                 [#include <zlib.h>])
 fi
 
-# On PPC, check if assembler supports LWARX instruction's mutex hint bit
 case $host_cpu in
+  x86_64)
+    # On x86_64, check if we can compile a popcntq instruction
+    AC_CACHE_CHECK([whether assembler supports x86_64 popcntq],
+                   [pgac_cv_have_x86_64_popcntq],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+	[long long x = 1; long long r;
+	 __asm__ __volatile__ (" popcntq %1,%0\n" : "=q"(r) : "rm"(x));])],
+	[pgac_cv_have_x86_64_popcntq=yes],
+	[pgac_cv_have_x86_64_popcntq=no])])
+    if test x"$pgac_cv_have_x86_64_popcntq" = xyes ; then
+	AC_DEFINE(HAVE_X86_64_POPCNTQ, 1, [Define to 1 if the assembler supports X86_64's POPCNTQ instruction.])
+    fi
+  ;;
   ppc*|powerpc*)
-    AC_MSG_CHECKING([whether assembler supports lwarx hint bit])
-    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
+    AC_CACHE_CHECK([whether assembler supports lwarx hint bit],
+                   [pgac_cv_have_ppc_mutex_hint],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
 	[int a = 0; int *p = &a; int r;
 	 __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));])],
 	[pgac_cv_have_ppc_mutex_hint=yes],
-	[pgac_cv_have_ppc_mutex_hint=no])
-    AC_MSG_RESULT([$pgac_cv_have_ppc_mutex_hint])
+	[pgac_cv_have_ppc_mutex_hint=no])])
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
 	AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
@@ -1631,6 +1641,15 @@ AC_CHECK_FUNCS(m4_normalize([
 	wcstombs_l
 ]))
 
+# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
+PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap16], [int x])
+PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap32], [int x])
+PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap64], [long int x])
+# We assume that we needn't test all widths of these explicitly:
+PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
+PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x])
+PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
+
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
 	# NetBSD uses a custom fseeko/ftello built on fsetpos/fgetpos
diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c
index de7bc82a2348824efc8337eb799936113fecc659..8d3b3f5e405869d048e6ee1078accb6d84d5046d 100644
--- a/contrib/intarray/_intbig_gist.c
+++ b/contrib/intarray/_intbig_gist.c
@@ -5,6 +5,7 @@
 
 #include "access/gist.h"
 #include "access/stratnum.h"
+#include "port/pg_bitutils.h"
 
 #include "_int.h"
 
@@ -19,27 +20,6 @@ PG_FUNCTION_INFO_V1(g_intbig_penalty);
 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
 PG_FUNCTION_INFO_V1(g_intbig_union);
 PG_FUNCTION_INFO_V1(g_intbig_same);
-
-/* Number of one-bits in an unsigned byte */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
 PG_FUNCTION_INFO_V1(_intbig_in);
 PG_FUNCTION_INFO_V1(_intbig_out);
 
@@ -207,12 +187,7 @@ g_intbig_compress(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign)
 {
-	int32		size = 0,
-				i;
-
-	LOOPBYTE
-		size += number_of_ones[(unsigned char) sign[i]];
-	return size;
+	return pg_popcount(sign, SIGLEN);
 }
 
 static int
@@ -225,7 +200,8 @@ hemdistsign(BITVECP a, BITVECP b)
 	LOOPBYTE
 	{
 		diff = (unsigned char) (a[i] ^ b[i]);
-		dist += number_of_ones[diff];
+		/* Using the popcount functions here isn't likely to win */
+		dist += pg_number_of_ones[diff];
 	}
 	return dist;
 }
diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c
index 28bf7ad9634f1695597cd68f809862354a66ab9a..c33ac4eedd263bc4e025b51b18490d71ec915363 100644
--- a/contrib/ltree/_ltree_gist.c
+++ b/contrib/ltree/_ltree_gist.c
@@ -9,6 +9,8 @@
 
 #include "access/gist.h"
 #include "access/stratnum.h"
+#include "port/pg_bitutils.h"
+
 #include "crc32.h"
 #include "ltree.h"
 
@@ -23,26 +25,6 @@ PG_FUNCTION_INFO_V1(_ltree_consistent);
 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-/* Number of one-bits in an unsigned byte */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
 
 
@@ -209,12 +191,7 @@ _ltree_union(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign)
 {
-	int32		size = 0,
-				i;
-
-	ALOOPBYTE
-		size += number_of_ones[(unsigned char) sign[i]];
-	return size;
+	return pg_popcount((const char *) sign, ASIGLEN);
 }
 
 static int
@@ -227,7 +204,8 @@ hemdistsign(BITVECP a, BITVECP b)
 	ALOOPBYTE
 	{
 		diff = (unsigned char) (a[i] ^ b[i]);
-		dist += number_of_ones[diff];
+		/* Using the popcount functions here isn't likely to win */
+		dist += pg_number_of_ones[diff];
 	}
 	return dist;
 }
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index f1e05478daaa31efc53a68abe42fa8009922e281..e79db8a4f0388882a7e08f4a13703ee062ff6978 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -7,6 +7,7 @@
 
 #include "access/stratnum.h"
 #include "fmgr.h"
+#include "port/pg_bitutils.h"
 
 
 typedef struct
@@ -39,26 +40,6 @@ PG_FUNCTION_INFO_V1(gtrgm_same);
 PG_FUNCTION_INFO_V1(gtrgm_penalty);
 PG_FUNCTION_INFO_V1(gtrgm_picksplit);
 
-/* Number of one-bits in an unsigned byte */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
 
 Datum
 gtrgm_in(PG_FUNCTION_ARGS)
@@ -634,12 +615,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign)
 {
-	int32		size = 0,
-				i;
-
-	LOOPBYTE
-		size += number_of_ones[(unsigned char) sign[i]];
-	return size;
+	return pg_popcount(sign, SIGLEN);
 }
 
 static int
@@ -652,7 +628,8 @@ hemdistsign(BITVECP a, BITVECP b)
 	LOOPBYTE
 	{
 		diff = (unsigned char) (a[i] ^ b[i]);
-		dist += number_of_ones[diff];
+		/* Using the popcount functions here isn't likely to win */
+		dist += pg_number_of_ones[diff];
 	}
 	return dist;
 }
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 931ae81fd6d38504d33a84fba0346691e310a8cc..64dfe06b261f04c120e283098bd0527b04389203 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -89,6 +89,7 @@
 #include "access/visibilitymap.h"
 #include "access/xlog.h"
 #include "miscadmin.h"
+#include "port/pg_bitutils.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/smgr.h"
@@ -115,43 +116,11 @@
 #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
 #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
 
-/* tables for fast counting of set bits for visible and frozen */
-static const uint8 number_of_ones_for_visible[256] = {
-	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
-	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
-	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
-	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
-};
-static const uint8 number_of_ones_for_frozen[256] = {
-	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
-	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
-	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
-	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
-	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
-	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
-};
+/* Masks for counting subsets of bits in the visibility map. */
+#define VISIBLE_MASK64	UINT64CONST(0x5555555555555555) /* The lower bit of each
+														 * bit pair */
+#define FROZEN_MASK64	UINT64CONST(0xaaaaaaaaaaaaaaaa) /* The upper bit of each
+														 * bit pair */
 
 /* prototypes for internal routines */
 static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
@@ -408,18 +377,16 @@ void
 visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
 {
 	BlockNumber mapBlock;
+	BlockNumber nvisible = 0;
+	BlockNumber nfrozen = 0;
 
 	/* all_visible must be specified */
 	Assert(all_visible);
 
-	*all_visible = 0;
-	if (all_frozen)
-		*all_frozen = 0;
-
 	for (mapBlock = 0;; mapBlock++)
 	{
 		Buffer		mapBuffer;
-		unsigned char *map;
+		uint64	   *map;
 		int			i;
 
 		/*
@@ -436,17 +403,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
 		 * immediately stale anyway if anyone is concurrently setting or
 		 * clearing bits, and we only really need an approximate value.
 		 */
-		map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
+		map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
 
-		for (i = 0; i < MAPSIZE; i++)
+		StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
+						 "unsupported MAPSIZE");
+		if (all_frozen == NULL)
 		{
-			*all_visible += number_of_ones_for_visible[map[i]];
-			if (all_frozen)
-				*all_frozen += number_of_ones_for_frozen[map[i]];
+			for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+				nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+		}
+		else
+		{
+			for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+			{
+				nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+				nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
+			}
 		}
 
 		ReleaseBuffer(mapBuffer);
 	}
+
+	*all_visible = nvisible;
+	if (all_frozen)
+		*all_frozen = nfrozen;
 }
 
 /*
diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c
index 1e907cabc6e12dfc01b658e1b5a3b782881c9774..e2c1276f219ff2bff31f056d96e55ef87aedc7a6 100644
--- a/src/backend/lib/bloomfilter.c
+++ b/src/backend/lib/bloomfilter.c
@@ -37,6 +37,7 @@
 
 #include "access/hash.h"
 #include "lib/bloomfilter.h"
+#include "port/pg_bitutils.h"
 
 #define MAX_HASH_FUNCS		10
 
@@ -187,19 +188,7 @@ double
 bloom_prop_bits_set(bloom_filter *filter)
 {
 	int			bitset_bytes = filter->m / BITS_PER_BYTE;
-	uint64		bits_set = 0;
-	int			i;
-
-	for (i = 0; i < bitset_bytes; i++)
-	{
-		unsigned char byte = filter->bitset[i];
-
-		while (byte)
-		{
-			bits_set++;
-			byte &= (byte - 1);
-		}
-	}
+	uint64		bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
 
 	return bits_set / (double) filter->m;
 }
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 62cd00903c03e49a2bd083d10fe0f2550bdd37a6..54f8567c01c8d79eefab581f2c251d7eaebb7218 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -22,6 +22,7 @@
 
 #include "access/hash.h"
 #include "nodes/pg_list.h"
+#include "port/pg_bitutils.h"
 
 
 #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
@@ -51,79 +52,18 @@
 
 #define HAS_MULTIPLE_ONES(x)	((bitmapword) RIGHTMOST_ONE(x) != (x))
 
-
-/*
- * Lookup tables to avoid need for bit-by-bit groveling
- *
- * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
- * in a nonzero byte value x.  The entry for x=0 is never used.
- *
- * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
- * nonzero byte value x.  The entry for x=0 is never used.
- *
- * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
- *
- * We could make these tables larger and reduce the number of iterations
- * in the functions that use them, but bytewise shifts and masks are
- * especially fast on many machines, so working a byte at a time seems best.
- */
-
-static const uint8 rightmost_one_pos[256] = {
-	0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-
-static const uint8 leftmost_one_pos[256] = {
-	0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
-};
-
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
+/* Select appropriate bit-twiddling functions for bitmap word size */
+#if BITS_PER_BITMAPWORD == 32
+#define bmw_leftmost_one_pos(w)		pg_leftmost_one_pos32(w)
+#define bmw_rightmost_one_pos(w)	pg_rightmost_one_pos32(w)
+#define bmw_popcount(w)				pg_popcount32(w)
+#elif BITS_PER_BITMAPWORD == 64
+#define bmw_leftmost_one_pos(w)		pg_leftmost_one_pos64(w)
+#define bmw_rightmost_one_pos(w)	pg_rightmost_one_pos64(w)
+#define bmw_popcount(w)				pg_popcount64(w)
+#else
+#error "invalid BITS_PER_BITMAPWORD"
+#endif
 
 
 /*
@@ -607,12 +547,7 @@ bms_singleton_member(const Bitmapset *a)
 			if (result >= 0 || HAS_MULTIPLE_ONES(w))
 				elog(ERROR, "bitmapset has multiple members");
 			result = wordnum * BITS_PER_BITMAPWORD;
-			while ((w & 255) == 0)
-			{
-				w >>= 8;
-				result += 8;
-			}
-			result += rightmost_one_pos[w & 255];
+			result += bmw_rightmost_one_pos(w);
 		}
 	}
 	if (result < 0)
@@ -650,12 +585,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
 			if (result >= 0 || HAS_MULTIPLE_ONES(w))
 				return false;
 			result = wordnum * BITS_PER_BITMAPWORD;
-			while ((w & 255) == 0)
-			{
-				w >>= 8;
-				result += 8;
-			}
-			result += rightmost_one_pos[w & 255];
+			result += bmw_rightmost_one_pos(w);
 		}
 	}
 	if (result < 0)
@@ -681,12 +611,9 @@ bms_num_members(const Bitmapset *a)
 	{
 		bitmapword	w = a->words[wordnum];
 
-		/* we assume here that bitmapword is an unsigned type */
-		while (w != 0)
-		{
-			result += number_of_ones[w & 255];
-			w >>= 8;
-		}
+		/* No need to count the bits in a zero word */
+		if (w != 0)
+			result += bmw_popcount(w);
 	}
 	return result;
 }
@@ -1041,12 +968,7 @@ bms_first_member(Bitmapset *a)
 			a->words[wordnum] &= ~w;
 
 			result = wordnum * BITS_PER_BITMAPWORD;
-			while ((w & 255) == 0)
-			{
-				w >>= 8;
-				result += 8;
-			}
-			result += rightmost_one_pos[w & 255];
+			result += bmw_rightmost_one_pos(w);
 			return result;
 		}
 	}
@@ -1096,12 +1018,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
 			int			result;
 
 			result = wordnum * BITS_PER_BITMAPWORD;
-			while ((w & 255) == 0)
-			{
-				w >>= 8;
-				result += 8;
-			}
-			result += rightmost_one_pos[w & 255];
+			result += bmw_rightmost_one_pos(w);
 			return result;
 		}
 
@@ -1168,14 +1085,9 @@ bms_prev_member(const Bitmapset *a, int prevbit)
 		if (w != 0)
 		{
 			int			result;
-			int			shift = BITS_PER_BITMAPWORD - 8;
 
 			result = wordnum * BITS_PER_BITMAPWORD;
-
-			while ((w >> shift) == 0)
-				shift -= 8;
-
-			result += shift + leftmost_one_pos[(w >> shift) & 255];
+			result += bmw_leftmost_one_pos(w);
 			return result;
 		}
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index a19879364674d67ddf48822c9b30b1a9e72a42d3..4f256260fd11387f96bfcd02e6410a8eb27ffc51 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -16,6 +16,7 @@
 
 #include "access/gist.h"
 #include "access/tuptoaster.h"
+#include "port/pg_bitutils.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/pg_crc.h"
@@ -70,26 +71,6 @@ typedef struct
 #define GETARR(x)	( (int32*)( (char*)(x)+GTHDRSIZE ) )
 #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) )
 
-/* Number of one-bits in an unsigned byte */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
 static int32 sizebitvec(BITVECP sign);
 
 Datum
@@ -503,12 +484,7 @@ gtsvector_same(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign)
 {
-	int32		size = 0,
-				i;
-
-	LOOPBYTE
-		size += number_of_ones[(unsigned char) sign[i]];
-	return size;
+	return pg_popcount(sign, SIGLEN);
 }
 
 static int
@@ -521,7 +497,8 @@ hemdistsign(BITVECP a, BITVECP b)
 	LOOPBYTE
 	{
 		diff = (unsigned char) (a[i] ^ b[i]);
-		dist += number_of_ones[diff];
+		/* Using the popcount functions here isn't likely to win */
+		dist += pg_number_of_ones[diff];
 	}
 	return dist;
 }
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index b38b0ae189982a4cadb11a4d8403cc85648f05da..6cd4cfed0aac68b2d702fdfee46ba5a6bca6f230 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -736,6 +736,9 @@
 /* Define to 1 if you have the `X509_get_signature_nid' function. */
 #undef HAVE_X509_GET_SIGNATURE_NID
 
+/* Define to 1 if the assembler supports X86_64's POPCNTQ instruction. */
+#undef HAVE_X86_64_POPCNTQ
+
 /* Define to 1 if the system has the type `_Bool'. */
 #undef HAVE__BOOL
 
@@ -748,12 +751,21 @@
 /* Define to 1 if your compiler understands __builtin_bswap64. */
 #undef HAVE__BUILTIN_BSWAP64
 
+/* Define to 1 if your compiler understands __builtin_clz. */
+#undef HAVE__BUILTIN_CLZ
+
 /* Define to 1 if your compiler understands __builtin_constant_p. */
 #undef HAVE__BUILTIN_CONSTANT_P
 
+/* Define to 1 if your compiler understands __builtin_ctz. */
+#undef HAVE__BUILTIN_CTZ
+
 /* Define to 1 if your compiler understands __builtin_$op_overflow. */
 #undef HAVE__BUILTIN_OP_OVERFLOW
 
+/* Define to 1 if your compiler understands __builtin_popcount. */
+#undef HAVE__BUILTIN_POPCOUNT
+
 /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
 #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
 
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 160fa1279e4815ff2c2a0006844c2800ad0a2b84..dfd697238363ae27c114582dd543323bded4dbb7 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -575,6 +575,9 @@
 /* Define to 1 if you have the `X509_get_signature_nid' function. */
 #define HAVE_X509_GET_SIGNATURE_NID 1
 
+/* Define to 1 if the assembler supports X86_64's POPCNTQ instruction. */
+/* #undef HAVE_X86_64_POPCNTQ */
+
 /* Define to 1 if the system has the type `_Bool'. */
 /* #undef HAVE__BOOL */
 
@@ -587,12 +590,21 @@
 /* Define to 1 if your compiler understands __builtin_bswap64. */
 /* #undef HAVE__BUILTIN_BSWAP64 */
 
+/* Define to 1 if your compiler understands __builtin_clz. */
+/* #undef HAVE__BUILTIN_CLZ */
+
 /* Define to 1 if your compiler understands __builtin_constant_p. */
 /* #undef HAVE__BUILTIN_CONSTANT_P */
 
+/* Define to 1 if your compiler understands __builtin_ctz. */
+/* #undef HAVE__BUILTIN_CTZ */
+
 /* Define to 1 if your compiler understands __builtin_$op_overflow. */
 /* #undef HAVE__BUILTIN_OP_OVERFLOW */
 
+/* Define to 1 if your compiler understands __builtin_popcount. */
+/* #undef HAVE__BUILTIN_POPCOUNT */
+
 /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
 /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
 
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe7c3d0ffc04e7a2fba96556e203926a4ac1b65d
--- /dev/null
+++ b/src/include/port/pg_bitutils.h
@@ -0,0 +1,139 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_bitutils.h
+ *	  Miscellaneous functions for bit-wise operations.
+ *
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * src/include/port/pg_bitutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_BITUTILS_H
+#define PG_BITUTILS_H
+
+extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256];
+extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256];
+extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
+
+/*
+ * pg_leftmost_one_pos32
+ *		Returns the position of the most significant set bit in "word",
+ *		measured from the least significant bit.  word must not be 0.
+ */
+static inline int
+pg_leftmost_one_pos32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_CLZ
+	Assert(word != 0);
+
+	return 31 - __builtin_clz(word);
+#else
+	int			shift = 32 - 8;
+
+	Assert(word != 0);
+
+	while ((word >> shift) == 0)
+		shift -= 8;
+
+	return shift + pg_leftmost_one_pos[(word >> shift) & 255];
+#endif							/* HAVE__BUILTIN_CLZ */
+}
+
+/*
+ * pg_leftmost_one_pos64
+ *		As above, but for a 64-bit word.
+ */
+static inline int
+pg_leftmost_one_pos64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_CLZ
+	Assert(word != 0);
+
+#if defined(HAVE_LONG_INT_64)
+	return 63 - __builtin_clzl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+	return 63 - __builtin_clzll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+#else							/* !HAVE__BUILTIN_CLZ */
+	int			shift = 64 - 8;
+
+	Assert(word != 0);
+
+	while ((word >> shift) == 0)
+		shift -= 8;
+
+	return shift + pg_leftmost_one_pos[(word >> shift) & 255];
+#endif							/* HAVE__BUIILTIN_CLZ */
+}
+
+/*
+ * pg_rightmost_one_pos32
+ *		Returns the position of the least significant set bit in "word",
+ *		measured from the least significant bit.  word must not be 0.
+ */
+static inline int
+pg_rightmost_one_pos32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_CTZ
+	Assert(word != 0);
+
+	return __builtin_ctz(word);
+#else
+	int			result = 0;
+
+	Assert(word != 0);
+
+	while ((word & 255) == 0)
+	{
+		word >>= 8;
+		result += 8;
+	}
+	result += pg_rightmost_one_pos[word & 255];
+	return result;
+#endif							/* HAVE__BUILTIN_CTZ */
+}
+
+/*
+ * pg_rightmost_one_pos64
+ *		As above, but for a 64-bit word.
+ */
+static inline int
+pg_rightmost_one_pos64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_CTZ
+	Assert(word != 0);
+
+#if defined(HAVE_LONG_INT_64)
+	return __builtin_ctzl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+	return __builtin_ctzll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+#else							/* !HAVE__BUILTIN_CTZ */
+	int			result = 0;
+
+	Assert(word != 0);
+
+	while ((word & 255) == 0)
+	{
+		word >>= 8;
+		result += 8;
+	}
+	result += pg_rightmost_one_pos[word & 255];
+	return result;
+#endif							/* HAVE__BUILTIN_CTZ */
+}
+
+/* Count the number of one-bits in a uint32 or uint64 */
+extern int	(*pg_popcount32) (uint32 word);
+extern int	(*pg_popcount64) (uint64 word);
+
+/* Count the number of one-bits in a byte array */
+extern uint64 pg_popcount(const char *buf, int bytes);
+
+#endif							/* PG_BITUTILS_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index 9cfc0f92796c89875d25f004d4b8c7f9d2820598..b9492b741f48230281a23afded30e68531f25ff8 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
 LIBS += $(PTHREAD_LIBS)
 
 OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
-	noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
+	noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
 	pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
 	qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
 	tar.o thread.o
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
new file mode 100644
index 0000000000000000000000000000000000000000..d12765b33f2af3931b12276451b6838e390378b7
--- /dev/null
+++ b/src/port/pg_bitutils.c
@@ -0,0 +1,321 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_bitutils.c
+ *	  Miscellaneous functions for bit-wise operations.
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_bitutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#ifdef HAVE__GET_CPUID
+#include <cpuid.h>
+#endif
+#ifdef HAVE__CPUID
+#include <intrin.h>
+#endif
+
+#include "port/pg_bitutils.h"
+
+
+/*
+ * Array giving the position of the left-most set bit for each possible
+ * byte value.  We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit.  The 0th entry of the array should not be used.
+ *
+ * Note: this is not used by the functions in pg_bitutils.h when
+ * HAVE_BUILTIN_CLZ is defined, but we provide it anyway, so that
+ * extensions possibly compiled with a different compiler can use it.
+ */
+const uint8 pg_leftmost_one_pos[256] = {
+	0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+};
+
+/*
+ * Array giving the position of the right-most set bit for each possible
+ * byte value.  We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit.  The 0th entry of the array should not be used.
+ *
+ * Note: this is not used by the functions in pg_bitutils.h when
+ * HAVE_BUILTIN_CTZ is defined, but we provide it anyway, so that
+ * extensions possibly compiled with a different compiler can use it.
+ */
+const uint8 pg_rightmost_one_pos[256] = {
+	0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/*
+ * Array giving the number of 1-bits in each possible byte value.
+ *
+ * Note: we export this for use by functions in which explicit use
+ * of the popcount functions seems unlikely to be a win.
+ */
+const uint8 pg_number_of_ones[256] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/*
+ * On x86_64, we can use the hardware popcount instruction, but only if
+ * we can verify that the CPU supports it via the cpuid instruction.
+ *
+ * Otherwise, we fall back to __builtin_popcount if the compiler has that,
+ * or a hand-rolled implementation if not.
+ */
+#ifdef HAVE_X86_64_POPCNTQ
+#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
+#define USE_POPCNT_ASM 1
+#endif
+#endif
+
+static int	pg_popcount32_slow(uint32 word);
+static int	pg_popcount64_slow(uint64 word);
+
+#ifdef USE_POPCNT_ASM
+static bool pg_popcount_available(void);
+static int	pg_popcount32_choose(uint32 word);
+static int	pg_popcount64_choose(uint64 word);
+static int	pg_popcount32_asm(uint32 word);
+static int	pg_popcount64_asm(uint64 word);
+
+int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
+int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
+#else
+int			(*pg_popcount32) (uint32 word) = pg_popcount32_slow;
+int			(*pg_popcount64) (uint64 word) = pg_popcount64_slow;
+#endif							/* USE_POPCNT_ASM */
+
+#ifdef USE_POPCNT_ASM
+
+/*
+ * Return true if CPUID indicates that the POPCNT instruction is available.
+ */
+static bool
+pg_popcount_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+
+	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
+}
+
+/*
+ * These functions get called on the first call to pg_popcount32 etc.
+ * They detect whether we can use the asm implementations, and replace
+ * the function pointers so that subsequent calls are routed directly to
+ * the chosen implementation.
+ */
+static int
+pg_popcount32_choose(uint32 word)
+{
+	if (pg_popcount_available())
+	{
+		pg_popcount32 = pg_popcount32_asm;
+		pg_popcount64 = pg_popcount64_asm;
+	}
+	else
+	{
+		pg_popcount32 = pg_popcount32_slow;
+		pg_popcount64 = pg_popcount64_slow;
+	}
+
+	return pg_popcount32(word);
+}
+
+static int
+pg_popcount64_choose(uint64 word)
+{
+	if (pg_popcount_available())
+	{
+		pg_popcount32 = pg_popcount32_asm;
+		pg_popcount64 = pg_popcount64_asm;
+	}
+	else
+	{
+		pg_popcount32 = pg_popcount32_slow;
+		pg_popcount64 = pg_popcount64_slow;
+	}
+
+	return pg_popcount64(word);
+}
+
+/*
+ * pg_popcount32_asm
+ *		Return the number of 1 bits set in word
+ */
+static int
+pg_popcount32_asm(uint32 word)
+{
+	uint32		res;
+
+	__asm__ __volatile__(" popcntl %1,%0\n" : "=q"(res) : "rm"(word) : "cc");
+	return (int) res;
+}
+
+/*
+ * pg_popcount64_asm
+ *		Return the number of 1 bits set in word
+ */
+static int
+pg_popcount64_asm(uint64 word)
+{
+	uint64		res;
+
+	__asm__ __volatile__(" popcntq %1,%0\n" : "=q"(res) : "rm"(word) : "cc");
+	return (int) res;
+}
+
+#endif							/* USE_POPCNT_ASM */
+
+
+/*
+ * pg_popcount32_slow
+ *		Return the number of 1 bits set in word
+ */
+static int
+pg_popcount32_slow(uint32 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+	return __builtin_popcount(word);
+#else							/* !HAVE__BUILTIN_POPCOUNT */
+	int			result = 0;
+
+	while (word != 0)
+	{
+		result += pg_number_of_ones[word & 255];
+		word >>= 8;
+	}
+
+	return result;
+#endif							/* HAVE__BUILTIN_POPCOUNT */
+}
+
+/*
+ * pg_popcount64_slow
+ *		Return the number of 1 bits set in word
+ */
+static int
+pg_popcount64_slow(uint64 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+#if defined(HAVE_LONG_INT_64)
+	return __builtin_popcountl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+	return __builtin_popcountll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+#else							/* !HAVE__BUILTIN_POPCOUNT */
+	int			result = 0;
+
+	while (word != 0)
+	{
+		result += pg_number_of_ones[word & 255];
+		word >>= 8;
+	}
+
+	return result;
+#endif							/* HAVE__BUILTIN_POPCOUNT */
+}
+
+
+/*
+ * pg_popcount
+ *		Returns the number of 1-bits in buf
+ */
+uint64
+pg_popcount(const char *buf, int bytes)
+{
+	uint64		popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Process in 64-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(8, buf))
+	{
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+#else
+	/* Process in 32-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(4, buf))
+	{
+		const uint32 *words = (const uint32 *) buf;
+
+		while (bytes >= 4)
+		{
+			popcnt += pg_popcount32(*words++);
+			bytes -= 4;
+		}
+
+		buf = (const char *) words;
+	}
+#endif
+
+	/* Process any remaining bytes */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 5251a21d3487ab7ddf3742b865d55aadd3d8c92f..f4225030fc21e5722c613f5dbb85a19d0db8182c 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -97,7 +97,7 @@ sub mkvcbuild
 	  srandom.c getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
 	  erand48.c snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
 	  dirent.c dlopen.c getopt.c getopt_long.c
-	  pread.c pwrite.c
+	  pread.c pwrite.c pg_bitutils.c
 	  pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
 	  pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
 	  sprompt.c strerror.c tar.c thread.c