From fdea2530bd4afb3d512cb9700b1d8cd603fab0e7 Mon Sep 17 00:00:00 2001
From: Simon Riggs <simon@2ndQuadrant.com>
Date: Tue, 30 Apr 2013 06:59:26 +0100
Subject: [PATCH] Compiler optimizations for page checksum code.

Ants Aasma and Jeff Davis
---
 config/c-compiler.m4              |  25 ++++++
 configure                         | 130 ++++++++++++++++++++++++++++++
 configure.in                      |  10 +++
 src/Makefile.global.in            |   1 +
 src/backend/storage/page/Makefile |   3 +
 5 files changed, 169 insertions(+)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 29db5b16b04..4ba3236ef46 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -242,6 +242,31 @@ undefine([Ac_cachevar])dnl
 
 
 
+# PGAC_PROG_CC_VAR_OPT
+# --------------------
+# Given a variable name ($1) and a string ($2), check if the compiler
+# accepts the string as a command-line option, treating any warning as
+# failure. If it does, append the string to the given variable.
+AC_DEFUN([PGAC_PROG_CC_VAR_OPT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_cflags_$2])])dnl
+AC_CACHE_CHECK([whether $CC supports $2], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS $2"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+_AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+                   [Ac_cachevar=yes],
+                   [Ac_cachevar=no])
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+  $1="${$1} $2"
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_PROG_CC_VAR_OPT
+
+
+
 # PGAC_PROG_CC_LDFLAGS_OPT
 # ------------------------
 # Given a string, check if the compiler supports the string as a
diff --git a/configure b/configure
index b391308d810..826f3e183c6 100755
--- a/configure
+++ b/configure
@@ -731,6 +731,7 @@ autodepend
 TAS
 GCC
 CPP
+CFLAGS_VECTOR
 SUN_STUDIO_CC
 OBJEXT
 EXEEXT
@@ -3944,6 +3945,11 @@ else
   fi
 fi
 
+# set CFLAGS_VECTOR from the environment, if available
+if test "$ac_env_CFLAGS_VECTOR_set" = set; then
+  CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value
+fi
+
 # Some versions of GCC support some additional useful warning flags.
 # Check whether they are supported, and add them to CFLAGS if so.
 # ICC pretends to be GCC but it's lying; it doesn't support these flags,
@@ -4376,6 +4382,127 @@ if test x"$pgac_cv_prog_cc_cflags__fexcess_precision_standard" = x"yes"; then
   CFLAGS="$CFLAGS -fexcess-precision=standard"
 fi
 
+  # Optimization flags for specific files that benefit from vectorization
+  { $as_echo "$as_me:$LINENO: checking whether $CC supports -funroll-loops" >&5
+$as_echo_n "checking whether $CC supports -funroll-loops... " >&6; }
+if test "${pgac_cv_prog_cc_cflags__funroll_loops+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -funroll-loops"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  pgac_cv_prog_cc_cflags__funroll_loops=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_prog_cc_cflags__funroll_loops=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__funroll_loops" >&5
+$as_echo "$pgac_cv_prog_cc_cflags__funroll_loops" >&6; }
+if test x"$pgac_cv_prog_cc_cflags__funroll_loops" = x"yes"; then
+  CFLAGS_VECTOR="${CFLAGS_VECTOR} -funroll-loops"
+fi
+
+  { $as_echo "$as_me:$LINENO: checking whether $CC supports -ftree-vectorize" >&5
+$as_echo_n "checking whether $CC supports -ftree-vectorize... " >&6; }
+if test "${pgac_cv_prog_cc_cflags__ftree_vectorize+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -ftree-vectorize"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  pgac_cv_prog_cc_cflags__ftree_vectorize=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_prog_cc_cflags__ftree_vectorize=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__ftree_vectorize" >&5
+$as_echo "$pgac_cv_prog_cc_cflags__ftree_vectorize" >&6; }
+if test x"$pgac_cv_prog_cc_cflags__ftree_vectorize" = x"yes"; then
+  CFLAGS_VECTOR="${CFLAGS_VECTOR} -ftree-vectorize"
+fi
+
 elif test "$ICC" = yes; then
   # Intel's compiler has a bug/misoptimization in checking for
   # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -4627,6 +4754,9 @@ fi
 
 fi
 
+CFLAGS_VECTOR=$CFLAGS_VECTOR
+
+
 # supply -g if --enable-debug
 if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then
   CFLAGS="$CFLAGS -g"
diff --git a/configure.in b/configure.in
index f81fda75641..ea3da2640d7 100644
--- a/configure.in
+++ b/configure.in
@@ -400,6 +400,11 @@ else
   fi
 fi
 
+# set CFLAGS_VECTOR from the environment, if available
+if test "$ac_env_CFLAGS_VECTOR_set" = set; then
+  CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value
+fi
+
 # Some versions of GCC support some additional useful warning flags.
 # Check whether they are supported, and add them to CFLAGS if so.
 # ICC pretends to be GCC but it's lying; it doesn't support these flags,
@@ -419,6 +424,9 @@ if test "$GCC" = yes -a "$ICC" = no; then
   PGAC_PROG_CC_CFLAGS_OPT([-fwrapv])
   # Disable FP optimizations that cause various errors on gcc 4.5+ or maybe 4.6+
   PGAC_PROG_CC_CFLAGS_OPT([-fexcess-precision=standard])
+  # Optimization flags for specific files that benefit from vectorization
+  PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-funroll-loops])
+  PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-ftree-vectorize])
 elif test "$ICC" = yes; then
   # Intel's compiler has a bug/misoptimization in checking for
   # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -434,6 +442,8 @@ elif test "$PORTNAME" = "hpux"; then
   PGAC_PROG_CC_CFLAGS_OPT([+Olibmerrno])
 fi
 
+AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR)
+
 # supply -g if --enable-debug
 if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then
   CFLAGS="$CFLAGS -g"
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 80f509fa872..89e39d2fa0a 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -219,6 +219,7 @@ CC = @CC@
 GCC = @GCC@
 SUN_STUDIO_CC = @SUN_STUDIO_CC@
 CFLAGS = @CFLAGS@
+CFLAGS_VECTOR = @CFLAGS_VECTOR@
 
 # Kind-of compilers
 
diff --git a/src/backend/storage/page/Makefile b/src/backend/storage/page/Makefile
index 82d9c37fda5..49ab40740ae 100644
--- a/src/backend/storage/page/Makefile
+++ b/src/backend/storage/page/Makefile
@@ -15,3 +15,6 @@ include $(top_builddir)/src/Makefile.global
 OBJS =  bufpage.o checksum.o itemptr.o
 
 include $(top_srcdir)/src/backend/common.mk
+
+# important optimization flags for checksum.c
+checksum.o: CFLAGS += ${CFLAGS_VECTOR}
-- 
GitLab