From 75be66464cb1bffa1e5757907b9a04ad5afc7859 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 16 Jun 2016 13:47:20 -0400
Subject: [PATCH] Invent min_parallel_relation_size GUC to replace a hard-wired
 constant.

The main point of doing this is to allow the cutoff to be set very small,
even zero, to allow parallel-query behavior to be tested on relatively
small tables such as we typically use in the regression tests.  But it
might be of use to users too.  The number-of-workers scaling behavior in
create_plain_partial_paths() is pretty ad-hoc and subject to change, so
we won't expose anything about that, but the notion of not considering
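parallel query at all for tables below size X seems reasonably stable.
Note that the default is 1024 blocks (8MB), slightly more than the
former hard-wired threshold of 1000 pages.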

Amit Kapila, per a suggestion from me

Discussion: <17170.1465830165@sss.pgh.pa.us>
---
 doc/src/sgml/config.sgml                      | 14 ++++++++++++++
 src/backend/optimizer/path/allpaths.c         | 15 ++++++++++-----
 src/backend/utils/misc/guc.c                  | 11 +++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/optimizer/paths.h                 |  1 +
 5 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e0e5a1edc60..a82bf065a06 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3699,6 +3699,20 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-min-parallel-relation-size" xreflabel="min_parallel_relation_size">
+      <term><varname>min_parallel_relation_size</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>min_parallel_relation_size</> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the minimum size of relations to be considered for parallel scan.
+        If specified without units, the value is taken as blocks.  The default is 8 megabytes (<literal>8MB</>).
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-effective-cache-size" xreflabel="effective_cache_size">
       <term><varname>effective_cache_size</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index cc8ba61bb0c..2e4b670e069 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -15,6 +15,7 @@
 
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/sysattr.h"
@@ -56,6 +57,7 @@ typedef struct pushdown_safety_info
 /* These parameters are set by GUC */
 bool		enable_geqo = false;	/* just in case GUC doesn't set it */
 int			geqo_threshold;
+int			min_parallel_relation_size;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
@@ -690,7 +692,7 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
 		parallel_workers = rel->rel_parallel_workers;
 	else
 	{
-		int			parallel_threshold = 1000;
+		int			parallel_threshold;
 
 		/*
 		 * If this relation is too small to be worth a parallel scan, just
@@ -699,21 +701,24 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
 		 * might not be worthwhile just for this relation, but when combined
 		 * with all of its inheritance siblings it may well pay off.
 		 */
-		if (rel->pages < parallel_threshold &&
+		if (rel->pages < (BlockNumber) min_parallel_relation_size &&
 			rel->reloptkind == RELOPT_BASEREL)
 			return;
 
 		/*
 		 * Select the number of workers based on the log of the size of the
 		 * relation.  This probably needs to be a good deal more
-		 * sophisticated, but we need something here for now.
+		 * sophisticated, but we need something here for now.  Note that the
+		 * upper limit of the min_parallel_relation_size GUC is chosen to
+		 * prevent overflow here.
 		 */
 		parallel_workers = 1;
-		while (rel->pages > parallel_threshold * 3)
+		parallel_threshold = Max(min_parallel_relation_size, 1);
+		while (rel->pages >= (BlockNumber) (parallel_threshold * 3))
 		{
 			parallel_workers++;
 			parallel_threshold *= 3;
-			if (parallel_threshold >= PG_INT32_MAX / 3)
+			if (parallel_threshold > INT_MAX / 3)
 				break;			/* avoid overflow */
 		}
 	}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 9b02111a834..60148b871b3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2747,6 +2747,17 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"min_parallel_relation_size", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Sets the minimum size of relations to be considered for parallel scan."),
+			NULL,
+			GUC_UNIT_BLOCKS,
+		},
+		&min_parallel_relation_size,
+		1024, 0, INT_MAX / 3,
+		NULL, NULL, NULL
+	},
+
 	{
 		/* Can't be set in postgresql.conf */
 		{"server_version_num", PGC_INTERNAL, PRESET_OPTIONS,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 8260e371bc9..3fa05403bbc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -304,6 +304,7 @@
 #cpu_operator_cost = 0.0025		# same scale as above
 #parallel_tuple_cost = 0.1		# same scale as above
 #parallel_setup_cost = 1000.0	# same scale as above
+#min_parallel_relation_size = 8MB
 #effective_cache_size = 4GB
 
 # - Genetic Query Optimizer -
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f3b25e2419c..cc6f85d6dbf 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -22,6 +22,7 @@
  */
 extern bool enable_geqo;
 extern int	geqo_threshold;
+extern int	min_parallel_relation_size;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
-- 
GitLab