From 75be66464cb1bffa1e5757907b9a04ad5afc7859 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Thu, 16 Jun 2016 13:47:20 -0400 Subject: [PATCH] Invent min_parallel_relation_size GUC to replace a hard-wired constant. The main point of doing this is to allow the cutoff to be set very small, even zero, to allow parallel-query behavior to be tested on relatively small tables such as we typically use in the regression tests. But it might be of use to users too. The number-of-workers scaling behavior in create_plain_partial_paths() is pretty ad-hoc and subject to change, so we won't expose anything about that, but the notion of not considering parallel query at all for tables below size X seems reasonably stable. Amit Kapila, per a suggestion from me Discussion: <17170.1465830165@sss.pgh.pa.us> --- doc/src/sgml/config.sgml | 14 ++++++++++++++ src/backend/optimizer/path/allpaths.c | 15 ++++++++++----- src/backend/utils/misc/guc.c | 11 +++++++++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/optimizer/paths.h | 1 + 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e0e5a1edc60..a82bf065a06 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3699,6 +3699,20 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-min-parallel-relation-size" xreflabel="min_parallel_relation_size"> + <term><varname>min_parallel_relation_size</varname> (<type>integer</type>) + <indexterm> + <primary><varname>min_parallel_relation_size</> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Sets the minimum size of relations to be considered for parallel scan. + The default is 8 megabytes (<literal>8MB</>). + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-effective-cache-size" xreflabel="effective_cache_size"> <term><varname>effective_cache_size</varname> (<type>integer</type>) <indexterm> diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index cc8ba61bb0c..2e4b670e069 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -15,6 +15,7 @@ #include "postgres.h" +#include <limits.h> #include <math.h> #include "access/sysattr.h" @@ -56,6 +57,7 @@ typedef struct pushdown_safety_info /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ int geqo_threshold; +int min_parallel_relation_size; /* Hook for plugins to get control in set_rel_pathlist() */ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; @@ -690,7 +692,7 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) parallel_workers = rel->rel_parallel_workers; else { - int parallel_threshold = 1000; + int parallel_threshold; /* * If this relation is too small to be worth a parallel scan, just @@ -699,21 +701,24 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) * might not be worthwhile just for this relation, but when combined * with all of its inheritance siblings it may well pay off. */ - if (rel->pages < parallel_threshold && + if (rel->pages < (BlockNumber) min_parallel_relation_size && rel->reloptkind == RELOPT_BASEREL) return; /* * Select the number of workers based on the log of the size of the * relation. This probably needs to be a good deal more - * sophisticated, but we need something here for now. + * sophisticated, but we need something here for now. Note that the + * upper limit of the min_parallel_relation_size GUC is chosen to + * prevent overflow here. */ parallel_workers = 1; - while (rel->pages > parallel_threshold * 3) + parallel_threshold = Max(min_parallel_relation_size, 1); + while (rel->pages >= (BlockNumber) (parallel_threshold * 3)) { parallel_workers++; parallel_threshold *= 3; - if (parallel_threshold >= PG_INT32_MAX / 3) + if (parallel_threshold > INT_MAX / 3) break; /* avoid overflow */ } } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 9b02111a834..60148b871b3 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2747,6 +2747,17 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"min_parallel_relation_size", PGC_USERSET, QUERY_TUNING_COST, + gettext_noop("Sets the minimum size of relations to be considered for parallel scan."), + NULL, + GUC_UNIT_BLOCKS, + }, + &min_parallel_relation_size, + 1024, 0, INT_MAX / 3, + NULL, NULL, NULL + }, + { /* Can't be set in postgresql.conf */ {"server_version_num", PGC_INTERNAL, PRESET_OPTIONS, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 8260e371bc9..3fa05403bbc 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -304,6 +304,7 @@ #cpu_operator_cost = 0.0025 # same scale as above #parallel_tuple_cost = 0.1 # same scale as above #parallel_setup_cost = 1000.0 # same scale as above +#min_parallel_relation_size = 8MB #effective_cache_size = 4GB # - Genetic Query Optimizer - diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f3b25e2419c..cc6f85d6dbf 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -22,6 +22,7 @@ */ extern bool enable_geqo; extern int geqo_threshold; +extern int min_parallel_relation_size; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, -- GitLab