From 65e3ea76417d1baab158fd8305ebed4f43141c7a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 13 Dec 2008 19:13:44 +0000
Subject: [PATCH] Increase the default value of default_statistics_target from
 10 to 100, and its maximum value from 1000 to 10000.  ALTER TABLE SET
 STATISTICS similarly now allows a value up to 10000.  Per discussion.

---
 doc/src/sgml/config.sgml                      |  4 ++--
 doc/src/sgml/perform.sgml                     |  4 ++--
 doc/src/sgml/ref/alter_table.sgml             |  4 ++--
 doc/src/sgml/ref/analyze.sgml                 | 12 ++++++------
 src/backend/commands/analyze.c                |  8 ++++----
 src/backend/commands/tablecmds.c              |  6 +++---
 src/backend/tsearch/ts_typanalyze.c           | 10 +++++-----
 src/backend/utils/misc/guc.c                  |  4 ++--
 src/backend/utils/misc/postgresql.conf.sample |  2 +-
 9 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index cf99ba2c85f..86ccac9448c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.199 2008/12/08 15:11:39 mha Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.200 2008/12/13 19:13:43 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -2133,7 +2133,7 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"'  # Windows
         not had a column-specific target set via <command>ALTER TABLE
         SET STATISTICS</>.  Larger values increase the time needed to
         do <command>ANALYZE</>, but might improve the quality of the
-        planner's estimates. The default is 10. For more information
+        planner's estimates. The default is 100. For more information
         on the use of statistics by the <productname>PostgreSQL</>
         query planner, refer to <xref linkend="planner-stats">.
        </para>
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index be5ec9f2c2e..aea529552c6 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.68 2007/12/28 21:03:31 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.69 2008/12/13 19:13:43 tgl Exp $ -->
 
  <chapter id="performance-tips">
   <title>Performance Tips</title>
@@ -562,7 +562,7 @@ SELECT attname, n_distinct, most_common_vals FROM pg_stats WHERE tablename = 'ro
    column-by-column basis using the <command>ALTER TABLE SET STATISTICS</>
    command, or globally by setting the
    <xref linkend="guc-default-statistics-target"> configuration variable.
-   The default limit is presently 10 entries.  Raising the limit
+   The default limit is presently 100 entries.  Raising the limit
    might allow more accurate planner estimates to be made, particularly for
    columns with irregular data distributions, at the price of consuming
    more space in <structname>pg_statistic</structname> and slightly more
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 61cd403fcfd..b19c89f4211 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.101 2008/11/14 10:22:45 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.102 2008/12/13 19:13:44 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -142,7 +142,7 @@ where <replaceable class="PARAMETER">action</replaceable> is one of:
       This form
       sets the per-column statistics-gathering target for subsequent
       <xref linkend="sql-analyze" endterm="sql-analyze-title"> operations.
-      The target can be set in the range 0 to 1000; alternatively, set it
+      The target can be set in the range 0 to 10000; alternatively, set it
       to -1 to revert to using the system default statistics
       target (<xref linkend="guc-default-statistics-target">).
       For more information on the use of statistics by the
diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml
index ad851405856..418082e6d93 100644
--- a/doc/src/sgml/ref/analyze.sgml
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.24 2008/11/14 10:22:45 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.25 2008/12/13 19:13:44 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -132,10 +132,10 @@ ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
    will change slightly each time <command>ANALYZE</command> is run,
    even if the actual table contents did not change.  This might result
    in small changes in the planner's estimated costs shown by
-   <xref linkend="sql-explain" endterm="sql-explain-title">. In rare situations, this
-   non-determinism will cause the query optimizer to choose a
-   different query plan between runs of <command>ANALYZE</command>. To
-   avoid this, raise the amount of statistics collected by
+   <xref linkend="sql-explain" endterm="sql-explain-title">.
+   In rare situations, this non-determinism will cause the planner's
+   choices of query plans to change after <command>ANALYZE</command> is run.
+   To avoid this, raise the amount of statistics collected by
    <command>ANALYZE</command>, as described below.
   </para>
 
@@ -148,7 +148,7 @@ ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
    endterm="sql-altertable-title">).  The target value sets the
    maximum number of entries in the most-common-value list and the
    maximum number of bins in the histogram.  The default target value
-   is 10, but this can be adjusted up or down to trade off accuracy of
+   is 100, but this can be adjusted up or down to trade off accuracy of
    planner estimates against the time taken for
    <command>ANALYZE</command> and the amount of space occupied in
    <literal>pg_statistic</literal>.  In particular, setting the
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 6b95075be1f..2b3af54ff05 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.128 2008/11/10 00:49:37 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.129 2008/12/13 19:13:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,7 +67,7 @@ typedef struct AnlIndexData
 
 
 /* Default statistics target (GUC parameter) */
-int			default_statistics_target = 10;
+int			default_statistics_target = 100;
 
 /* A few variables that don't seem worth passing around as parameters */
 static int	elevel = -1;
@@ -1531,10 +1531,10 @@ std_typanalyze(VacAttrStats *stats)
 		 * error in bin size f, and error probability gamma, the minimum
 		 * random sample size is
 		 *		r = 4 * k * ln(2*n/gamma) / f^2
-		 * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+		 * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
 		 *		r = 305.82 * k
 		 * Note that because of the log function, the dependence on n is
-		 * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+		 * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
 		 * bin size error with probability 0.99.  So there's no real need to
 		 * scale for n, which is a good thing because we don't necessarily
 		 * know it at this point.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 8e12a77d11c..6c60ddd5c10 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.272 2008/12/06 23:22:46 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.273 2008/12/13 19:13:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3942,9 +3942,9 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue)
 				 errmsg("statistics target %d is too low",
 						newtarget)));
 	}
-	else if (newtarget > 1000)
+	else if (newtarget > 10000)
 	{
-		newtarget = 1000;
+		newtarget = 10000;
 		ereport(WARNING,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("lowering statistics target to %d",
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 199432097bc..d30afe352c5 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.4 2008/12/13 19:13:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,7 +63,7 @@ ts_typanalyze(PG_FUNCTION_ARGS)
 		attr->attstattarget = default_statistics_target;
 
 	stats->compute_stats = compute_tsvector_stats;
-	/* see comment about the choice of minrows from analyze.c */
+	/* see comment about the choice of minrows in commands/analyze.c */
 	stats->minrows = 300 * attr->attstattarget;
 
 	PG_RETURN_BOOL(true);
@@ -105,8 +105,8 @@ ts_typanalyze(PG_FUNCTION_ARGS)
  *	is no more than a few times w.
  *
  *	We use a hashtable for the D structure and a bucket width of
- *	statistic_target * 100, where 100 is an arbitrarily chosen constant, meant
- *	to approximate the number of lexemes in a single tsvector.
+ *	statistics_target * 100, where 100 is an arbitrarily chosen constant,
+ *	meant to approximate the number of lexemes in a single tsvector.
  */
 static void
 compute_tsvector_stats(VacAttrStats *stats,
@@ -130,7 +130,7 @@ compute_tsvector_stats(VacAttrStats *stats,
 	LexemeHashKey 	hash_key;
 	TrackItem		*item;
 
-	/* We want statistic_target * 100 lexemes in the MCELEM array */
+	/* We want statistics_target * 100 lexemes in the MCELEM array */
 	num_mcelem = stats->attr->attstattarget * 100;
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 00dac28c27e..8e3be6fe371 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.482 2008/12/02 02:00:32 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.483 2008/12/13 19:13:44 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1245,7 +1245,7 @@ static struct config_int ConfigureNamesInt[] =
 				"column-specific target set via ALTER TABLE SET STATISTICS.")
 		},
 		&default_statistics_target,
-		10, 1, 1000, NULL, NULL
+		100, 1, 10000, NULL, NULL
 	},
 	{
 		{"from_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f886ef74b21..bf85ae083c6 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -210,7 +210,7 @@
 
 # - Other Planner Options -
 
-#default_statistics_target = 10		# range 1-1000
+#default_statistics_target = 100	# range 1-10000
 #constraint_exclusion = off
 #cursor_tuple_fraction = 0.1		# range 0.0-1.0
 #from_collapse_limit = 8
-- 
GitLab