diff --git a/doc/src/sgml/libpq++.sgml b/doc/src/sgml/libpq++.sgml index d259206c88227318029f6fd5405cc4936aa06b3b..65cc873e2803b1a7e1daa8989d5882acca7e8ab5 100644 --- a/doc/src/sgml/libpq++.sgml +++ b/doc/src/sgml/libpq++.sgml @@ -164,24 +164,6 @@ sets the default mode for the genetic optimizer. </para> </listitem> - <listitem> - <para> - <envar>PGRPLANS</envar> - sets the default mode to allow or disable right-sided plans in the optimizer. - </para> - </listitem> - <listitem> - <para> - <envar>PGCOSTHEAP</envar> - sets the default cost for heap searches for the optimizer. - </para> - </listitem> - <listitem> - <para> - <envar>PGCOSTINDEX</envar> - sets the default cost for indexed searches for the optimizer. - </para> - </listitem> </itemizedlist> </para> diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 2e02618c62e955461add106d0e4f1e7f942f63a8..506d98002c586a4559300cfc4517d245c8b8c5f4 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1900,24 +1900,6 @@ behavior for every Postgres session: sets the default mode for the genetic optimizer. </para> </listitem> -<listitem> -<para> -<envar>PGRPLANS</envar> -sets the default mode to allow or disable right-sided plans in the optimizer. -</para> -</listitem> -<listitem> -<para> -<envar>PGCOSTHEAP</envar> -sets the default cost for heap searches for the optimizer. -</para> -</listitem> -<listitem> -<para> -<envar>PGCOSTINDEX</envar> -sets the default cost for indexed searches for the optimizer. 
-</para> -</listitem> </itemizedlist> </para> diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml index 51177570649c6f300dc5860e43aec18bfa8a137b..2c32c76ff035b3bfc9328fa07a257f537411928a 100644 --- a/doc/src/sgml/ref/set.sgml +++ b/doc/src/sgml/ref/set.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/set.sgml,v 1.28 1999/07/22 15:09:15 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/set.sgml,v 1.29 2000/02/15 20:49:07 tgl Exp $ Postgres documentation --> @@ -50,7 +50,8 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } <term><replaceable class="PARAMETER">value</replaceable></term> <listitem> <para> - New value of parameter. + New value of parameter. The word <literal>DEFAULT</literal> can be + written to specify resetting the parameter to its default value. </para> </listitem> </varlistentry> @@ -78,20 +79,12 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } </para> </listitem> </varlistentry> - - <varlistentry> - <term>DEFAULT</term> - <listitem> - <para> - Sets the multi-byte client encoding to the default value. - </para> - </listitem> - </varlistentry> </variablelist> </para> <para> - This is only enabled if multi-byte was specified to configure. + This option is only available if MULTIBYTE support was enabled + during the configure step of building Postgres. </para> </listitem> </varlistentry> @@ -176,6 +169,9 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } <simplelist> <member> Setting the <envar>PGDATESTYLE</envar> environment variable. + If PGDATESTYLE is set in the frontend environment of a client + based on libpq, libpq will automatically set DATESTYLE to the + value of PGDATESTYLE during connection startup. 
</member> <member> Running postmaster using the option <option>-o -e</option> to set @@ -218,19 +214,12 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } </para> </listitem> </varlistentry> - - <varlistentry> - <term>DEFAULT</term> - <listitem> - <para> - Sets the multi-byte server encoding. - </para> - </listitem> - </varlistentry> </variablelist> </para> + <para> - This is only enabled if multi-byte was specified to configure. + This option is only available if MULTIBYTE support was enabled + during the configure step of building Postgres. </para> </listitem> </varlistentry> @@ -286,16 +275,17 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } If an invalid time zone is specified, the time zone becomes GMT (on most systems anyway). </para> - <para> - A frontend which uses libpq may be initialized by setting the PGTZ - environment variable. - </para> <para> The second syntax shown above, allows one to set the timezone with a syntax similar to SQL92 <command>SET TIME ZONE</command>. The LOCAL keyword is just an alternate form of DEFAULT for SQL92 compatibility. </para> + <para> + If the PGTZ environment variable is set in the frontend + environment of a client based on libpq, libpq will automatically + set TIMEZONE to the value of PGTZ during connection startup. + </para> </listitem> </varlistentry> @@ -349,133 +339,381 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } <variablelist> <varlistentry> - <term>COST_HEAP</term> + <term>RANDOM_PAGE_COST</term> <listitem> <para> - Sets the default cost of a heap scan for use by the optimizer. + Sets the optimizer's estimate of the cost of a nonsequentially + fetched disk page. This is measured as a multiple of the cost + of a sequential page fetch. 
<variablelist> <varlistentry> - <term><replaceable class="parameter">float4</replaceable></term> + <term><replaceable class="parameter">float8</replaceable></term> <listitem> <para> - Set the cost of a heap scan to the specified floating point value. + Set the cost of a random page access + to the specified floating-point value. </para> </listitem> </varlistentry> - + </variablelist> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>CPU_TUPLE_COST</term> + <listitem> + <para> + Sets the optimizer's estimate of the cost of processing each + tuple during a query. This is measured as a fraction of the cost + of a sequential page fetch. + + <variablelist> <varlistentry> - <term>DEFAULT</term> + <term><replaceable class="parameter">float8</replaceable></term> <listitem> <para> - Sets the cost of a heap scan to the default value. + Set the cost of per-tuple CPU processing + to the specified floating-point value. </para> </listitem> </varlistentry> </variablelist> </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>CPU_INDEX_TUPLE_COST</term> + <listitem> <para> - The frontend may be initialized by setting the PGCOSTHEAP - environment variable. + Sets the optimizer's estimate of the cost of processing each + index tuple during an index scan. This is measured as a fraction + of the cost of a sequential page fetch. + + <variablelist> + <varlistentry> + <term><replaceable class="parameter">float8</replaceable></term> + <listitem> + <para> + Set the cost of per-index-tuple CPU processing + to the specified floating-point value. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>CPU_OPERATOR_COST</term> + <listitem> + <para> + Sets the optimizer's estimate of the cost of processing each + operator in a WHERE clause. This is measured as a fraction + of the cost of a sequential page fetch. 
+ + <variablelist> + <varlistentry> + <term><replaceable class="parameter">float8</replaceable></term> + <listitem> + <para> + Set the cost of per-operator CPU processing + to the specified floating-point value. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>EFFECTIVE_CACHE_SIZE</term> + <listitem> + <para> + Sets the optimizer's assumption about the effective size of the + disk cache (that is, the portion of the kernel's disk cache that + will be used for Postgres data files). This is measured in disk + pages, which are normally 8KB apiece. + + <variablelist> + <varlistentry> + <term><replaceable class="parameter">float8</replaceable></term> + <listitem> + <para> + Set the assumed cache size + to the specified floating-point value. + </para> + </listitem> + </varlistentry> + </variablelist> </para> </listitem> </varlistentry> <varlistentry> - <term>COST_INDEX</term> + <term>ENABLE_SEQSCAN</term> <listitem> <para> - Sets the default cost of an index scan for use by the optimizer. + Enables or disables the planner's use of sequential scan plan types. + (It's not possible to suppress sequential scans entirely, but turning + this variable OFF discourages the planner from using one if there is + any other method available.) - <variablelist> + <variablelist> <varlistentry> - <term><replaceable class="parameter">float4</replaceable></term> + <term>ON</term> <listitem> <para> - Set the cost of an index scan to the specified floating point value. + enables use of sequential scans (default setting). </para> </listitem> </varlistentry> - + <varlistentry> - <term>DEFAULT</term> + <term>OFF</term> <listitem> <para> - Sets the cost of an index scan to the default value. + disables use of sequential scans. 
</para> </listitem> </varlistentry> </variablelist> </para> + </listitem> + </varlistentry> + <varlistentry> + <term>ENABLE_INDEXSCAN</term> + <listitem> <para> - The frontend may be initialized by setting the PGCOSTINDEX - environment variable. + Enables or disables the planner's use of index scan plan types. + + <variablelist> + <varlistentry> + <term>ON</term> + <listitem> + <para> + enables use of index scans (default setting). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>OFF</term> + <listitem> + <para> + disables use of index scans. + </para> + </listitem> + </varlistentry> + </variablelist> </para> </listitem> </varlistentry> <varlistentry> - <term>GEQO</term> + <term>ENABLE_TIDSCAN</term> <listitem> <para> - Sets the threshold for using the genetic optimizer algorithm. + Enables or disables the planner's use of TID scan plan types. <variablelist> <varlistentry> <term>ON</term> <listitem> <para> - enables the genetic optimizer algorithm - for statements with 6 or more tables. + enables use of TID scans (default setting). </para> </listitem> </varlistentry> + + <varlistentry> + <term>OFF</term> + <listitem> + <para> + disables use of TID scans. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + <varlistentry> + <term>ENABLE_SORT</term> + <listitem> + <para> + Enables or disables the planner's use of explicit sort steps. + (It's not possible to suppress explicit sorts entirely, but turning + this variable OFF discourages the planner from using one if there is + any other method available.) + + <variablelist> <varlistentry> - <term>ON=<replaceable class="parameter">#</replaceable></term> + <term>ON</term> <listitem> <para> - Takes an integer argument to enable the genetic optimizer algorithm - for statements with <replaceable class="parameter">#</replaceable> - or more tables in the query. + enables use of sorts (default setting). 
</para> </listitem> </varlistentry> + + <varlistentry> + <term>OFF</term> + <listitem> + <para> + disables use of sorts. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + <varlistentry> + <term>ENABLE_NESTLOOP</term> + <listitem> + <para> + Enables or disables the planner's use of nested-loop join plans. + (It's not possible to suppress nested-loop joins entirely, but turning + this variable OFF discourages the planner from using one if there is + any other method available.) + + <variablelist> + <varlistentry> + <term>ON</term> + <listitem> + <para> + enables use of nested-loop joins (default setting). + </para> + </listitem> + </varlistentry> + <varlistentry> <term>OFF</term> <listitem> <para> - disables the genetic optimizer algorithm. + disables use of nested-loop joins. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>ENABLE_MERGEJOIN</term> + <listitem> + <para> + Enables or disables the planner's use of mergejoin plans. + + <variablelist> + <varlistentry> + <term>ON</term> + <listitem> + <para> + enables use of merge joins (default setting). </para> </listitem> </varlistentry> + <varlistentry> - <term>DEFAULT</term> + <term>OFF</term> + <listitem> + <para> + disables use of merge joins. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>ENABLE_HASHJOIN</term> + <listitem> + <para> + Enables or disables the planner's use of hashjoin plans. + + <variablelist> + <varlistentry> + <term>ON</term> <listitem> <para> - Equivalent to specifying <command>SET GEQO='ON'</command> + enables use of hash joins (default setting). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>OFF</term> + <listitem> + <para> + disables use of hash joins. 
</para> </listitem> </varlistentry> </variablelist> </para> + </listitem> + </varlistentry> + <varlistentry> + <term>GEQO</term> + <listitem> <para> - This algorithm is on by default, which used GEQO for - statements of eleven or more tables. - (See the chapter on GEQO in the Programmer's Guide - for more information). + Sets the threshold for using the genetic optimizer algorithm. + + <variablelist> + <varlistentry> + <term>ON</term> + <listitem> + <para> + enables the genetic optimizer algorithm + for statements with 11 or more tables. + (This is also the DEFAULT setting.) + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>ON=<replaceable class="parameter">#</replaceable></term> + <listitem> + <para> + Takes an integer argument to enable the genetic optimizer algorithm + for statements with <replaceable class="parameter">#</replaceable> + or more tables in the query. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>OFF</term> + <listitem> + <para> + disables the genetic optimizer algorithm. + </para> + </listitem> + </varlistentry> + </variablelist> </para> + <para> - The frontend may be initialized by setting PGGEQO - environment variable. + See the chapter on GEQO in the Programmer's Guide + for more information about query optimization. </para> <para> - It may be useful when joining big relations with - small ones. This algorithm is off by default. - It's not used by GEQO anyway. + If the PGGEQO environment variable is set in the frontend + environment of a client based on libpq, libpq will automatically + set GEQO to the value of PGGEQO during connection startup. 
</para> </listitem> </varlistentry> @@ -484,10 +722,16 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } <term>KSQO</term> <listitem> <para> - <firstterm>Key Set Query Optimizer</firstterm> forces the query optimizer - to optimize repetative OR clauses such as generated by - <productname>MicroSoft Access</productname>: - + <firstterm>Key Set Query Optimizer</firstterm> causes the query + planner to convert queries whose WHERE clause contains many + OR'ed AND clauses (such as "WHERE (a=1 AND b=2) OR (a=2 AND b=3) ...") + into a UNION query. This method can be faster than the default + implementation, but it doesn't necessarily give exactly the same + results, since UNION implicitly adds a SELECT DISTINCT clause to + eliminate identical output rows. KSQO is commonly used when + working with products like <productname>Microsoft + Access</productname>, which tend to generate queries of this form. + <variablelist> <varlistentry> <term>ON</term> @@ -502,7 +746,7 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } <term>OFF</term> <listitem> <para> - disables this optimization. + disables this optimization (default setting). </para> </listitem> </varlistentry> @@ -519,13 +763,9 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE } </para> <para> - It may be useful when joining big relations with - small ones. This algorithm is off by default. - It's not used by GEQO anyway. - </para> - <para> - The frontend may be initialized by setting the PGKSQO - environment variable. + The KSQO algorithm used to be absolutely essential for queries + with many OR'ed AND clauses, but in Postgres 7.0 and later + the standard planner handles these queries fairly successfully. 
</para> </listitem> </varlistentry> diff --git a/doc/src/sgml/ref/show.sgml b/doc/src/sgml/ref/show.sgml index 14b43d823447eeba6376b0d60e8601313e77982e..39fbde16ae0c3434ca5b46327e15493cdff8a38d 100644 --- a/doc/src/sgml/ref/show.sgml +++ b/doc/src/sgml/ref/show.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/show.sgml,v 1.7 1999/07/22 15:09:15 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/show.sgml,v 1.8 2000/02/15 20:49:07 tgl Exp $ Postgres documentation --> @@ -106,14 +106,14 @@ SHOW VARIABLE Description </title> <para> - <command>SHOW</command> will display the current - configuration parameters for - variable during a session. + <command>SHOW</command> will display the current setting of a + run-time parameter during a session. </para> <para> - The session can be configured using <command>SET</command> statement, - and values - can be restored to the defaults using <command>RESET</command> statement. + These variables can be set using the <command>SET</command> statement, + and + can be restored to the default values using the <command>RESET</command> + statement. Parameters and values are case-insensitive. </para> @@ -125,13 +125,12 @@ SHOW VARIABLE Notes </title> <para> - The <command>SHOW</command> is a <productname>Postgres</productname> + <command>SHOW</command> is a <productname>Postgres</productname> language extension. </para> <para> Refer to <command>SET</command>/<command>RESET</command> to set/reset variable values. - See also <command>SET TIME ZONE</command>. 
</para> </refsect2> </refsect1> diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 2b152b2fe5b0e7df8c90563035a1486ef343023e..2a38a349d6073999b40bfe939939bdb23ec619a9 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -5,7 +5,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994-5, Regents of the University of California * - * $Id: explain.c,v 1.53 2000/02/15 03:36:39 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/explain.c,v 1.54 2000/02/15 20:49:08 tgl Exp $ * */ @@ -217,39 +217,24 @@ explain_outNode(StringInfo str, Plan *plan, int indent, ExplainState *es) { relation = RelationIdGetRelation(lfirsti(l)); Assert(relation); - if (++i > 1) - appendStringInfo(str, ", "); - appendStringInfo(str, + appendStringInfo(str, "%s%s", + (++i > 1) ? ", " : "", stringStringInfo(RelationGetRelationName(relation))); /* drop relcache refcount from RelationIdGetRelation */ RelationDecrementReferenceCount(relation); } + /* FALL THRU */ case T_SeqScan: + case T_TidScan: if (((Scan *) plan)->scanrelid > 0) { RangeTblEntry *rte = nth(((Scan *) plan)->scanrelid - 1, es->rtable); - appendStringInfo(str, " on "); - if (strcmp(rte->ref->relname, rte->relname) != 0) - { - appendStringInfo(str, "%s ", - stringStringInfo(rte->relname)); - } - appendStringInfo(str, stringStringInfo(rte->ref->relname)); - } - break; - case T_TidScan: - if (((TidScan *) plan)->scan.scanrelid > 0) - { - RangeTblEntry *rte = nth(((TidScan *) plan)->scan.scanrelid - 1, es->rtable); - - appendStringInfo(str, " on "); - if (strcmp(rte->ref->relname, rte->relname) != 0) - { - appendStringInfo(str, "%s ", - stringStringInfo(rte->relname)); - } - appendStringInfo(str, stringStringInfo(rte->ref->relname)); + appendStringInfo(str, " on %s", + stringStringInfo(rte->relname)); + if (rte->ref && strcmp(rte->ref->relname, rte->relname) != 0) + appendStringInfo(str, " %s", + stringStringInfo(rte->ref->relname)); } 
break; default: @@ -257,8 +242,9 @@ explain_outNode(StringInfo str, Plan *plan, int indent, ExplainState *es) } if (es->printCost) { - appendStringInfo(str, " (cost=%.2f rows=%.0f width=%d)", - plan->cost, plan->plan_rows, plan->plan_width); + appendStringInfo(str, " (cost=%.2f..%.2f rows=%.0f width=%d)", + plan->startup_cost, plan->total_cost, + plan->plan_rows, plan->plan_width); } appendStringInfo(str, "\n"); diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 52c4ed3552a1149254673abd6dea97f05c1d26aa..718a62a118d98d79413ceb1fc5acfb4e889f1f82 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -1,15 +1,24 @@ -/* - * Routines for handling of 'SET var TO', - * 'SHOW var' and 'RESET var' statements. +/*------------------------------------------------------------------------- + * + * variable.c + * Routines for handling of 'SET var TO', + * 'SHOW var' and 'RESET var' statements. + * + * Portions Copyright (c) 1996-2000, PostgreSQL, Inc + * Portions Copyright (c) 1994, Regents of the University of California + * * - * $Id: variable.c,v 1.28 2000/01/22 23:50:10 tgl Exp $ + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/commands/variable.c,v 1.29 2000/02/15 20:49:08 tgl Exp $ * + *------------------------------------------------------------------------- */ #include <ctype.h> #include <time.h> #include "postgres.h" + #include "access/xact.h" #include "catalog/pg_shadow.h" #include "commands/variable.h" @@ -24,18 +33,53 @@ #include "mb/pg_wchar.h" #endif + +/* XXX should be in a header file */ +extern bool _use_keyset_query_optimizer; + + static bool show_date(void); static bool reset_date(void); static bool parse_date(const char *); static bool show_timezone(void); static bool reset_timezone(void); static bool parse_timezone(const char *); -static bool show_cost_heap(void); -static bool reset_cost_heap(void); -static bool parse_cost_heap(const char *); -static bool show_cost_index(void); -static 
bool reset_cost_index(void); -static bool parse_cost_index(const char *); +static bool show_effective_cache_size(void); +static bool reset_effective_cache_size(void); +static bool parse_effective_cache_size(const char *); +static bool show_random_page_cost(void); +static bool reset_random_page_cost(void); +static bool parse_random_page_cost(const char *); +static bool show_cpu_tuple_cost(void); +static bool reset_cpu_tuple_cost(void); +static bool parse_cpu_tuple_cost(const char *); +static bool show_cpu_index_tuple_cost(void); +static bool reset_cpu_index_tuple_cost(void); +static bool parse_cpu_index_tuple_cost(const char *); +static bool show_cpu_operator_cost(void); +static bool reset_cpu_operator_cost(void); +static bool parse_cpu_operator_cost(const char *); +static bool reset_enable_seqscan(void); +static bool show_enable_seqscan(void); +static bool parse_enable_seqscan(const char *); +static bool reset_enable_indexscan(void); +static bool show_enable_indexscan(void); +static bool parse_enable_indexscan(const char *); +static bool reset_enable_tidscan(void); +static bool show_enable_tidscan(void); +static bool parse_enable_tidscan(const char *); +static bool reset_enable_sort(void); +static bool show_enable_sort(void); +static bool parse_enable_sort(const char *); +static bool reset_enable_nestloop(void); +static bool show_enable_nestloop(void); +static bool parse_enable_nestloop(const char *); +static bool reset_enable_mergejoin(void); +static bool show_enable_mergejoin(void); +static bool parse_enable_mergejoin(const char *); +static bool reset_enable_hashjoin(void); +static bool show_enable_hashjoin(void); +static bool parse_enable_hashjoin(const char *); static bool reset_geqo(void); static bool show_geqo(void); static bool parse_geqo(const char *); @@ -46,8 +90,6 @@ static bool show_XactIsoLevel(void); static bool reset_XactIsoLevel(void); static bool parse_XactIsoLevel(const char *); -extern bool _use_keyset_query_optimizer; - /* * * Get_Token @@ 
-153,6 +195,204 @@ get_token(char **tok, char **val, const char *str) return str; } +/* + * Generic parse routine for boolean ON/OFF variables + */ +static bool +parse_boolean_var(const char *value, + bool *variable, const char *varname, bool defaultval) +{ + if (value == NULL) + { + *variable = defaultval; + return TRUE; + } + + if (strcasecmp(value, "on") == 0) + *variable = true; + else if (strcasecmp(value, "off") == 0) + *variable = false; + else + elog(ERROR, "Bad value for %s (%s)", varname, value); + + return TRUE; +} + +/* + * ENABLE_SEQSCAN + */ +static bool +parse_enable_seqscan(const char *value) +{ + return parse_boolean_var(value, &enable_seqscan, + "ENABLE_SEQSCAN", true); +} + +static bool +show_enable_seqscan() +{ + elog(NOTICE, "ENABLE_SEQSCAN is %s", + enable_seqscan ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_seqscan() +{ + enable_seqscan = true; + return TRUE; +} + +/* + * ENABLE_INDEXSCAN + */ +static bool +parse_enable_indexscan(const char *value) +{ + return parse_boolean_var(value, &enable_indexscan, + "ENABLE_INDEXSCAN", true); +} + +static bool +show_enable_indexscan() +{ + elog(NOTICE, "ENABLE_INDEXSCAN is %s", + enable_indexscan ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_indexscan() +{ + enable_indexscan = true; + return TRUE; +} + +/* + * ENABLE_TIDSCAN + */ +static bool +parse_enable_tidscan(const char *value) +{ + return parse_boolean_var(value, &enable_tidscan, + "ENABLE_TIDSCAN", true); +} + +static bool +show_enable_tidscan() +{ + elog(NOTICE, "ENABLE_TIDSCAN is %s", + enable_tidscan ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_tidscan() +{ + enable_tidscan = true; + return TRUE; +} + +/* + * ENABLE_SORT + */ +static bool +parse_enable_sort(const char *value) +{ + return parse_boolean_var(value, &enable_sort, + "ENABLE_SORT", true); +} + +static bool +show_enable_sort() +{ + elog(NOTICE, "ENABLE_SORT is %s", + enable_sort ? 
"ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_sort() +{ + enable_sort = true; + return TRUE; +} + +/* + * ENABLE_NESTLOOP + */ +static bool +parse_enable_nestloop(const char *value) +{ + return parse_boolean_var(value, &enable_nestloop, + "ENABLE_NESTLOOP", true); +} + +static bool +show_enable_nestloop() +{ + elog(NOTICE, "ENABLE_NESTLOOP is %s", + enable_nestloop ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_nestloop() +{ + enable_nestloop = true; + return TRUE; +} + +/* + * ENABLE_MERGEJOIN + */ +static bool +parse_enable_mergejoin(const char *value) +{ + return parse_boolean_var(value, &enable_mergejoin, + "ENABLE_MERGEJOIN", true); +} + +static bool +show_enable_mergejoin() +{ + elog(NOTICE, "ENABLE_MERGEJOIN is %s", + enable_mergejoin ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_mergejoin() +{ + enable_mergejoin = true; + return TRUE; +} + +/* + * ENABLE_HASHJOIN + */ +static bool +parse_enable_hashjoin(const char *value) +{ + return parse_boolean_var(value, &enable_hashjoin, + "ENABLE_HASHJOIN", true); +} + +static bool +show_enable_hashjoin() +{ + elog(NOTICE, "ENABLE_HASHJOIN is %s", + enable_hashjoin ? 
"ON" : "OFF"); + return TRUE; +} + +static bool +reset_enable_hashjoin() +{ + enable_hashjoin = true; + return TRUE; +} + /* * * GEQO @@ -208,7 +448,6 @@ parse_geqo(const char *value) static bool show_geqo() { - if (enable_geqo) elog(NOTICE, "GEQO is ON beginning with %d relations", geqo_rels); else @@ -219,7 +458,6 @@ show_geqo() static bool reset_geqo(void) { - #ifdef GEQO enable_geqo = true; #else @@ -230,76 +468,173 @@ reset_geqo(void) } /* - * - * COST_HEAP - * + * EFFECTIVE_CACHE_SIZE */ static bool -parse_cost_heap(const char *value) +parse_effective_cache_size(const char *value) { float64 res; if (value == NULL) { - reset_cost_heap(); + reset_effective_cache_size(); return TRUE; } res = float8in((char *) value); - cpu_page_weight = *res; + effective_cache_size = *res; return TRUE; } static bool -show_cost_heap() +show_effective_cache_size() { + elog(NOTICE, "EFFECTIVE_CACHE_SIZE is %g (%dK pages)", + effective_cache_size, BLCKSZ/1024); + return TRUE; +} - elog(NOTICE, "COST_HEAP is %f", cpu_page_weight); +static bool +reset_effective_cache_size() +{ + effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE; return TRUE; } +/* + * RANDOM_PAGE_COST + */ static bool -reset_cost_heap() +parse_random_page_cost(const char *value) { - cpu_page_weight = CPU_PAGE_WEIGHT; + float64 res; + + if (value == NULL) + { + reset_random_page_cost(); + return TRUE; + } + + res = float8in((char *) value); + random_page_cost = *res; + + return TRUE; +} + +static bool +show_random_page_cost() +{ + elog(NOTICE, "RANDOM_PAGE_COST is %g", random_page_cost); + return TRUE; +} + +static bool +reset_random_page_cost() +{ + random_page_cost = DEFAULT_RANDOM_PAGE_COST; return TRUE; } /* - * - * COST_INDEX - * + * CPU_TUPLE_COST */ static bool -parse_cost_index(const char *value) +parse_cpu_tuple_cost(const char *value) { float64 res; if (value == NULL) { - reset_cost_index(); + reset_cpu_tuple_cost(); return TRUE; } res = float8in((char *) value); - cpu_index_page_weight = *res; + 
cpu_tuple_cost = *res; return TRUE; } static bool -show_cost_index() +show_cpu_tuple_cost() { + elog(NOTICE, "CPU_TUPLE_COST is %g", cpu_tuple_cost); + return TRUE; +} - elog(NOTICE, "COST_INDEX is %f", cpu_index_page_weight); +static bool +reset_cpu_tuple_cost() +{ + cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; return TRUE; } +/* + * CPU_INDEX_TUPLE_COST + */ static bool -reset_cost_index() +parse_cpu_index_tuple_cost(const char *value) { - cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT; + float64 res; + + if (value == NULL) + { + reset_cpu_index_tuple_cost(); + return TRUE; + } + + res = float8in((char *) value); + cpu_index_tuple_cost = *res; + + return TRUE; +} + +static bool +show_cpu_index_tuple_cost() +{ + elog(NOTICE, "CPU_INDEX_TUPLE_COST is %g", cpu_index_tuple_cost); + return TRUE; +} + +static bool +reset_cpu_index_tuple_cost() +{ + cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; + return TRUE; +} + +/* + * CPU_OPERATOR_COST + */ +static bool +parse_cpu_operator_cost(const char *value) +{ + float64 res; + + if (value == NULL) + { + reset_cpu_operator_cost(); + return TRUE; + } + + res = float8in((char *) value); + cpu_operator_cost = *res; + + return TRUE; +} + +static bool +show_cpu_operator_cost() +{ + elog(NOTICE, "CPU_OPERATOR_COST is %g", cpu_operator_cost); + return TRUE; +} + +static bool +reset_cpu_operator_cost() +{ + cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; return TRUE; } @@ -527,6 +862,89 @@ reset_timezone() return TRUE; } /* reset_timezone() */ +/*----------------------------------------------------------------------- +KSQO code will one day be unnecessary when the optimizer makes use of +indexes when multiple ORs are specified in the where clause. +See optimizer/prep/prepkeyset.c for more on this. 
+ daveh@insightdist.com 6/16/98 +-----------------------------------------------------------------------*/ +static bool +parse_ksqo(const char *value) +{ + return parse_boolean_var(value, &_use_keyset_query_optimizer, + "KSQO", false); +} + +static bool +show_ksqo() +{ + elog(NOTICE, "KSQO is %s", + _use_keyset_query_optimizer ? "ON" : "OFF"); + return TRUE; +} + +static bool +reset_ksqo() +{ + _use_keyset_query_optimizer = false; + return TRUE; +} + +/* SET TRANSACTION */ + +static bool +parse_XactIsoLevel(const char *value) +{ + + if (value == NULL) + { + reset_XactIsoLevel(); + return TRUE; + } + + if (SerializableSnapshot != NULL) + { + elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query"); + return TRUE; + } + + + if (strcasecmp(value, "SERIALIZABLE") == 0) + XactIsoLevel = XACT_SERIALIZABLE; + else if (strcasecmp(value, "COMMITTED") == 0) + XactIsoLevel = XACT_READ_COMMITTED; + else + elog(ERROR, "Bad TRANSACTION ISOLATION LEVEL (%s)", value); + + return TRUE; +} + +static bool +show_XactIsoLevel() +{ + + if (XactIsoLevel == XACT_SERIALIZABLE) + elog(NOTICE, "TRANSACTION ISOLATION LEVEL is SERIALIZABLE"); + else + elog(NOTICE, "TRANSACTION ISOLATION LEVEL is READ COMMITTED"); + return TRUE; +} + +static bool +reset_XactIsoLevel() +{ + + if (SerializableSnapshot != NULL) + { + elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query"); + return TRUE; + } + + XactIsoLevel = DefaultXactIsoLevel; + + return TRUE; +} + /* * Pg_options */ @@ -557,6 +975,7 @@ reset_pg_options(void) return (TRUE); } + /*-----------------------------------------------------------------------*/ struct VariableParsers @@ -575,10 +994,52 @@ struct VariableParsers "timezone", parse_timezone, show_timezone, reset_timezone }, { - "cost_heap", parse_cost_heap, show_cost_heap, reset_cost_heap + "effective_cache_size", parse_effective_cache_size, + show_effective_cache_size, reset_effective_cache_size + }, + { + "random_page_cost", 
parse_random_page_cost, + show_random_page_cost, reset_random_page_cost + }, + { + "cpu_tuple_cost", parse_cpu_tuple_cost, + show_cpu_tuple_cost, reset_cpu_tuple_cost + }, + { + "cpu_index_tuple_cost", parse_cpu_index_tuple_cost, + show_cpu_index_tuple_cost, reset_cpu_index_tuple_cost + }, + { + "cpu_operator_cost", parse_cpu_operator_cost, + show_cpu_operator_cost, reset_cpu_operator_cost + }, + { + "enable_seqscan", parse_enable_seqscan, + show_enable_seqscan, reset_enable_seqscan + }, + { + "enable_indexscan", parse_enable_indexscan, + show_enable_indexscan, reset_enable_indexscan + }, + { + "enable_tidscan", parse_enable_tidscan, + show_enable_tidscan, reset_enable_tidscan + }, + { + "enable_sort", parse_enable_sort, + show_enable_sort, reset_enable_sort }, { - "cost_index", parse_cost_index, show_cost_index, reset_cost_index + "enable_nestloop", parse_enable_nestloop, + show_enable_nestloop, reset_enable_nestloop + }, + { + "enable_mergejoin", parse_enable_mergejoin, + show_enable_mergejoin, reset_enable_mergejoin + }, + { + "enable_hashjoin", parse_enable_hashjoin, + show_enable_hashjoin, reset_enable_hashjoin }, { "geqo", parse_geqo, show_geqo, reset_geqo @@ -655,102 +1116,3 @@ ResetPGVariable(const char *name) return TRUE; } - - -/*----------------------------------------------------------------------- -KSQO code will one day be unnecessary when the optimizer makes use of -indexes when multiple ORs are specified in the where clause. -See optimizer/prep/prepkeyset.c for more on this. 
- daveh@insightdist.com 6/16/98 ------------------------------------------------------------------------*/ -static bool -parse_ksqo(const char *value) -{ - if (value == NULL) - { - reset_ksqo(); - return TRUE; - } - - if (strcasecmp(value, "on") == 0) - _use_keyset_query_optimizer = true; - else if (strcasecmp(value, "off") == 0) - _use_keyset_query_optimizer = false; - else - elog(ERROR, "Bad value for Key Set Query Optimizer (%s)", value); - - return TRUE; -} - -static bool -show_ksqo() -{ - - if (_use_keyset_query_optimizer) - elog(NOTICE, "Key Set Query Optimizer is ON"); - else - elog(NOTICE, "Key Set Query Optimizer is OFF"); - return TRUE; -} - -static bool -reset_ksqo() -{ - _use_keyset_query_optimizer = false; - return TRUE; -} - -/* SET TRANSACTION */ - -static bool -parse_XactIsoLevel(const char *value) -{ - - if (value == NULL) - { - reset_XactIsoLevel(); - return TRUE; - } - - if (SerializableSnapshot != NULL) - { - elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query"); - return TRUE; - } - - - if (strcasecmp(value, "SERIALIZABLE") == 0) - XactIsoLevel = XACT_SERIALIZABLE; - else if (strcasecmp(value, "COMMITTED") == 0) - XactIsoLevel = XACT_READ_COMMITTED; - else - elog(ERROR, "Bad TRANSACTION ISOLATION LEVEL (%s)", value); - - return TRUE; -} - -static bool -show_XactIsoLevel() -{ - - if (XactIsoLevel == XACT_SERIALIZABLE) - elog(NOTICE, "TRANSACTION ISOLATION LEVEL is SERIALIZABLE"); - else - elog(NOTICE, "TRANSACTION ISOLATION LEVEL is READ COMMITTED"); - return TRUE; -} - -static bool -reset_XactIsoLevel() -{ - - if (SerializableSnapshot != NULL) - { - elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query"); - return TRUE; - } - - XactIsoLevel = DefaultXactIsoLevel; - - return TRUE; -} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index adf0c7f1987c2ffe8dd28542ead1f778336c60f0..5bf01e227228c4d55e9f93a0de964054d06b4c58 100644 --- a/src/backend/nodes/copyfuncs.c +++ 
b/src/backend/nodes/copyfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.105 2000/02/15 03:37:08 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.106 2000/02/15 20:49:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,7 +75,8 @@ listCopy(List *list) static void CopyPlanFields(Plan *from, Plan *newnode) { - newnode->cost = from->cost; + newnode->startup_cost = from->startup_cost; + newnode->total_cost = from->total_cost; newnode->plan_rows = from->plan_rows; newnode->plan_width = from->plan_width; /* state is NOT copied */ @@ -981,8 +982,9 @@ _copyRelOptInfo(RelOptInfo *from) Node_Copy(from, newnode, targetlist); Node_Copy(from, newnode, pathlist); - /* XXX cheapestpath should point to a member of pathlist? */ - Node_Copy(from, newnode, cheapestpath); + /* XXX cheapest-path fields should point to members of pathlist? */ + Node_Copy(from, newnode, cheapest_startup_path); + Node_Copy(from, newnode, cheapest_total_path); newnode->pruneable = from->pruneable; newnode->indexed = from->indexed; @@ -990,6 +992,7 @@ _copyRelOptInfo(RelOptInfo *from) newnode->tuples = from->tuples; Node_Copy(from, newnode, baserestrictinfo); + newnode->baserestrictcost = from->baserestrictcost; Node_Copy(from, newnode, joininfo); Node_Copy(from, newnode, innerjoin); @@ -1045,6 +1048,7 @@ _copyIndexOptInfo(IndexOptInfo *from) newnode->amcostestimate = from->amcostestimate; newnode->indproc = from->indproc; Node_Copy(from, newnode, indpred); + newnode->lossy = from->lossy; return newnode; } @@ -1066,7 +1070,8 @@ CopyPathFields(Path *from, Path *newnode) */ newnode->parent = from->parent; - newnode->path_cost = from->path_cost; + newnode->startup_cost = from->startup_cost; + newnode->total_cost = from->total_cost; newnode->pathtype = from->pathtype; @@ -1108,6 +1113,7 @@ _copyIndexPath(IndexPath *from) */ newnode->indexid = listCopy(from->indexid); Node_Copy(from, 
newnode, indexqual); + newnode->indexscandir = from->indexscandir; newnode->joinrelids = listCopy(from->joinrelids); return newnode; @@ -1339,8 +1345,7 @@ _copyRangeTblEntry(RangeTblEntry *from) if (from->relname) newnode->relname = pstrdup(from->relname); - if (from->ref) - Node_Copy(from, newnode, ref); + Node_Copy(from, newnode, ref); newnode->relid = from->relid; newnode->inh = from->inh; newnode->inFromCl = from->inFromCl; @@ -1449,8 +1454,10 @@ _copyQuery(Query *from) Node_Copy(from, newnode, limitOffset); Node_Copy(from, newnode, limitCount); - /* we do not copy the planner internal fields: base_rel_list, - * join_rel_list, query_pathkeys. Not entirely clear if this is right? + /* + * We do not copy the planner internal fields: base_rel_list, + * join_rel_list, equi_key_list, query_pathkeys. + * Not entirely clear if this is right? */ return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3ddc8d6c98a518cc43c91cdd4fb1c321e98e2217..fadc282d1add86fcb5aa8255e80f65cf443af725 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.60 2000/02/15 03:37:08 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.61 2000/02/15 20:49:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -100,10 +100,10 @@ _equalAttr(Attr *a, Attr *b) { if (!strcmp(a->relname, b->relname)) return false; - if (length(a->attrs) != length(b->attrs)) + if (!equal(a->attrs, b->attrs)) return false; - return equal(a->attrs, b->attrs); + return true; } static bool @@ -342,8 +342,8 @@ _equalPath(Path *a, Path *b) return false; if (!equal(a->parent, b->parent)) return false; - /* do not check path_cost, since it may not be set yet, and being - * a float there are roundoff error issues anyway... 
+ /* do not check path costs, since they may not be set yet, and being + * float values there are roundoff error issues anyway... */ if (!equal(a->pathkeys, b->pathkeys)) return false; @@ -359,6 +359,8 @@ _equalIndexPath(IndexPath *a, IndexPath *b) return false; if (!equal(a->indexqual, b->indexqual)) return false; + if (a->indexscandir != b->indexscandir) + return false; if (!equali(a->joinrelids, b->joinrelids)) return false; return true; @@ -625,8 +627,9 @@ _equalQuery(Query *a, Query *b) /* * We do not check the internal-to-the-planner fields: base_rel_list, - * join_rel_list, query_pathkeys. They might not be set yet, and - * in any case they should be derivable from the other fields. + * join_rel_list, equi_key_list, query_pathkeys. + * They might not be set yet, and in any case they should be derivable + * from the other fields. */ return true; } @@ -644,16 +647,8 @@ _equalRangeTblEntry(RangeTblEntry *a, RangeTblEntry *b) if (a->relname != b->relname) return false; } - if (a->ref && b->ref) - { - if (! 
equal(a->ref, b->ref)) - return false; - } - else - { - if (a->ref != b->ref) - return false; - } + if (!equal(a->ref, b->ref)) + return false; if (a->relid != b->relid) return false; if (a->inh != b->inh) @@ -784,6 +779,9 @@ equal(void *a, void *b) case T_Stream: retval = _equalStream(a, b); break; + case T_Attr: + retval = _equalAttr(a, b); + break; case T_Var: retval = _equalVar(a, b); break; @@ -856,9 +854,6 @@ equal(void *a, void *b) case T_EState: retval = _equalEState(a, b); break; - case T_Attr: - retval = _equalAttr(a, b); - break; case T_Integer: case T_String: case T_Float: diff --git a/src/backend/nodes/freefuncs.c b/src/backend/nodes/freefuncs.c index 690da02de85f5ec10190b924a83102f2793d40a2..8eed80e61ab42d4835136ac4259514d0424b0936 100644 --- a/src/backend/nodes/freefuncs.c +++ b/src/backend/nodes/freefuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.35 2000/02/15 03:37:08 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.36 2000/02/15 20:49:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -730,10 +730,11 @@ _freeRelOptInfo(RelOptInfo *node) freeObject(node->targetlist); freeObject(node->pathlist); - /* XXX is this right? cheapestpath will typically be a pointer into - * pathlist, won't it? + /* XXX is this right? cheapest-path fields will typically be pointers + * into pathlist, not separate structs... 
*/ - freeObject(node->cheapestpath); + freeObject(node->cheapest_startup_path); + freeObject(node->cheapest_total_path); freeObject(node->baserestrictinfo); freeObject(node->joininfo); @@ -1013,8 +1014,7 @@ _freeRangeTblEntry(RangeTblEntry *node) { if (node->relname) pfree(node->relname); - if (node->ref) - freeObject(node->ref); + freeObject(node->ref); pfree(node); } @@ -1024,8 +1024,7 @@ _freeAttr(Attr *node) { if (node->relname) pfree(node->relname); - if (node->attrs) - freeObject(node->attrs); + freeObject(node->attrs); pfree(node); } @@ -1346,10 +1345,3 @@ freeObject(void *node) break; } } - - - - - - - diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e4c35cc277fb9a4850e8b0ad2fbe69db9b4b3f2e..c40ca9ff9cbdf1e1a8afc969dbbde086477824cc 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.107 2000/02/15 03:37:09 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.108 2000/02/15 20:49:09 tgl Exp $ * * NOTES * Every (plan) node in POSTGRES has an associated "out" routine which @@ -321,8 +321,9 @@ static void _outPlanInfo(StringInfo str, Plan *node) { appendStringInfo(str, - ":cost %g :rows %.0f :width %d :state %s :qptargetlist ", - node->cost, + ":startup_cost %.2f :total_cost %.2f :rows %.0f :width %d :state %s :qptargetlist ", + node->startup_cost, + node->total_cost, node->plan_rows, node->plan_width, node->state ? "not-NULL" : "<>"); @@ -908,15 +909,13 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) appendStringInfo(str, " :pathlist "); _outNode(str, node->pathlist); - /* - * Not sure if these are nodes or not. They're declared as struct - * Path *. Since i don't know, i'll just print the addresses for now. - * This can be changed later, if necessary. 
- */ + appendStringInfo(str, " :cheapest_startup_path "); + _outNode(str, node->cheapest_startup_path); + appendStringInfo(str, " :cheapest_total_path "); + _outNode(str, node->cheapest_total_path); appendStringInfo(str, - " :cheapestpath @ 0x%x :pruneable %s :baserestrictinfo ", - (int) node->cheapestpath, + " :pruneable %s :baserestrictinfo ", node->pruneable ? "true" : "false"); _outNode(str, node->baserestrictinfo); @@ -977,9 +976,11 @@ _outRowMark(StringInfo str, RowMark *node) static void _outPath(StringInfo str, Path *node) { - appendStringInfo(str, " PATH :pathtype %d :cost %.2f :pathkeys ", + appendStringInfo(str, + " PATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->pathtype, - node->path_cost); + node->startup_cost, + node->total_cost); _outNode(str, node->pathkeys); } @@ -990,9 +991,10 @@ static void _outIndexPath(StringInfo str, IndexPath *node) { appendStringInfo(str, - " INDEXPATH :pathtype %d :cost %.2f :pathkeys ", + " INDEXPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->path.pathtype, - node->path.path_cost); + node->path.startup_cost, + node->path.total_cost); _outNode(str, node->path.pathkeys); appendStringInfo(str, " :indexid "); @@ -1001,7 +1003,8 @@ _outIndexPath(StringInfo str, IndexPath *node) appendStringInfo(str, " :indexqual "); _outNode(str, node->indexqual); - appendStringInfo(str, " :joinrelids "); + appendStringInfo(str, " :indexscandir %d :joinrelids ", + (int) node->indexscandir); _outIntList(str, node->joinrelids); } @@ -1012,9 +1015,10 @@ static void _outTidPath(StringInfo str, TidPath *node) { appendStringInfo(str, - " TIDPATH :pathtype %d :cost %.2f :pathkeys ", + " TIDPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->path.pathtype, - node->path.path_cost); + node->path.startup_cost, + node->path.total_cost); _outNode(str, node->path.pathkeys); appendStringInfo(str, " :tideval "); @@ -1031,9 +1035,10 @@ static void _outNestPath(StringInfo str, NestPath 
*node) { appendStringInfo(str, - " NESTPATH :pathtype %d :cost %.2f :pathkeys ", + " NESTPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->path.pathtype, - node->path.path_cost); + node->path.startup_cost, + node->path.total_cost); _outNode(str, node->path.pathkeys); appendStringInfo(str, " :outerjoinpath "); _outNode(str, node->outerjoinpath); @@ -1050,9 +1055,10 @@ static void _outMergePath(StringInfo str, MergePath *node) { appendStringInfo(str, - " MERGEPATH :pathtype %d :cost %.2f :pathkeys ", + " MERGEPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->jpath.path.pathtype, - node->jpath.path.path_cost); + node->jpath.path.startup_cost, + node->jpath.path.total_cost); _outNode(str, node->jpath.path.pathkeys); appendStringInfo(str, " :outerjoinpath "); _outNode(str, node->jpath.outerjoinpath); @@ -1078,9 +1084,10 @@ static void _outHashPath(StringInfo str, HashPath *node) { appendStringInfo(str, - " HASHPATH :pathtype %d :cost %.2f :pathkeys ", + " HASHPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ", node->jpath.path.pathtype, - node->jpath.path.path_cost); + node->jpath.path.startup_cost, + node->jpath.path.total_cost); _outNode(str, node->jpath.path.pathkeys); appendStringInfo(str, " :outerjoinpath "); _outNode(str, node->jpath.outerjoinpath); @@ -1364,7 +1371,7 @@ _outNode(StringInfo str, void *obj) return; } - if (nodeTag(obj) == T_List) + if (IsA(obj, List)) { List *l; @@ -1377,6 +1384,11 @@ _outNode(StringInfo str, void *obj) } appendStringInfoChar(str, ')'); } + else if (IsA_Value(obj)) + { + /* nodeRead does not want to see { } around these! 
*/ + _outValue(str, obj); + } else { appendStringInfoChar(str, '{'); @@ -1550,11 +1562,6 @@ _outNode(StringInfo str, void *obj) case T_Stream: _outStream(str, obj); break; - case T_Integer: - case T_String: - case T_Float: - _outValue(str, obj); - break; case T_A_Expr: _outAExpr(str, obj); break; diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index a84b829950f5140d7991a3ab54deaae34434c576..248991c0928623d1e224c11159cb1d12dfa960bf 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.36 2000/02/15 03:37:09 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.37 2000/02/15 20:49:12 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -175,9 +175,8 @@ print_expr(Node *expr, List *rtable) { rt = rt_fetch(var->varno, rtable); relname = rt->relname; - if (rt->ref) - if (rt->ref->relname) - relname = rt->relname; /* table renamed */ + if (rt->ref && rt->ref->relname) + relname = rt->ref->relname; /* table renamed */ attname = get_attname(rt->relid, var->varattno); } break; @@ -366,8 +365,9 @@ print_plan_recursive(Plan *p, Query *parsetree, int indentLevel, char *label) return; for (i = 0; i < indentLevel; i++) printf(" "); - printf("%s%s :c=%.4f :r=%.0f :w=%d ", label, plannode_type(p), - p->cost, p->plan_rows, p->plan_width); + printf("%s%s :c=%.2f..%.2f :r=%.0f :w=%d ", label, plannode_type(p), + p->startup_cost, p->total_cost, + p->plan_rows, p->plan_width); if (IsA(p, Scan) ||IsA(p, SeqScan)) { RangeTblEntry *rte; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 7d56b603b85c1766676a8520a3ad8a2d7ff963e7..7d1e0b4cccf22ca551325c917bd9dce570baf099 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.83 2000/02/15 03:37:09 thomas Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.84 2000/02/15 20:49:12 tgl Exp $ * * NOTES * Most of the read functions for plan nodes are tested. (In fact, they @@ -217,9 +217,13 @@ _getPlan(Plan *node) char *token; int length; - token = lsptok(NULL, &length); /* first token is :cost */ + token = lsptok(NULL, &length); /* first token is :startup_cost */ token = lsptok(NULL, &length); /* next is the actual cost */ - node->cost = (Cost) atof(token); + node->startup_cost = (Cost) atof(token); + + token = lsptok(NULL, &length); /* skip the :total_cost */ + token = lsptok(NULL, &length); /* next is the actual cost */ + node->total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* skip the :rows */ token = lsptok(NULL, &length); /* get the plan_rows */ @@ -520,7 +524,6 @@ _readIndexScan() token = lsptok(NULL, &length); /* eat :indxorderdir */ token = lsptok(NULL, &length); /* get indxorderdir */ - local_node->indxorderdir = atoi(token); return local_node; @@ -1275,18 +1278,15 @@ _readRelOptInfo() token = lsptok(NULL, &length); /* get :pathlist */ local_node->pathlist = nodeRead(true); /* now read it */ - /* - * Not sure if these are nodes or not. They're declared as struct - * Path *. Since i don't know, i'll just print the addresses for now. - * This can be changed later, if necessary. - */ - - token = lsptok(NULL, &length); /* get :cheapestpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ + token = lsptok(NULL, &length); /* get :cheapest_startup_path */ + local_node->cheapest_startup_path = nodeRead(true); /* now read it */ - sscanf(token, "%x", (unsigned int *) &local_node->cheapestpath); + token = lsptok(NULL, &length); /* get :cheapest_total_path */ + local_node->cheapest_total_path = nodeRead(true); /* now read it */ + token = lsptok(NULL, &length); /* eat :pruneable */ + token = lsptok(NULL, &length); /* get :pruneable */ + local_node->pruneable = (token[0] == 't') ? 
true : false; token = lsptok(NULL, &length); /* get :baserestrictinfo */ local_node->baserestrictinfo = nodeRead(true); /* now read it */ @@ -1322,29 +1322,6 @@ _readTargetEntry() return local_node; } -static List * -_readList() -{ - List *local_node = NULL; - char *token; - int length; - - token = lsptok(NULL, &length); /* eat "(" */ - token = lsptok(NULL, &length); /* get "{" */ - while (strncmp(token, "{", length) == 0) - { - nconc(local_node, nodeRead(true)); - - token = lsptok(NULL, &length); /* eat ")" */ - if (strncmp(token, "}", length) != 0) - elog(ERROR, "badly formatted attribute list" - " in planstring \"%.10s\"...\n", token); - token = lsptok(NULL, &length); /* "{" or ")" */ - } - - return local_node; -} - static Attr * _readAttr() { @@ -1356,13 +1333,10 @@ _readAttr() token = lsptok(NULL, &length); /* eat :relname */ token = lsptok(NULL, &length); /* get relname */ - if (length == 0) - local_node->relname = pstrdup(""); - else - local_node->relname = debackslash(token, length); + local_node->relname = debackslash(token, length); token = lsptok(NULL, &length); /* eat :attrs */ - local_node->attrs = _readList(); + local_node->attrs = nodeRead(true); /* now read it */ return local_node; } @@ -1388,7 +1362,7 @@ _readRangeTblEntry() local_node->relname = debackslash(token, length); token = lsptok(NULL, &length); /* eat :ref */ - local_node->ref = nodeRead(true); + local_node->ref = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* eat :relid */ token = lsptok(NULL, &length); /* get :relid */ @@ -1450,9 +1424,13 @@ _readPath() token = lsptok(NULL, &length); /* now read it */ local_node->pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->startup_cost = (Cost) atof(token); + + token = lsptok(NULL, &length); /* get :total_cost */ token = lsptok(NULL, &length); /* now read it */ - local_node->path_cost 
= (Cost) atof(token); + local_node->total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get :pathkeys */ local_node->pathkeys = nodeRead(true); /* now read it */ @@ -1479,9 +1457,13 @@ _readIndexPath() token = lsptok(NULL, &length); /* now read it */ local_node->path.pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ token = lsptok(NULL, &length); /* now read it */ - local_node->path.path_cost = (Cost) atof(token); + local_node->path.startup_cost = (Cost) atof(token); + + token = lsptok(NULL, &length); /* get :total_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->path.total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get :pathkeys */ local_node->path.pathkeys = nodeRead(true); /* now read it */ @@ -1492,6 +1474,10 @@ _readIndexPath() token = lsptok(NULL, &length); /* get :indexqual */ local_node->indexqual = nodeRead(true); /* now read it */ + token = lsptok(NULL, &length); /* get :indexscandir */ + token = lsptok(NULL, &length); /* now read it */ + local_node->indexscandir = (ScanDirection) atoi(token); + token = lsptok(NULL, &length); /* get :joinrelids */ local_node->joinrelids = toIntList(nodeRead(true)); @@ -1517,9 +1503,13 @@ _readTidPath() token = lsptok(NULL, &length); /* now read it */ local_node->path.pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ token = lsptok(NULL, &length); /* now read it */ - local_node->path.path_cost = (Cost) atof(token); + local_node->path.startup_cost = (Cost) atof(token); + + token = lsptok(NULL, &length); /* get :total_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->path.total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get :pathkeys */ local_node->path.pathkeys = nodeRead(true); /* now read it */ @@ -1552,9 +1542,13 @@ _readNestPath() token = lsptok(NULL, &length); /* 
now read it */ local_node->path.pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->path.startup_cost = (Cost) atof(token); + + token = lsptok(NULL, &length); /* get :total_cost */ token = lsptok(NULL, &length); /* now read it */ - local_node->path.path_cost = (Cost) atof(token); + local_node->path.total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get :pathkeys */ local_node->path.pathkeys = nodeRead(true); /* now read it */ @@ -1588,13 +1582,15 @@ _readMergePath() token = lsptok(NULL, &length); /* get :pathtype */ token = lsptok(NULL, &length); /* now read it */ - local_node->jpath.path.pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.path.startup_cost = (Cost) atof(token); - local_node->jpath.path.path_cost = (Cost) atof(token); + token = lsptok(NULL, &length); /* get :total_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.path.total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get :pathkeys */ local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */ @@ -1637,13 +1633,15 @@ _readHashPath() token = lsptok(NULL, &length); /* get :pathtype */ token = lsptok(NULL, &length); /* now read it */ - local_node->jpath.path.pathtype = atol(token); - token = lsptok(NULL, &length); /* get :cost */ + token = lsptok(NULL, &length); /* get :startup_cost */ token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.path.startup_cost = (Cost) atof(token); - local_node->jpath.path.path_cost = (Cost) atof(token); + token = lsptok(NULL, &length); /* get :total_cost */ + token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.path.total_cost = (Cost) atof(token); token = lsptok(NULL, &length); /* get 
:pathkeys */ local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */ @@ -1886,14 +1884,6 @@ parsePlanString(void) return_value = _readCaseWhen(); else if (length == 7 && strncmp(token, "ROWMARK", length) == 0) return_value = _readRowMark(); -#if 0 - else if (length == 1 && strncmp(token, "{", length) == 0) - { - /* raw list (of strings?) found in Attr structure - thomas 2000-02-09 */ - return_value = nodeRead(true); - token = lsptok(NULL, &length); /* eat trailing brace */ - } -#endif else elog(ERROR, "badly formatted planstring \"%.10s\"...\n", token); diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index bbc1204395a8555fbe0f30b10b4fcf56f1386b2b..6ca70a91f1d3d931780a4b7028da85e0cdb64758 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -122,7 +122,7 @@ among other choices. Although the jointree scanning code produces these potential join combinations one at a time, all the ways to produce the same set of joined base rels will share the same RelOptInfo, so the paths produced from different join combinations that produce equivalent joinrels -will compete in add_pathlist. +will compete in add_path. 
Once we have built the final join rel, we use either the cheapest path for it or the cheapest path with the desired ordering (if that's cheaper diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 614ca47c84d5c8f37b99e65c301646469e47d5c9..1c70e4bcd8d274d591cafe67f96774912052a6e7 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_eval.c,v 1.47 2000/02/07 04:40:58 tgl Exp $ + * $Id: geqo_eval.c,v 1.48 2000/02/15 20:49:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -96,8 +96,13 @@ geqo_eval(Query *root, Gene *tour, int num_gene) /* construct the best path for the given combination of relations */ joinrel = gimme_tree(root, tour, 0, num_gene, NULL); - /* compute fitness */ - fitness = joinrel->cheapestpath->path_cost; + /* + * compute fitness + * + * XXX geqo does not currently support optimization for partial + * result retrieval --- how to fix? 
+ */ + fitness = joinrel->cheapest_total_path->total_cost; /* restore join_rel_list */ root->join_rel_list = savelist; @@ -155,8 +160,8 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old rel_count++; Assert(length(new_rel->relids) == rel_count); - /* Find and save the cheapest path for this rel */ - set_cheapest(new_rel, new_rel->pathlist); + /* Find and save the cheapest paths for this rel */ + set_cheapest(new_rel); return gimme_tree(root, tour, rel_count, num_gene, new_rel); } diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index 849c739f2ddd64c0b811289a460bf7bc929a44ff..01ced310e1b48d4dc9e2c8865670224cb5590b59 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_misc.c,v 1.27 2000/02/07 04:40:58 tgl Exp $ + * $Id: geqo_misc.c,v 1.28 2000/02/15 20:49:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -179,8 +179,9 @@ geqo_print_path(Query *root, Path *path, int indent) if (join) { jp = (JoinPath *) path; - printf("%s rows=%.0f cost=%f\n", - ptype, path->parent->rows, path->path_cost); + printf("%s rows=%.0f cost=%.2f..%.2f\n", + ptype, path->parent->rows, + path->startup_cost, path->total_cost); switch (nodeTag(path)) { case T_MergePath: @@ -215,8 +216,9 @@ geqo_print_path(Query *root, Path *path, int indent) { int relid = lfirsti(path->parent->relids); - printf("%s(%d) rows=%.0f cost=%f\n", - ptype, relid, path->parent->rows, path->path_cost); + printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n", + ptype, relid, path->parent->rows, + path->startup_cost, path->total_cost); if (IsA(path, IndexPath)) { @@ -241,6 +243,9 @@ geqo_print_rel(Query *root, RelOptInfo *rel) foreach(l, rel->pathlist) geqo_print_path(root, lfirst(l), 1); - printf("\tcheapest 
path:\n"); - geqo_print_path(root, rel->cheapestpath, 1); + printf("\tcheapest startup path:\n"); + geqo_print_path(root, rel->cheapest_startup_path, 1); + + printf("\tcheapest total path:\n"); + geqo_print_path(root, rel->cheapest_total_path, 1); } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 52c30f7d01dd3210fed2127b033a89dd2a04f3c3..572ef00d2e8f2f6b61c215edb25c675cd4d41c76 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.58 2000/02/07 04:40:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.59 2000/02/15 20:49:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -100,7 +100,7 @@ set_base_rel_pathlist(Query *root) /* * Generate paths and add them to the rel's pathlist. * - * add_path/add_pathlist will discard any paths that are dominated + * Note: add_path() will discard any paths that are dominated * by another available path, keeping only those paths that are * superior along at least one dimension of cost or sortedness. */ @@ -109,24 +109,21 @@ set_base_rel_pathlist(Query *root) add_path(rel, create_seqscan_path(rel)); /* Consider TID scans */ - add_pathlist(rel, create_tidscan_paths(root, rel)); + create_tidscan_paths(root, rel); /* Consider index paths for both simple and OR index clauses */ - add_pathlist(rel, create_index_paths(root, - rel, - indices, - rel->baserestrictinfo, - rel->joininfo)); + create_index_paths(root, rel, indices, + rel->baserestrictinfo, + rel->joininfo); /* Note: create_or_index_paths depends on create_index_paths * to have marked OR restriction clauses with relevant indices; - * this is why it doesn't need to be given the full list of indices. + * this is why it doesn't need to be given the list of indices. 
*/ - add_pathlist(rel, create_or_index_paths(root, rel, - rel->baserestrictinfo)); + create_or_index_paths(root, rel, rel->baserestrictinfo); /* Now find the cheapest of the paths for this rel */ - set_cheapest(rel, rel->pathlist); + set_cheapest(rel); } } @@ -196,8 +193,8 @@ make_one_rel_by_joins(Query *root, int levels_needed) xfunc_trypullup(rel); #endif - /* Find and save the cheapest path for this rel */ - set_cheapest(rel, rel->pathlist); + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); #ifdef OPTIMIZER_DEBUG debug_print_rel(root, rel); @@ -279,15 +276,26 @@ print_path(Query *root, Path *path, int indent) if (join) { jp = (JoinPath *) path; - printf("%s rows=%.0f cost=%f\n", - ptype, path->parent->rows, path->path_cost); + + printf("%s rows=%.0f cost=%.2f..%.2f\n", + ptype, path->parent->rows, + path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys="); + print_pathkeys(path->pathkeys, root->rtable); + } + switch (nodeTag(path)) { case T_MergePath: case T_HashPath: - for (i = 0; i < indent + 1; i++) + for (i = 0; i < indent; i++) printf("\t"); - printf(" clauses=("); + printf(" clauses=("); print_joinclauses(root, jp->joinrestrictinfo); printf(")\n"); @@ -297,9 +305,9 @@ print_path(Query *root, Path *path, int indent) if (mp->outersortkeys || mp->innersortkeys) { - for (i = 0; i < indent + 1; i++) + for (i = 0; i < indent; i++) printf("\t"); - printf(" sortouter=%d sortinner=%d\n", + printf(" sortouter=%d sortinner=%d\n", ((mp->outersortkeys) ? 1 : 0), ((mp->innersortkeys) ? 
1 : 0)); } @@ -315,11 +323,14 @@ print_path(Query *root, Path *path, int indent) { int relid = lfirsti(path->parent->relids); - printf("%s(%d) rows=%.0f cost=%f\n", - ptype, relid, path->parent->rows, path->path_cost); + printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n", + ptype, relid, path->parent->rows, + path->startup_cost, path->total_cost); - if (IsA(path, IndexPath)) + if (path->pathkeys) { + for (i = 0; i < indent; i++) + printf("\t"); printf(" pathkeys="); print_pathkeys(path->pathkeys, root->rtable); } @@ -339,8 +350,10 @@ debug_print_rel(Query *root, RelOptInfo *rel) printf("\tpath list:\n"); foreach(l, rel->pathlist) print_path(root, lfirst(l), 1); - printf("\tcheapest path:\n"); - print_path(root, rel->cheapestpath, 1); + printf("\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + printf("\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); } #endif /* OPTIMIZER_DEBUG */ diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7c8d4b63c07f5e5bfd470a41abb8078dda962f9a..c14692d5b97edfe9bbe1ff69ba83a14a92b3f7ea 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3,23 +3,46 @@ * costsize.c * Routines to compute (and set) relation sizes and path costs * - * Path costs are measured in units of disk accesses: one page fetch - * has cost 1. The other primitive unit is the CPU time required to - * process one tuple, which we set at "cpu_page_weight" of a page - * fetch. Obviously, the CPU time per tuple depends on the query - * involved, but the relative CPU and disk speeds of a given platform - * are so variable that we are lucky if we can get useful numbers - * at all. cpu_page_weight is user-settable, in case a particular - * user is clueful enough to have a better-than-default estimate - * of the ratio for his platform. There is also cpu_index_page_weight, - * the cost to process a tuple of an index during an index scan. 
+ * Path costs are measured in units of disk accesses: one sequential page + * fetch has cost 1. All else is scaled relative to a page fetch, using + * the scaling parameters + * + * random_page_cost Cost of a non-sequential page fetch + * cpu_tuple_cost Cost of typical CPU time to process a tuple + * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple + * cpu_operator_cost Cost of CPU time to process a typical WHERE operator + * + * We also use a rough estimate "effective_cache_size" of the number of + * disk pages in Postgres + OS-level disk cache. (We can't simply use + * NBuffers for this purpose because that would ignore the effects of + * the kernel's disk cache.) + * + * Obviously, taking constants for these values is an oversimplification, + * but it's tough enough to get any useful estimates even at this level of + * detail. Note that all of these parameters are user-settable, in case + * the default values are drastically off for a particular platform. + * + * We compute two separate costs for each path: + * total_cost: total estimated cost to fetch all tuples + * startup_cost: cost that is expended before first tuple is fetched + * In some scenarios, such as when there is a LIMIT or we are implementing + * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the + * path's result. A caller can estimate the cost of fetching a partial + * result by interpolating between startup_cost and total_cost. In detail: + * actual_cost = startup_cost + + * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows; + * Note that a relation's rows count (and, by extension, a Plan's plan_rows) + * are set without regard to any LIMIT, so that this equation works properly. + * (Also, these routines guarantee not to set the rows count to zero, so there + * will be no zero divide.) RelOptInfos, Paths, and Plans themselves never + * account for LIMIT. 
* * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,26 +50,25 @@ #include "postgres.h" #include <math.h> -#ifdef HAVE_LIMITS_H -#include <limits.h> -#ifndef MAXINT -#define MAXINT INT_MAX -#endif -#else -#ifdef HAVE_VALUES_H -#include <values.h> -#endif -#endif #include "miscadmin.h" +#include "nodes/plannodes.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/internal.h" #include "optimizer/tlist.h" #include "utils/lsyscache.h" -Cost cpu_page_weight = CPU_PAGE_WEIGHT; -Cost cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT; +#define LOG2(x) (log(x) / 0.693147180559945) +#define LOG6(x) (log(x) / 1.79175946922805) + + +double effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE; +Cost random_page_cost = DEFAULT_RANDOM_PAGE_COST; +Cost cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; +Cost cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; +Cost cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; Cost disable_cost = 100000000.0; @@ -59,53 +81,114 @@ bool enable_mergejoin = true; bool enable_hashjoin = true; +static bool cost_qual_eval_walker(Node *node, Cost *total); static void set_rel_width(Query *root, RelOptInfo *rel); static int compute_attribute_width(TargetEntry *tlistentry); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); -static double base_log(double x, double b); /* * cost_seqscan * Determines and returns the cost of scanning a relation sequentially. 
- * If the relation is a temporary to be materialized from a query - * embedded within a data field (determined by 'relid' containing an - * attribute reference), then a predetermined constant is returned (we - * have NO IDEA how big the result of a POSTQUEL procedure is going to - * be). - * - * disk = p - * cpu = CPU-PAGE-WEIGHT * t + * + * If the relation is a temporary to be materialized from a query + * embedded within a data field (determined by 'relid' containing an + * attribute reference), then a predetermined constant is returned (we + * have NO IDEA how big the result of a POSTQUEL procedure is going to be). + * + * Note: for historical reasons, this routine and the others in this module + * use the passed result Path only to store their startup_cost and total_cost + * results into. All the input data they need is passed as separate + * parameters, even though much of it could be extracted from the result Path. */ -Cost -cost_seqscan(RelOptInfo *baserel) +void +cost_seqscan(Path *path, RelOptInfo *baserel) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; /* Should only be applied to base relations */ Assert(length(baserel->relids) == 1); if (!enable_seqscan) - temp += disable_cost; + startup_cost += disable_cost; + /* disk costs */ if (lfirsti(baserel->relids) < 0) { /* * cost of sequentially scanning a materialized temporary relation */ - temp += _NONAME_SCAN_COST_; + run_cost += _NONAME_SCAN_COST_; } else { - temp += baserel->pages; - temp += cpu_page_weight * baserel->tuples; + /* + * The cost of reading a page sequentially is 1.0, by definition. + * Note that the Unix kernel will typically do some amount of + * read-ahead optimization, so that this cost is less than the true + * cost of reading a page from disk. We ignore that issue here, + * but must take it into account when estimating the cost of + * non-sequential accesses! 
+ */ + run_cost += baserel->pages; /* sequential fetches with cost 1.0 */ } - Assert(temp >= 0); - return temp; + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; + run_cost += cpu_per_tuple * baserel->tuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } +/* + * cost_nonsequential_access + * Estimate the cost of accessing one page at random from a relation + * (or sort temp file) of the given size in pages. + * + * The simplistic model that the cost is random_page_cost is what we want + * to use for large relations; but for small ones that is a serious + * overestimate because of the effects of caching. This routine tries to + * account for that. + * + * Unfortunately we don't have any good way of estimating the effective cache + * size we are working with --- we know that Postgres itself has NBuffers + * internal buffers, but the size of the kernel's disk cache is uncertain, + * and how much of it we get to use is even less certain. We punt the problem + * for now by assuming we are given an effective_cache_size parameter. + * + * Given a guesstimated cache size, we estimate the actual I/O cost per page + * with the entirely ad-hoc equations: + * for rel_size <= effective_cache_size: + * 1 + (random_page_cost/2-1) * (rel_size/effective_cache_size) ** 2 + * for rel_size >= effective_cache_size: + * random_page_cost * (1 - (effective_cache_size/rel_size)/2) + * These give the right asymptotic behavior (=> 1.0 as rel_size becomes + * small, => random_page_cost as it becomes large) and meet in the middle + * with the estimate that the cache is about 50% effective for a relation + * of the same size as effective_cache_size. (XXX this is probably all + * wrong, but I haven't been able to find any theory about how effective + * a disk cache should be presumed to be.) 
+ */ +static Cost +cost_nonsequential_access(double relpages) +{ + double relsize; + + /* don't crash on bad input data */ + if (relpages <= 0.0 || effective_cache_size <= 0.0) + return random_page_cost; + + relsize = relpages / effective_cache_size; + + if (relsize >= 1.0) + return random_page_cost * (1.0 - 0.5 / relsize); + else + return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize; +} /* * cost_index @@ -126,25 +209,28 @@ cost_seqscan(RelOptInfo *baserel) * tuples, but they won't reduce the number of tuples we have to fetch from * the table, so they don't reduce the scan cost. */ -Cost -cost_index(Query *root, +void +cost_index(Path *path, Query *root, RelOptInfo *baserel, IndexOptInfo *index, List *indexQuals, bool is_injoin) { - Cost temp = 0; - Cost indexAccessCost; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + Cost indexStartupCost; + Cost indexTotalCost; Selectivity indexSelectivity; - double reltuples; - double relpages; + double tuples_fetched; + double pages_fetched; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo)); Assert(length(baserel->relids) == 1); if (!enable_indexscan && !is_injoin) - temp += disable_cost; + startup_cost += disable_cost; /* * Call index-access-method-specific code to estimate the processing @@ -152,31 +238,21 @@ cost_index(Query *root, * (ie, the fraction of main-table tuples we will have to retrieve). */ fmgr(index->amcostestimate, root, baserel, index, indexQuals, - &indexAccessCost, &indexSelectivity); + &indexStartupCost, &indexTotalCost, &indexSelectivity); /* all costs for touching index itself included here */ - temp += indexAccessCost; + startup_cost += indexStartupCost; + run_cost += indexTotalCost - indexStartupCost; - /*-------------------- - * Estimate number of main-table tuples and pages touched. 
- * - * Worst case is that each tuple the index tells us to fetch comes - * from a different base-rel page, in which case the I/O cost would be - * 'reltuples' pages. In practice we can expect the number of page - * fetches to be reduced by the buffer cache, because more than one - * tuple can be retrieved per page fetched. Currently, we estimate - * the number of pages to be retrieved as - * MIN(reltuples, relpages) - * This amounts to assuming that the buffer cache is perfectly efficient - * and never ends up reading the same page twice within one scan, which - * of course is too optimistic. On the other hand, we are assuming that - * the target tuples are perfectly uniformly distributed across the - * relation's pages, which is too pessimistic --- any nonuniformity of - * distribution will reduce the number of pages we have to fetch. - * So, we guess-and-hope that these sources of error will more or less - * balance out. + /* + * Estimate number of main-table tuples and pages fetched. * - * XXX need to add a penalty for nonsequential page fetches. + * If the number of tuples is much smaller than the number of pages in + * the relation, each tuple will cost a separate nonsequential fetch. + * If it is comparable or larger, then probably we will be able to avoid + * some fetches. We use a growth rate of log(#tuples/#pages + 1) --- + * probably totally bogus, but intuitively it gives the right shape of + * curve at least. * * XXX if the relation has recently been "clustered" using this index, * then in fact the target tuples will be highly nonuniformly distributed, @@ -184,54 +260,77 @@ cost_index(Query *root, * have no way to know whether the relation has been clustered, nor how * much it's been modified since the last clustering, so we ignore this * effect. Would be nice to do better someday. 
- *-------------------- */ - reltuples = indexSelectivity * baserel->tuples; + tuples_fetched = indexSelectivity * baserel->tuples; - relpages = reltuples; - if (baserel->pages > 0 && baserel->pages < relpages) - relpages = baserel->pages; + if (tuples_fetched > 0 && baserel->pages > 0) + pages_fetched = baserel->pages * + log(tuples_fetched / baserel->pages + 1.0); + else + pages_fetched = tuples_fetched; + + /* + * Now estimate one nonsequential access per page fetched, + * plus appropriate CPU costs per tuple. + */ /* disk costs for main table */ - temp += relpages; + run_cost += pages_fetched * cost_nonsequential_access(baserel->pages); - /* CPU costs for heap tuples */ - temp += cpu_page_weight * reltuples; + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; + /* + * Assume that the indexquals will be removed from the list of + * restriction clauses that we actually have to evaluate as qpquals. + * This is not completely right, but it's close. + * For a lossy index, however, we will have to recheck all the quals. + */ + if (! index->lossy) + cpu_per_tuple -= cost_qual_eval(indexQuals); - Assert(temp >= 0); - return temp; + run_cost += cpu_per_tuple * tuples_fetched; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } /* * cost_tidscan * Determines and returns the cost of scanning a relation using tid-s. 
- * - * disk = number of tids - * cpu = CPU-PAGE-WEIGHT * number_of_tids */ -Cost -cost_tidscan(RelOptInfo *baserel, List *tideval) +void +cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + int ntuples = length(tideval); if (!enable_tidscan) - temp += disable_cost; + startup_cost += disable_cost; - temp += (1.0 + cpu_page_weight) * length(tideval); + /* disk costs --- assume each tuple on a different page */ + run_cost += random_page_cost * ntuples; - return temp; + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; + run_cost += cpu_per_tuple * ntuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } /* * cost_sort * Determines and returns the cost of sorting a relation. * + * The cost of supplying the input data is NOT included; the caller should + * add that cost to both startup and total costs returned from this routine! + * * If the total volume of data to sort is less than SortMem, we will do * an in-memory sort, which requires no I/O and about t*log2(t) tuple - * comparisons for t tuples. We use cpu_index_page_weight as the cost - * of a tuple comparison (is this reasonable, or do we need another - * basic parameter?). + * comparisons for t tuples. * * If the total volume exceeds SortMem, we switch to a tape-style merge * algorithm. There will still be about t*log2(t) tuple comparisons in @@ -240,8 +339,14 @@ cost_tidscan(RelOptInfo *baserel, List *tideval) * number of initial runs formed (log6 because tuplesort.c uses six-tape * merging). 
Since the average initial run should be about twice SortMem, * we have - * disk = 2 * p * ceil(log6(p / (2*SortMem))) - * cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t) + * disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem))) + * cpu = comparison_cost * t * log2(t) + * + * The disk traffic is assumed to be half sequential and half random + * accesses (XXX can't we refine that guess?) + * + * We charge two operator evals per tuple comparison, which should be in + * the right ballpark in most cases. * * 'pathkeys' is a list of sort keys * 'tuples' is the number of tuples in the relation @@ -252,15 +357,16 @@ cost_tidscan(RelOptInfo *baserel, List *tideval) * currently do anything with pathkeys anyway, that doesn't matter... * but if it ever does, it should react gracefully to lack of key data. */ -Cost -cost_sort(List *pathkeys, double tuples, int width) +void +cost_sort(Path *path, List *pathkeys, double tuples, int width) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; double nbytes = relation_byte_size(tuples, width); long sortmembytes = SortMem * 1024L; if (!enable_sort) - temp += disable_cost; + startup_cost += disable_cost; /* * We want to be sure the cost of a sort is never estimated as zero, @@ -270,42 +376,39 @@ cost_sort(List *pathkeys, double tuples, int width) if (tuples < 2.0) tuples = 2.0; - temp += cpu_index_page_weight * tuples * base_log(tuples, 2.0); + /* + * CPU costs + * + * Assume about two operator evals per tuple comparison + * and N log2 N comparisons + */ + startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); + /* disk costs */ if (nbytes > sortmembytes) { double npages = ceil(nbytes / BLCKSZ); double nruns = nbytes / (sortmembytes * 2); - double log_runs = ceil(base_log(nruns, 6.0)); + double log_runs = ceil(LOG6(nruns)); + double npageaccesses; if (log_runs < 1.0) log_runs = 1.0; - temp += 2 * npages * log_runs; + npageaccesses = 2.0 * npages * log_runs; + /* Assume half are sequential (cost 1), half are not */ + 
startup_cost += npageaccesses * + (1.0 + cost_nonsequential_access(npages)) * 0.5; } - Assert(temp > 0); - return temp; -} - - -/* - * cost_result - * Determines and returns the cost of writing a relation of 'tuples' - * tuples of 'width' bytes out to a result relation. - */ -#ifdef NOT_USED -Cost -cost_result(double tuples, int width) -{ - Cost temp = 0; - - temp += page_size(tuples, width); - temp += cpu_page_weight * tuples; - Assert(temp >= 0); - return temp; + /* + * Note: should we bother to assign a nonzero run_cost to reflect the + * overhead of extracting tuples from the sort result? Probably not + * worth worrying about. + */ + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } -#endif /* * cost_nestloop @@ -314,23 +417,45 @@ cost_result(double tuples, int width) * * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation + * 'restrictlist' are the RestrictInfo nodes to be applied at the join * 'is_indexjoin' is true if we are using an indexscan for the inner relation + * (not currently needed here; the indexscan adjusts its cost...) */ -Cost -cost_nestloop(Path *outer_path, +void +cost_nestloop(Path *path, + Path *outer_path, Path *inner_path, + List *restrictlist, bool is_indexjoin) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + double ntuples; if (!enable_nestloop) - temp += disable_cost; + startup_cost += disable_cost; + + /* cost of source data */ + /* + * NOTE: we assume that the inner path's startup_cost is paid once, not + * over again on each restart. This is certainly correct if the inner + * path is materialized. Are there any cases where it is wrong? 
+ */ + startup_cost += outer_path->startup_cost + inner_path->startup_cost; + run_cost += outer_path->total_cost - outer_path->startup_cost; + run_cost += outer_path->parent->rows * + (inner_path->total_cost - inner_path->startup_cost); - temp += outer_path->path_cost; - temp += outer_path->parent->rows * inner_path->path_cost; + /* number of tuples processed (not number emitted!) */ + ntuples = outer_path->parent->rows * inner_path->parent->rows; - Assert(temp >= 0); - return temp; + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); + run_cost += cpu_per_tuple * ntuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } /* @@ -340,33 +465,66 @@ cost_nestloop(Path *outer_path, * * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation + * 'restrictlist' are the RestrictInfo nodes to be applied at the join * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used * to sort the outer and inner relations, or NIL if no explicit * sort is needed because the source path is already ordered */ -Cost -cost_mergejoin(Path *outer_path, +void +cost_mergejoin(Path *path, + Path *outer_path, Path *inner_path, + List *restrictlist, List *outersortkeys, List *innersortkeys) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + double ntuples; + Path sort_path; /* dummy for result of cost_sort */ if (!enable_mergejoin) - temp += disable_cost; + startup_cost += disable_cost; /* cost of source data */ - temp += outer_path->path_cost + inner_path->path_cost; - - if (outersortkeys) /* do we need to sort? */ - temp += cost_sort(outersortkeys, - outer_path->parent->rows, - outer_path->parent->width); + /* + * Note we are assuming that each source tuple is fetched just once, + * which is not right in the presence of equal keys. If we had a way of + * estimating the proportion of equal keys, we could apply a correction + * factor... 
+ */ + if (outersortkeys) /* do we need to sort outer? */ + { + startup_cost += outer_path->total_cost; + cost_sort(&sort_path, + outersortkeys, + outer_path->parent->rows, + outer_path->parent->width); + startup_cost += sort_path.startup_cost; + run_cost += sort_path.total_cost - sort_path.startup_cost; + } + else + { + startup_cost += outer_path->startup_cost; + run_cost += outer_path->total_cost - outer_path->startup_cost; + } - if (innersortkeys) /* do we need to sort? */ - temp += cost_sort(innersortkeys, - inner_path->parent->rows, - inner_path->parent->width); + if (innersortkeys) /* do we need to sort inner? */ + { + startup_cost += inner_path->total_cost; + cost_sort(&sort_path, + innersortkeys, + inner_path->parent->rows, + inner_path->parent->width); + startup_cost += sort_path.startup_cost; + run_cost += sort_path.total_cost - sort_path.startup_cost; + } + else + { + startup_cost += inner_path->startup_cost; + run_cost += inner_path->total_cost - inner_path->startup_cost; + } /* * Estimate the number of tuples to be processed in the mergejoin itself @@ -374,11 +532,14 @@ cost_mergejoin(Path *outer_path, * underestimate if there are many equal-keyed tuples in either relation, * but we have no good way of estimating that... */ - temp += cpu_page_weight * (outer_path->parent->rows + - inner_path->parent->rows); + ntuples = outer_path->parent->rows + inner_path->parent->rows; - Assert(temp >= 0); - return temp; + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); + run_cost += cpu_per_tuple * ntuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; } /* @@ -388,15 +549,21 @@ cost_mergejoin(Path *outer_path, * * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation + * 'restrictlist' are the RestrictInfo nodes to be applied at the join * 'innerdisbursion' is an estimate of the disbursion statistic * for the inner hash key. 
*/ -Cost -cost_hashjoin(Path *outer_path, +void +cost_hashjoin(Path *path, + Path *outer_path, Path *inner_path, + List *restrictlist, Selectivity innerdisbursion) { - Cost temp = 0; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + double ntuples; double outerbytes = relation_byte_size(outer_path->parent->rows, outer_path->parent->width); double innerbytes = relation_byte_size(inner_path->parent->rows, @@ -404,48 +571,169 @@ cost_hashjoin(Path *outer_path, long hashtablebytes = SortMem * 1024L; if (!enable_hashjoin) - temp += disable_cost; + startup_cost += disable_cost; /* cost of source data */ - temp += outer_path->path_cost + inner_path->path_cost; + startup_cost += outer_path->startup_cost; + run_cost += outer_path->total_cost - outer_path->startup_cost; + startup_cost += inner_path->total_cost; - /* cost of computing hash function: must do it once per tuple */ - temp += cpu_page_weight * (outer_path->parent->rows + - inner_path->parent->rows); + /* cost of computing hash function: must do it once per input tuple */ + startup_cost += cpu_operator_cost * inner_path->parent->rows; + run_cost += cpu_operator_cost * outer_path->parent->rows; /* the number of tuple comparisons needed is the number of outer * tuples times the typical hash bucket size, which we estimate - * conservatively as the inner disbursion times the inner tuple - * count. The cost per comparison is set at cpu_index_page_weight; - * is that reasonable, or do we need another basic parameter? + * conservatively as the inner disbursion times the inner tuple count. */ - temp += cpu_index_page_weight * outer_path->parent->rows * + run_cost += cpu_operator_cost * outer_path->parent->rows * (inner_path->parent->rows * innerdisbursion); + /* + * Estimate the number of tuples that get through the hashing filter + * as one per tuple in the two source relations. 
This could be a drastic + * underestimate if there are many equal-keyed tuples in either relation, + * but we have no good way of estimating that... + */ + ntuples = outer_path->parent->rows + inner_path->parent->rows; + + /* CPU costs */ + cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); + run_cost += cpu_per_tuple * ntuples; + /* * if inner relation is too big then we will need to "batch" the join, * which implies writing and reading most of the tuples to disk an - * extra time. Charge one cost unit per page of I/O. + * extra time. Charge one cost unit per page of I/O (correct since + * it should be nice and sequential...). Writing the inner rel counts + * as startup cost, all the rest as run cost. */ if (innerbytes > hashtablebytes) - temp += 2 * (page_size(outer_path->parent->rows, - outer_path->parent->width) + - page_size(inner_path->parent->rows, - inner_path->parent->width)); + { + double outerpages = page_size(outer_path->parent->rows, + outer_path->parent->width); + double innerpages = page_size(inner_path->parent->rows, + inner_path->parent->width); + + startup_cost += innerpages; + run_cost += innerpages + 2 * outerpages; + } /* * Bias against putting larger relation on inside. We don't want * an absolute prohibition, though, since larger relation might have * better disbursion --- and we can't trust the size estimates - * unreservedly, anyway. + * unreservedly, anyway. Instead, inflate the startup cost by + * the square root of the size ratio. (Why square root? No real good + * reason, but it seems reasonable...) + */ + if (innerbytes > outerbytes && outerbytes > 0) + { + startup_cost *= sqrt(innerbytes / outerbytes); + } + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + + +/* + * cost_qual_eval + * Estimate the CPU cost of evaluating a WHERE clause (once). + * The input can be either an implicitly-ANDed list of boolean + * expressions, or a list of RestrictInfo nodes. 
+ */ +Cost +cost_qual_eval(List *quals) +{ + Cost total = 0; + + cost_qual_eval_walker((Node *) quals, &total); + return total; +} + +static bool +cost_qual_eval_walker(Node *node, Cost *total) +{ + if (node == NULL) + return false; + /* + * Our basic strategy is to charge one cpu_operator_cost for each + * operator or function node in the given tree. Vars and Consts + * are charged zero, and so are boolean operators (AND, OR, NOT). + * Simplistic, but a lot better than no model at all. + * + * Should we try to account for the possibility of short-circuit + * evaluation of AND/OR? */ - if (innerbytes > outerbytes) - temp *= 1.1; /* is this an OK fudge factor? */ + if (IsA(node, Expr)) + { + Expr *expr = (Expr *) node; + + switch (expr->opType) + { + case OP_EXPR: + case FUNC_EXPR: + *total += cpu_operator_cost; + break; + case OR_EXPR: + case AND_EXPR: + case NOT_EXPR: + break; + case SUBPLAN_EXPR: + /* + * A subplan node in an expression indicates that the subplan + * will be executed on each evaluation, so charge accordingly. + * (We assume that sub-selects that can be executed as + * InitPlans have already been removed from the expression.) + * + * NOTE: this logic should agree with make_subplan in + * subselect.c. 
+ */ + { + SubPlan *subplan = (SubPlan *) expr->oper; + Plan *plan = subplan->plan; + Cost subcost; + + if (subplan->sublink->subLinkType == EXISTS_SUBLINK) + { + /* we only need to fetch 1 tuple */ + subcost = plan->startup_cost + + (plan->total_cost - plan->startup_cost) / plan->plan_rows; + } + else if (subplan->sublink->subLinkType == EXPR_SUBLINK) + { + /* assume we need all tuples */ + subcost = plan->total_cost; + } + else + { + /* assume we need 50% of the tuples */ + subcost = plan->startup_cost + + 0.50 * (plan->total_cost - plan->startup_cost); + } + *total += subcost; + } + break; + } + /* fall through to examine args of Expr node */ + } + /* + * expression_tree_walker doesn't know what to do with RestrictInfo nodes, + * but we just want to recurse through them. + */ + if (IsA(node, RestrictInfo)) + { + RestrictInfo *restrictinfo = (RestrictInfo *) node; - Assert(temp >= 0); - return temp; + return cost_qual_eval_walker((Node *) restrictinfo->clause, total); + } + /* Otherwise, recurse. */ + return expression_tree_walker(node, cost_qual_eval_walker, + (void *) total); } + /* * set_baserel_size_estimates * Set the size estimates for the given base relation. @@ -457,6 +745,7 @@ cost_hashjoin(Path *outer_path, * rows: the estimated number of output tuples (after applying * restriction clauses). * width: the estimated average output tuple width in bytes. + * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. */ void set_baserel_size_estimates(Query *root, RelOptInfo *rel) @@ -468,7 +757,14 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel) restrictlist_selectivity(root, rel->baserestrictinfo, lfirsti(rel->relids)); - Assert(rel->rows >= 0); + /* + * Force estimate to be at least one row, to make explain output look + * better and to avoid possible divide-by-zero when interpolating cost. 
+ */ + if (rel->rows < 1.0) + rel->rows = 1.0; + + rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo); set_rel_width(root, rel); } @@ -513,7 +809,12 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, restrictlist, 0); - Assert(temp >= 0); + /* + * Force estimate to be at least one row, to make explain output look + * better and to avoid possible divide-by-zero when interpolating cost. + */ + if (temp < 1.0) + temp = 1.0; rel->rows = temp; /* @@ -582,9 +883,3 @@ page_size(double tuples, int width) { return ceil(relation_byte_size(tuples, width) / BLCKSZ); } - -static double -base_log(double x, double b) -{ - return log(x) / log(b); -} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 4c2b0109bc02421b1b64b59f3d2b3ba2f2315818..edb16ce0d6d26620d9cfc9cba5393ad31f51b341 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.79 2000/02/05 18:26:09 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,7 +83,8 @@ static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index, List *joininfo_list); static bool useful_for_ordering(Query *root, RelOptInfo *rel, - IndexOptInfo *index); + IndexOptInfo *index, + ScanDirection scandir); static bool match_index_to_operand(int indexkey, Var *operand, RelOptInfo *rel, IndexOptInfo *index); static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel, @@ -106,6 +107,8 @@ static bool string_lessthan(const char * str1, const char * str2, /* * create_index_paths() * Generate all interesting index paths for the given relation. 
+ * Candidate paths are added to the rel's pathlist (using add_path). + * Additional IndexPath nodes may also be added to rel's innerjoin list. * * To be considered for an index scan, an index must match one or more * restriction clauses or join clauses from the query's qual condition, @@ -120,29 +123,26 @@ static bool string_lessthan(const char * str1, const char * str2, * in its join clauses. In that context, values for the other rels' * attributes are available and fixed during any one scan of the indexpath. * - * This routine's return value is a list of plain IndexPaths for each - * index the routine deems potentially interesting for the current query + * An IndexPath is generated and submitted to add_path() for each index + * this routine deems potentially interesting for the current query * (at most one IndexPath per index on the given relation). An innerjoin * path is also generated for each interesting combination of outer join - * relations. The innerjoin paths are *not* in the return list, but are - * appended to the "innerjoin" list of the relation itself. + * relations. The innerjoin paths are *not* passed to add_path(), but are + * appended to the "innerjoin" list of the relation for later consideration + * in nested-loop joins. * * 'rel' is the relation for which we want to generate index paths * 'indices' is a list of available indexes for 'rel' * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel' * 'joininfo_list' is a list of joininfo nodes for 'rel' - * - * Returns a list of IndexPath access path descriptors. Additional - * IndexPath nodes may also be added to the rel->innerjoin list. 
*/ -List * +void create_index_paths(Query *root, RelOptInfo *rel, List *indices, List *restrictinfo_list, List *joininfo_list) { - List *retval = NIL; List *ilist; foreach(ilist, indices) @@ -189,9 +189,9 @@ create_index_paths(Query *root, restrictinfo_list); if (restrictclauses != NIL) - retval = lappend(retval, - create_index_path(root, rel, index, - restrictclauses)); + add_path(rel, (Path *) create_index_path(root, rel, index, + restrictclauses, + NoMovementScanDirection)); /* * 3. If this index can be used for a mergejoin, then create an @@ -205,10 +205,22 @@ create_index_paths(Query *root, if (restrictclauses == NIL) { if (useful_for_mergejoin(rel, index, joininfo_list) || - useful_for_ordering(root, rel, index)) - retval = lappend(retval, - create_index_path(root, rel, index, NIL)); + useful_for_ordering(root, rel, index, ForwardScanDirection)) + add_path(rel, (Path *) + create_index_path(root, rel, index, + NIL, + ForwardScanDirection)); } + /* + * Currently, backwards scan is never considered except for the case + * of matching a query result ordering. Possibly should consider + * it in other places? + */ + if (useful_for_ordering(root, rel, index, BackwardScanDirection)) + add_path(rel, (Path *) + create_index_path(root, rel, index, + NIL, + BackwardScanDirection)); /* * 4. Create an innerjoin index path for each combination of @@ -231,8 +243,6 @@ create_index_paths(Query *root, joinouterrelids)); } } - - return retval; } @@ -892,39 +902,26 @@ useful_for_mergejoin(RelOptInfo *rel, * Determine whether the given index can produce an ordering matching * the order that is wanted for the query result. * - * We check to see whether either forward or backward scan direction can - * match the specified pathkeys. 
- * * 'rel' is the relation for which 'index' is defined + * 'scandir' is the contemplated scan direction */ static bool useful_for_ordering(Query *root, RelOptInfo *rel, - IndexOptInfo *index) + IndexOptInfo *index, + ScanDirection scandir) { List *index_pathkeys; if (root->query_pathkeys == NIL) return false; /* no special ordering requested */ - index_pathkeys = build_index_pathkeys(root, rel, index); + index_pathkeys = build_index_pathkeys(root, rel, index, scandir); if (index_pathkeys == NIL) return false; /* unordered index */ - if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys)) - return true; - - /* caution: commute_pathkeys destructively modifies its argument; - * safe because we just built the index_pathkeys for local use here. - */ - if (commute_pathkeys(index_pathkeys)) - { - if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys)) - return true; /* useful as a reverse-order path */ - } - - return false; + return pathkeys_contained_in(root->query_pathkeys, index_pathkeys); } /**************************************************************************** @@ -1433,7 +1430,12 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, pathnode->path.pathtype = T_IndexScan; pathnode->path.parent = rel; - pathnode->path.pathkeys = build_index_pathkeys(root, rel, index); + /* + * There's no point in marking the path with any pathkeys, since + * it will only ever be used as the inner path of a nestloop, + * and so its ordering does not matter. + */ + pathnode->path.pathkeys = NIL; indexquals = get_actual_clauses(clausegroup); /* expand special operators to indexquals the executor can handle */ @@ -1446,11 +1448,13 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, pathnode->indexid = lconsi(index->indexoid, NIL); pathnode->indexqual = lcons(indexquals, NIL); + /* We don't actually care what order the index scans in ... 
*/ + pathnode->indexscandir = NoMovementScanDirection; + /* joinrelids saves the rels needed on the outer side of the join */ pathnode->joinrelids = lfirst(outerrelids_list); - pathnode->path.path_cost = cost_index(root, rel, index, indexquals, - true); + cost_index(&pathnode->path, root, rel, index, indexquals, true); path_list = lappend(path_list, pathnode); outerrelids_list = lnext(outerrelids_list); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index f8912a1a5477b3e48131da8e74f871b3f8eb6cbc..091e2e40c7922a9e249f234c7de2e9077b5d6aac 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.51 2000/02/07 04:40:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.52 2000/02/15 20:49:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,24 +27,21 @@ #include "parser/parsetree.h" #include "utils/lsyscache.h" +static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list); +static void match_unsorted_outer(Query *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list); +#ifdef NOT_USED +static void match_unsorted_inner(Query *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list); +#endif +static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist); static Path *best_innerjoin(List *join_paths, List *outer_relid); -static List *sort_inner_and_outer(RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - List *mergeclause_list); -static List *match_unsorted_outer(RelOptInfo 
*joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, List *restrictlist, - List *outerpath_list, Path *cheapest_inner, - Path *best_innerjoin, - List *mergeclause_list); -static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, List *restrictlist, - List *innerpath_list, - List *mergeclause_list); -static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist); static Selectivity estimate_disbursion(Query *root, Var *var); static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *outerrel, @@ -70,14 +67,8 @@ add_paths_to_joinrel(Query *root, RelOptInfo *innerrel, List *restrictlist) { - Path *bestinnerjoin; List *mergeclause_list = NIL; - /* - * Get the best inner join for match_unsorted_outer(). - */ - bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids); - /* * Find potential mergejoin clauses. */ @@ -91,84 +82,41 @@ add_paths_to_joinrel(Query *root, * 1. Consider mergejoin paths where both relations must be * explicitly sorted. */ - add_pathlist(joinrel, sort_inner_and_outer(joinrel, - outerrel, - innerrel, - restrictlist, - mergeclause_list)); + sort_inner_and_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list); /* * 2. Consider paths where the outer relation need not be * explicitly sorted. This includes both nestloops and * mergejoins where the outer path is already ordered. */ - add_pathlist(joinrel, match_unsorted_outer(joinrel, - outerrel, - innerrel, - restrictlist, - outerrel->pathlist, - innerrel->cheapestpath, - bestinnerjoin, - mergeclause_list)); + match_unsorted_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list); +#ifdef NOT_USED /* * 3. Consider paths where the inner relation need not be * explicitly sorted. This includes mergejoins only * (nestloops were already built in match_unsorted_outer). + * + * Diked out as redundant 2/13/2000 -- tgl. 
There isn't any + * really significant difference between the inner and outer + * side of a mergejoin, so match_unsorted_inner creates no paths + * that aren't equivalent to those made by match_unsorted_outer + * when add_paths_to_joinrel() is invoked with the two rels given + * in the other order. */ - add_pathlist(joinrel, match_unsorted_inner(joinrel, - outerrel, - innerrel, - restrictlist, - innerrel->pathlist, - mergeclause_list)); + match_unsorted_inner(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list); +#endif /* * 4. Consider paths where both outer and inner relations must be * hashed before being joined. */ if (enable_hashjoin) - add_pathlist(joinrel, hash_inner_and_outer(root, - joinrel, - outerrel, - innerrel, - restrictlist)); -} - -/* - * best_innerjoin - * Find the cheapest index path that has already been identified by - * indexable_joinclauses() as being a possible inner path for the given - * outer relation(s) in a nestloop join. - * - * 'join_paths' is a list of potential inner indexscan join paths - * 'outer_relids' is the relid list of the outer join relation - * - * Returns the pathnode of the best path, or NULL if there's no - * usable path. - */ -static Path * -best_innerjoin(List *join_paths, Relids outer_relids) -{ - Path *cheapest = (Path *) NULL; - List *join_path; - - foreach(join_path, join_paths) - { - Path *path = (Path *) lfirst(join_path); - - Assert(IsA(path, IndexPath)); - - /* path->joinrelids is the set of base rels that must be part of - * outer_relids in order to use this inner path, because those - * rels are used in the index join quals of this inner path. 
- */ - if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) && - (cheapest == NULL || - path_is_cheaper(path, cheapest))) - cheapest = path; - } - return cheapest; + hash_inner_and_outer(root, joinrel, outerrel, innerrel, + restrictlist); } /* @@ -183,17 +131,15 @@ best_innerjoin(List *join_paths, Relids outer_relids) * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join - * - * Returns a list of mergejoin paths. */ -static List * -sort_inner_and_outer(RelOptInfo *joinrel, +static void +sort_inner_and_outer(Query *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list) { - List *path_list = NIL; List *i; /* @@ -223,7 +169,6 @@ sort_inner_and_outer(RelOptInfo *joinrel, List *outerkeys; List *innerkeys; List *merge_pathkeys; - MergePath *path_node; /* Make a mergeclause list with this guy first. */ curclause_list = lcons(restrictinfo, @@ -231,31 +176,37 @@ sort_inner_and_outer(RelOptInfo *joinrel, listCopy(mergeclause_list))); /* Build sort pathkeys for both sides. * - * Note: it's possible that the cheapest path will already be - * sorted properly --- create_mergejoin_path will detect that case - * and suppress an explicit sort step. + * Note: it's possible that the cheapest paths will already be + * sorted properly. create_mergejoin_path will detect that case + * and suppress an explicit sort step, so we needn't do so here. */ - outerkeys = make_pathkeys_for_mergeclauses(curclause_list, + outerkeys = make_pathkeys_for_mergeclauses(root, + curclause_list, outerrel->targetlist); - innerkeys = make_pathkeys_for_mergeclauses(curclause_list, + innerkeys = make_pathkeys_for_mergeclauses(root, + curclause_list, innerrel->targetlist); /* Build pathkeys representing output sort order. */ merge_pathkeys = build_join_pathkeys(outerkeys, joinrel->targetlist, - curclause_list); - /* And now we can make the path. 
*/ - path_node = create_mergejoin_path(joinrel, - outerrel->cheapestpath, - innerrel->cheapestpath, - restrictlist, - merge_pathkeys, - get_actual_clauses(curclause_list), - outerkeys, - innerkeys); + root->equi_key_list); - path_list = lappend(path_list, path_node); + /* + * And now we can make the path. We only consider the cheapest- + * total-cost input paths, since we are assuming here that a sort + * is required. We will consider cheapest-startup-cost input paths + * later, and only if they don't need a sort. + */ + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + outerrel->cheapest_total_path, + innerrel->cheapest_total_path, + restrictlist, + merge_pathkeys, + get_actual_clauses(curclause_list), + outerkeys, + innerkeys)); } - return path_list; } /* @@ -266,74 +217,56 @@ sort_inner_and_outer(RelOptInfo *joinrel, * only outer paths that are already ordered well enough for merging). * * We always generate a nestloop path for each available outer path. - * If an indexscan inner path exists that is compatible with this outer rel - * and cheaper than the cheapest general-purpose inner path, then we use - * the indexscan inner path; else we use the cheapest general-purpose inner. + * In fact we may generate as many as three: one on the cheapest-total-cost + * inner path, one on the cheapest-startup-cost inner path (if different), + * and one on the best inner-indexscan path (if any). * * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: use the cheapest - * inner path (sorting it if it's not suitably ordered already), or using an - * inner path that is already suitably ordered for the merge. If the - * cheapest inner path is suitably ordered, then by definition it's the one - * to use. Otherwise, we look for ordered paths that are cheaper than the - * cheapest inner + sort costs. 
If we have several mergeclauses, it could be - * that there is no inner path (or only a very expensive one) for the full - * list of mergeclauses, but better paths exist if we truncate the - * mergeclause list (thereby discarding some sort key requirements). So, we - * consider truncations of the mergeclause list as well as the full list. - * In any case, we find the cheapest suitable path and generate a single - * output mergejoin path. (Since all the possible mergejoins will have - * identical output pathkeys, there is no need to keep any but the cheapest.) + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join - * 'outerpath_list' is the list of possible outer paths - * 'cheapest_inner' is the cheapest inner path - * 'best_innerjoin' is the best inner index path (if any) * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join - * - * Returns a list of possible join path nodes. 
*/ -static List * -match_unsorted_outer(RelOptInfo *joinrel, +static void +match_unsorted_outer(Query *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - List *outerpath_list, - Path *cheapest_inner, - Path *best_innerjoin, List *mergeclause_list) { - List *path_list = NIL; - Path *nestinnerpath; + Path *bestinnerjoin; List *i; /* - * We only use the best innerjoin indexpath if it is cheaper - * than the cheapest general-purpose inner path. + * Get the best innerjoin indexpath (if any) for this outer rel. + * It's the same for all outer paths. */ - if (best_innerjoin && - path_is_cheaper(best_innerjoin, cheapest_inner)) - nestinnerpath = best_innerjoin; - else - nestinnerpath = cheapest_inner; + bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids); - foreach(i, outerpath_list) + foreach(i, outerrel->pathlist) { Path *outerpath = (Path *) lfirst(i); - List *mergeclauses; List *merge_pathkeys; + List *mergeclauses; List *innersortkeys; - Path *mergeinnerpath; - int mergeclausecount; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + int clausecnt; - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys, - mergeclause_list); /* * The result will have this sort order (even if it is implemented * as a nestloop, and even if some of the mergeclauses are implemented @@ -341,91 +274,137 @@ match_unsorted_outer(RelOptInfo *joinrel, */ merge_pathkeys = build_join_pathkeys(outerpath->pathkeys, joinrel->targetlist, - mergeclauses); + root->equi_key_list); + + /* + * Always consider a nestloop join with this outer and cheapest- + * total-cost inner. Consider nestloops using the cheapest- + * startup-cost inner as well, and the best innerjoin indexpath. 
+ */ + add_path(joinrel, (Path *) + create_nestloop_path(joinrel, + outerpath, + innerrel->cheapest_total_path, + restrictlist, + merge_pathkeys)); + if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path) + add_path(joinrel, (Path *) + create_nestloop_path(joinrel, + outerpath, + innerrel->cheapest_startup_path, + restrictlist, + merge_pathkeys)); + if (bestinnerjoin != NULL) + add_path(joinrel, (Path *) + create_nestloop_path(joinrel, + outerpath, + bestinnerjoin, + restrictlist, + merge_pathkeys)); - /* Always consider a nestloop join with this outer and best inner. */ - path_list = lappend(path_list, - create_nestloop_path(joinrel, - outerpath, - nestinnerpath, - restrictlist, - merge_pathkeys)); + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys, + mergeclause_list); /* Done with this outer path if no chance for a mergejoin */ if (mergeclauses == NIL) continue; /* Compute the required ordering of the inner path */ - innersortkeys = make_pathkeys_for_mergeclauses(mergeclauses, + innersortkeys = make_pathkeys_for_mergeclauses(root, + mergeclauses, innerrel->targetlist); - /* Set up on the assumption that we will use the cheapest_inner */ - mergeinnerpath = cheapest_inner; - mergeclausecount = length(mergeclauses); - - /* If the cheapest_inner doesn't need to be sorted, it is the winner - * by definition. + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. + * Since a sort will be needed, only cheapest total cost matters. */ - if (pathkeys_contained_in(innersortkeys, - cheapest_inner->pathkeys)) - { - /* cheapest_inner is the winner */ - innersortkeys = NIL; /* we do not need to sort it... 
*/ - } - else - { - /* look for a presorted path that's cheaper */ - List *trialsortkeys = listCopy(innersortkeys); - Cost cheapest_cost; - int clausecount; + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + outerpath, + innerrel->cheapest_total_path, + restrictlist, + merge_pathkeys, + get_actual_clauses(mergeclauses), + NIL, + innersortkeys)); - cheapest_cost = cheapest_inner->path_cost + - cost_sort(innersortkeys, innerrel->rows, innerrel->width); + /* + * Look for presorted inner paths that satisfy the mergeclause list + * or any truncation thereof. Here, we consider both cheap startup + * cost and cheap total cost. + */ + trialsortkeys = listCopy(innersortkeys); /* modifiable copy */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; - for (clausecount = mergeclausecount; - clausecount > 0; - clausecount--) + for (clausecnt = length(mergeclauses); clausecnt > 0; clausecnt--) + { + Path *innerpath; + + /* Look for an inner path ordered well enough to merge with + * the first 'clausecnt' mergeclauses. NB: trialsortkeys list + * is modified destructively, which is why we made a copy... + */ + trialsortkeys = ltruncate(clausecnt, trialsortkeys); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + TOTAL_COST); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) { - Path *trialinnerpath; - - /* Look for an inner path ordered well enough to merge with - * the first 'clausecount' mergeclauses. NB: trialsortkeys - * is modified destructively, which is why we made a copy... 
- */ - trialinnerpath = - get_cheapest_path_for_pathkeys(innerrel->pathlist, - ltruncate(clausecount, - trialsortkeys), - false); - if (trialinnerpath != NULL && - trialinnerpath->path_cost < cheapest_cost) + /* Found a cheap (or even-cheaper) sorted path */ + List *newclauses; + + newclauses = ltruncate(clausecnt, + get_actual_clauses(mergeclauses)); + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + outerpath, + innerpath, + restrictlist, + merge_pathkeys, + newclauses, + NIL, + NIL)); + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + STARTUP_COST); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + if (innerpath != cheapest_total_inner) { - /* Found a cheaper (or even-cheaper) sorted path */ - cheapest_cost = trialinnerpath->path_cost; - mergeinnerpath = trialinnerpath; - mergeclausecount = clausecount; - innersortkeys = NIL; /* we will not need to sort it... 
*/ + List *newclauses; + + newclauses = ltruncate(clausecnt, + get_actual_clauses(mergeclauses)); + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + outerpath, + innerpath, + restrictlist, + merge_pathkeys, + newclauses, + NIL, + NIL)); } + cheapest_startup_inner = innerpath; } } - - /* Finally, we can build the mergejoin path */ - mergeclauses = ltruncate(mergeclausecount, - get_actual_clauses(mergeclauses)); - path_list = lappend(path_list, - create_mergejoin_path(joinrel, - outerpath, - mergeinnerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - innersortkeys)); } - - return path_list; } +#ifdef NOT_USED + /* * match_unsorted_inner * Generate mergejoin paths that use an explicit sort of the outer path @@ -436,86 +415,105 @@ match_unsorted_outer(RelOptInfo *joinrel, * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join - * 'innerpath_list' is the list of possible inner join paths * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join - * - * Returns a list of possible merge paths. 
*/ -static List * -match_unsorted_inner(RelOptInfo *joinrel, +static void +match_unsorted_inner(Query *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - List *innerpath_list, List *mergeclause_list) { - List *path_list = NIL; List *i; - foreach(i, innerpath_list) + foreach(i, innerrel->pathlist) { Path *innerpath = (Path *) lfirst(i); List *mergeclauses; + List *outersortkeys; + List *merge_pathkeys; + Path *totalouterpath; + Path *startupouterpath; /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys, mergeclause_list); + if (mergeclauses == NIL) + continue; - if (mergeclauses) - { - List *outersortkeys; - Path *mergeouterpath; - List *merge_pathkeys; - - /* Compute the required ordering of the outer path */ - outersortkeys = - make_pathkeys_for_mergeclauses(mergeclauses, - outerrel->targetlist); - - /* Look for an outer path already ordered well enough to merge */ - mergeouterpath = - get_cheapest_path_for_pathkeys(outerrel->pathlist, - outersortkeys, - false); - - /* Should we use the mergeouter, or sort the cheapest outer? */ - if (mergeouterpath != NULL && - mergeouterpath->path_cost <= - (outerrel->cheapestpath->path_cost + - cost_sort(outersortkeys, outerrel->rows, outerrel->width))) - { - /* Use mergeouterpath */ - outersortkeys = NIL; /* no explicit sort step */ - } - else - { - /* Use outerrel->cheapestpath, with the outersortkeys */ - mergeouterpath = outerrel->cheapestpath; - } + /* Compute the required ordering of the outer path */ + outersortkeys = make_pathkeys_for_mergeclauses(root, + mergeclauses, + outerrel->targetlist); + + /* + * Generate a mergejoin on the basis of sorting the cheapest outer. + * Since a sort will be needed, only cheapest total cost matters. 
+ */ + merge_pathkeys = build_join_pathkeys(outersortkeys, + joinrel->targetlist, + root->equi_key_list); + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + outerrel->cheapest_total_path, + innerpath, + restrictlist, + merge_pathkeys, + get_actual_clauses(mergeclauses), + outersortkeys, + NIL)); + /* + * Now generate mergejoins based on already-sufficiently-ordered + * outer paths. There's likely to be some redundancy here with paths + * already generated by merge_unsorted_outer ... but since + * merge_unsorted_outer doesn't consider all permutations of the + * mergeclause list, it may fail to notice that this particular + * innerpath could have been used with this outerpath. + */ + totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, + outersortkeys, + TOTAL_COST); + if (totalouterpath == NULL) + continue; /* there won't be a startup-cost path either */ - /* Compute pathkeys the result will have */ - merge_pathkeys = build_join_pathkeys( - outersortkeys ? outersortkeys : mergeouterpath->pathkeys, - joinrel->targetlist, - mergeclauses); - - mergeclauses = get_actual_clauses(mergeclauses); - path_list = lappend(path_list, - create_mergejoin_path(joinrel, - mergeouterpath, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - outersortkeys, - NIL)); + merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys, + joinrel->targetlist, + root->equi_key_list); + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + totalouterpath, + innerpath, + restrictlist, + merge_pathkeys, + get_actual_clauses(mergeclauses), + NIL, + NIL)); + + startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, + outersortkeys, + STARTUP_COST); + if (startupouterpath != NULL && startupouterpath != totalouterpath) + { + merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys, + joinrel->targetlist, + root->equi_key_list); + add_path(joinrel, (Path *) + create_mergejoin_path(joinrel, + startupouterpath, + innerpath, + 
restrictlist, + merge_pathkeys, + get_actual_clauses(mergeclauses), + NIL, + NIL)); } } - - return path_list; } +#endif + /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and @@ -526,17 +524,14 @@ match_unsorted_inner(RelOptInfo *joinrel, * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join - * - * Returns a list of hashjoin paths. */ -static List * +static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist) { - List *hpath_list = NIL; Relids outerrelids = outerrel->relids; Relids innerrelids = innerrel->relids; List *i; @@ -558,7 +553,6 @@ hash_inner_and_outer(Query *root, *right, *inner; Selectivity innerdisbursion; - HashPath *hash_path; if (restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ @@ -581,17 +575,66 @@ hash_inner_and_outer(Query *root, /* estimate disbursion of inner var for costing purposes */ innerdisbursion = estimate_disbursion(root, inner); - hash_path = create_hashjoin_path(joinrel, - outerrel->cheapestpath, - innerrel->cheapestpath, - restrictlist, - lcons(clause, NIL), - innerdisbursion); - - hpath_list = lappend(hpath_list, hash_path); + /* + * We consider both the cheapest-total-cost and cheapest-startup-cost + * outer paths. There's no need to consider any but the cheapest- + * total-cost inner path, however. 
+ */ + add_path(joinrel, (Path *) + create_hashjoin_path(joinrel, + outerrel->cheapest_total_path, + innerrel->cheapest_total_path, + restrictlist, + lcons(clause, NIL), + innerdisbursion)); + if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) + add_path(joinrel, (Path *) + create_hashjoin_path(joinrel, + outerrel->cheapest_startup_path, + innerrel->cheapest_total_path, + restrictlist, + lcons(clause, NIL), + innerdisbursion)); } +} + +/* + * best_innerjoin + * Find the cheapest index path that has already been identified by + * indexable_joinclauses() as being a possible inner path for the given + * outer relation(s) in a nestloop join. + * + * We compare indexpaths on total_cost only, assuming that they will all have + * zero or negligible startup_cost. We might have to think harder someday... + * + * 'join_paths' is a list of potential inner indexscan join paths + * 'outer_relids' is the relid list of the outer join relation + * + * Returns the pathnode of the best path, or NULL if there's no + * usable path. + */ +static Path * +best_innerjoin(List *join_paths, Relids outer_relids) +{ + Path *cheapest = (Path *) NULL; + List *join_path; + + foreach(join_path, join_paths) + { + Path *path = (Path *) lfirst(join_path); + + Assert(IsA(path, IndexPath)); - return hpath_list; + /* path->joinrelids is the set of base rels that must be part of + * outer_relids in order to use this inner path, because those + * rels are used in the index join quals of this inner path. 
+ */ + if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) && + (cheapest == NULL || + compare_path_costs(path, cheapest, TOTAL_COST) < 0)) + cheapest = path; + } + return cheapest; } /* diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 9eb0484fc2fa1bc8cd88f8faba8224562d433445..6226100cfc791304a022691b5dc36c7737a82846 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.36 2000/02/05 18:26:09 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/internal.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/restrictinfo.h" @@ -27,14 +28,13 @@ static void best_or_subclause_indices(Query *root, RelOptInfo *rel, List *subclauses, List *indices, - List **indexquals, - List **indexids, - Cost *cost); + IndexPath *pathnode); static void best_or_subclause_index(Query *root, RelOptInfo *rel, Expr *subclause, List *indices, List **retIndexQual, Oid *retIndexid, - Cost *retCost); + Cost *retStartupCost, + Cost *retTotalCost); /* @@ -45,14 +45,13 @@ static void best_or_subclause_index(Query *root, RelOptInfo *rel, * 'rel' is the relation entry for which the paths are to be created * 'clauses' is the list of available restriction clause nodes * - * Returns a list of index path nodes. - * + * Returns nothing, but adds paths to rel->pathlist via add_path(). 
*/ -List * +void create_or_index_paths(Query *root, - RelOptInfo *rel, List *clauses) + RelOptInfo *rel, + List *clauses) { - List *path_list = NIL; List *clist; foreach(clist, clauses) @@ -86,17 +85,6 @@ create_or_index_paths(Query *root, * best available index for each subclause. */ IndexPath *pathnode = makeNode(IndexPath); - List *indexquals; - List *indexids; - Cost cost; - - best_or_subclause_indices(root, - rel, - clausenode->clause->args, - clausenode->subclauseindices, - &indexquals, - &indexids, - &cost); pathnode->path.pathtype = T_IndexScan; pathnode->path.parent = rel; @@ -108,17 +96,21 @@ create_or_index_paths(Query *root, */ pathnode->path.pathkeys = NIL; - pathnode->indexid = indexids; - pathnode->indexqual = indexquals; + /* We don't actually care what order the index scans in ... */ + pathnode->indexscandir = NoMovementScanDirection; + pathnode->joinrelids = NIL; /* no join clauses here */ - pathnode->path.path_cost = cost; - path_list = lappend(path_list, pathnode); + best_or_subclause_indices(root, + rel, + clausenode->clause->args, + clausenode->subclauseindices, + pathnode); + + add_path(rel, (Path *) pathnode); } } } - - return path_list; } /* @@ -128,53 +120,68 @@ create_or_index_paths(Query *root, * indices. The cost is the sum of the individual index costs, since * the executor will perform a scan for each subclause of the 'or'. * - * This routine also creates the indexquals and indexids lists that will - * be needed by the executor. The indexquals list has one entry for each + * This routine also creates the indexqual and indexid lists that will + * be needed by the executor. The indexqual list has one entry for each * scan of the base rel, which is a sublist of indexqual conditions to * apply in that scan. The implicit semantics are AND across each sublist * of quals, and OR across the toplevel list (note that the executor - * takes care not to return any single tuple more than once). 
The indexids - * list gives the index to be used in each scan. + * takes care not to return any single tuple more than once). The indexid + * list gives the OID of the index to be used in each scan. * * 'rel' is the node of the relation on which the indexes are defined * 'subclauses' are the subclauses of the 'or' clause * 'indices' is a list of sublists of the IndexOptInfo nodes that matched * each subclause of the 'or' clause - * '*indexquals' gets the constructed indexquals for the path (a list + * 'pathnode' is the IndexPath node being built. + * + * Results are returned by setting these fields of the passed pathnode: + * 'indexqual' gets the constructed indexquals for the path (a list * of sublists of clauses, one sublist per scan of the base rel) - * '*indexids' gets a list of the index OIDs for each scan of the rel - * '*cost' gets the total cost of the path + * 'indexid' gets a list of the index OIDs for each scan of the rel + * 'startup_cost' and 'total_cost' get the complete path costs. + * + * 'startup_cost' is the startup cost for the first index scan only; + * startup costs for later scans will be paid later on, so they just + * get reflected in total_cost. + * + * NOTE: we choose each scan on the basis of its total cost, ignoring startup + * cost. This is reasonable as long as all index types have zero or small + * startup cost, but we might have to work harder if any index types with + * nontrivial startup cost are ever invented. 
*/ static void best_or_subclause_indices(Query *root, RelOptInfo *rel, List *subclauses, List *indices, - List **indexquals, /* return value */ - List **indexids, /* return value */ - Cost *cost) /* return value */ + IndexPath *pathnode) { List *slist; - *indexquals = NIL; - *indexids = NIL; - *cost = (Cost) 0.0; + pathnode->indexqual = NIL; + pathnode->indexid = NIL; + pathnode->path.startup_cost = 0; + pathnode->path.total_cost = 0; foreach(slist, subclauses) { Expr *subclause = lfirst(slist); List *best_indexqual; Oid best_indexid; - Cost best_cost; + Cost best_startup_cost; + Cost best_total_cost; best_or_subclause_index(root, rel, subclause, lfirst(indices), - &best_indexqual, &best_indexid, &best_cost); + &best_indexqual, &best_indexid, + &best_startup_cost, &best_total_cost); Assert(best_indexid != InvalidOid); - *indexquals = lappend(*indexquals, best_indexqual); - *indexids = lappendi(*indexids, best_indexid); - *cost += best_cost; + pathnode->indexqual = lappend(pathnode->indexqual, best_indexqual); + pathnode->indexid = lappendi(pathnode->indexid, best_indexid); + if (slist == subclauses) /* first scan? */ + pathnode->path.startup_cost = best_startup_cost; + pathnode->path.total_cost += best_total_cost; indices = lnext(indices); } @@ -182,16 +189,17 @@ best_or_subclause_indices(Query *root, /* * best_or_subclause_index - * Determines which is the best index to be used with a subclause of - * an 'or' clause by estimating the cost of using each index and selecting - * the least expensive. + * Determines which is the best index to be used with a subclause of an + * 'or' clause by estimating the cost of using each index and selecting + * the least expensive (considering total cost only, for now). 
* * 'rel' is the node of the relation on which the index is defined * 'subclause' is the OR subclause being considered * 'indices' is a list of IndexOptInfo nodes that match the subclause * '*retIndexQual' gets a list of the indexqual conditions for the best index * '*retIndexid' gets the OID of the best index - * '*retCost' gets the cost of a scan with that index + * '*retStartupCost' gets the startup cost of a scan with that index + * '*retTotalCost' gets the total cost of a scan with that index */ static void best_or_subclause_index(Query *root, @@ -200,7 +208,8 @@ best_or_subclause_index(Query *root, List *indices, List **retIndexQual, /* return value */ Oid *retIndexid, /* return value */ - Cost *retCost) /* return value */ + Cost *retStartupCost, /* return value */ + Cost *retTotalCost) /* return value */ { bool first_time = true; List *ilist; @@ -208,27 +217,28 @@ best_or_subclause_index(Query *root, /* if we don't match anything, return zeros */ *retIndexQual = NIL; *retIndexid = InvalidOid; - *retCost = 0.0; + *retStartupCost = 0; + *retTotalCost = 0; foreach(ilist, indices) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); List *indexqual; - Cost subcost; + Path subclause_path; Assert(IsA(index, IndexOptInfo)); /* Convert this 'or' subclause to an indexqual list */ indexqual = extract_or_indexqual_conditions(rel, index, subclause); - subcost = cost_index(root, rel, index, indexqual, - false); + cost_index(&subclause_path, root, rel, index, indexqual, false); - if (first_time || subcost < *retCost) + if (first_time || subclause_path.total_cost < *retTotalCost) { *retIndexQual = indexqual; *retIndexid = index->indexoid; - *retCost = subcost; + *retStartupCost = subclause_path.startup_cost; + *retTotalCost = subclause_path.total_cost; first_time = false; } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 5aeda1e154e157df2e5d291b0a29b8865d850420..b578e33f5c850a6cde157b8b98aace14c75dfcc7 100644 --- 
a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.18 2000/01/26 05:56:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.19 2000/02/15 20:49:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,7 @@ #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "optimizer/var.h" @@ -25,9 +26,9 @@ #include "utils/lsyscache.h" static PathKeyItem *makePathKeyItem(Node *key, Oid sortop); -static Var *find_indexkey_var(int indexkey, List *tlist); -static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist, - List *joinclauses); +static List *make_canonical_pathkey(Query *root, PathKeyItem *item); +static Var *find_indexkey_var(Query *root, RelOptInfo *rel, + AttrNumber varattno); /*-------------------- @@ -50,50 +51,122 @@ static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist, * Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since * we can say nothing about the overall order of its result. Also, an * indexscan on an unordered type of index generates NIL pathkeys. However, - * we can always create a pathkey by doing an explicit sort. - * - * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are - * only performed with equijoins ("="). Because of this, the resulting - * multi-relation path actually has more than one primary key. For example, - * a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys - * of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major - * sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1. - * They are equal, so they are both primary sort keys. 
This allows future - * joins to use either var as a pre-sorted key to prevent upper Mergejoins - * from having to re-sort the Path. This is why pathkeys is a List of Lists. - * - * Note that while the order of the top list is meaningful (primary vs. - * secondary sort key), the order of each sublist is arbitrary. No code - * working with pathkeys should generate a result that depends on the order - * of a pathkey sublist. + * we can always create a pathkey by doing an explicit sort. The pathkeys + * for a sort plan's output just represent the sort key fields and the + * ordering operators used. + * + * Things get more interesting when we consider joins. Suppose we do a + * mergejoin between A and B using the mergeclause A.X = B.Y. The output + * of the mergejoin is sorted by X --- but it is also sorted by Y. We + * represent this fact by listing both keys in a single pathkey sublist: + * ( (A.X/xsortop B.Y/ysortop) ). This pathkey asserts that the major + * sort order of the Path can be taken to be *either* A.X or B.Y. + * They are equal, so they are both primary sort keys. By doing this, + * we allow future joins to use either var as a pre-sorted key, so upper + * Mergejoins may be able to avoid having to re-sort the Path. This is + * why pathkeys is a List of Lists. * * We keep a sortop associated with each PathKeyItem because cross-data-type - * mergejoins are possible; for example int4=int8 is mergejoinable. In this - * case we need to remember that the left var is ordered by int4lt while - * the right var is ordered by int8lt. So the different members of each - * sublist could have different sortops. - * - * When producing the pathkeys for a merge or nestloop join, we can keep - * all of the keys of the outer path, since the ordering of the outer path - * will be preserved in the result. We add to each pathkey sublist any inner - * vars that are equijoined to any of the outer vars in the sublist. 
In the - * nestloop case we have to be careful to consider only equijoin operators; - * the nestloop's join clauses might include non-equijoin operators. - * (Currently, we do this by considering only mergejoinable operators while - * making the pathkeys, since we have no separate marking for operators that - * are equijoins but aren't mergejoinable.) + * mergejoins are possible; for example int4 = int8 is mergejoinable. + * In this case we need to remember that the left var is ordered by int4lt + * while the right var is ordered by int8lt. So the different members of + * each sublist could have different sortops. + * + * Note that while the order of the top list is meaningful (primary vs. + * secondary sort key), the order of each sublist is arbitrary. Each sublist + * should be regarded as a set of equivalent keys, with no significance + * to the list order. + * + * With a little further thought, it becomes apparent that pathkeys for + * joins need not only come from mergejoins. For example, if we do a + * nestloop join between outer relation A and inner relation B, then any + * pathkeys relevant to A are still valid for the join result: we have + * not altered the order of the tuples from A. Even more interesting, + * if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y, + * and A.X was a pathkey for the outer relation A, then we can assert that + * B.Y is a pathkey for the join result; X was ordered before and still is, + * and the joined values of Y are equal to the joined values of X, so Y + * must now be ordered too. This is true even though we used no mergejoin. + * + * More generally, whenever we have an equijoin clause A.X = B.Y and a + * pathkey A.X, we can add B.Y to that pathkey if B is part of the joined + * relation the pathkey is for, *no matter how we formed the join*. 
+ * + * In short, then: when producing the pathkeys for a merge or nestloop join, + * we can keep all of the keys of the outer path, since the ordering of the + * outer path will be preserved in the result. Furthermore, we can add to + * each pathkey sublist any inner vars that are equijoined to any of the + * outer vars in the sublist; this works regardless of whether we are + * implementing the join using that equijoin clause as a mergeclause, + * or merely enforcing the clause after-the-fact as a qpqual filter. * * Although Hashjoins also work only with equijoin operators, it is *not* * safe to consider the output of a Hashjoin to be sorted in any particular * order --- not even the outer path's order. This is true because the * executor might have to split the join into multiple batches. Therefore - * a Hashjoin is always given NIL pathkeys. + * a Hashjoin is always given NIL pathkeys. (Also, we need to use only + * mergejoinable operators when deducing which inner vars are now sorted, + * because a mergejoin operator tells us which left- and right-datatype + * sortops can be considered equivalent, whereas a hashjoin operator + * doesn't imply anything about sort order.) * * Pathkeys are also useful to represent an ordering that we wish to achieve, * since they are easily compared to the pathkeys of a potential candidate * path. So, SortClause lists are turned into pathkeys lists for use inside * the optimizer. * + * OK, now for how it *really* works: + * + * We did implement pathkeys just as described above, and found that the + * planner spent a huge amount of time comparing pathkeys, because the + * representation of pathkeys as unordered lists made it expensive to decide + * whether two were equal or not. So, we've modified the representation + * as described next. + * + * If we scan the WHERE clause for equijoin clauses (mergejoinable clauses) + * during planner startup, we can construct lists of equivalent pathkey items + * for the query. 
There could be more than two items per equivalence set; + * for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the + * equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops). + * Any pathkey item that belongs to an equivalence set implies that all the + * other items in its set apply to the relation too, or at least all the ones + * that are for fields present in the relation. (Some of the items in the + * set might be for as-yet-unjoined relations.) Furthermore, any multi-item + * pathkey sublist that appears at any stage of planning the query *must* be + * a subset of one or another of these equivalence sets; there's no way we'd + * have put two items in the same pathkey sublist unless they were equijoined + * in WHERE. + * + * Now suppose that we allow a pathkey sublist to contain pathkey items for + * vars that are not yet part of the pathkey's relation. This introduces + * no logical difficulty, because such items can easily be seen to be + * irrelevant; we just mandate that they be ignored. But having allowed + * this, we can declare (by fiat) that any multiple-item pathkey sublist + * must be equal() to the appropriate equivalence set. In effect, whenever + * we make a pathkey sublist that mentions any var appearing in an + * equivalence set, we instantly add all the other vars equivalenced to it, + * whether they appear yet in the pathkey's relation or not. And we also + * mandate that the pathkey sublist appear in the same order as the + * equivalence set it comes from. (In practice, we simply return a pointer + * to the relevant equivalence set without building any new sublist at all.) + * This makes comparing pathkeys very simple and fast, and saves a lot of + * work and memory space for pathkey construction as well. + * + * Note that pathkey sublists having just one item still exist, and are + * not expected to be equal() to any equivalence set. 
This occurs when + * we describe a sort order that involves a var that's not mentioned in + * any equijoin clause of the WHERE. We could add singleton sets containing + * such vars to the query's list of equivalence sets, but there's little + * point in doing so. + * + * By the way, it's OK and even useful for us to build equivalence sets + * that mention multiple vars from the same relation. For example, if + * we have WHERE A.X = A.Y and we are scanning A using an index on X, + * we can legitimately conclude that the path is sorted by Y as well; + * and this could be handy if Y is the variable used in other join clauses + * or ORDER BY. So, any WHERE clause with a mergejoinable operator can + * contribute to an equivalence set, even if it's not a join clause. + * * -- bjm & tgl *-------------------- */ @@ -113,6 +186,129 @@ makePathKeyItem(Node *key, Oid sortop) return item; } +/* + * add_equijoined_keys + * The given clause has a mergejoinable operator, so its two sides + * can be considered equal after restriction clause application; in + * particular, any pathkey mentioning one side (with the correct sortop) + * can be expanded to include the other as well. Record the vars and + * associated sortops in the query's equi_key_list for future use. + * + * The query's equi_key_list field points to a list of sublists of PathKeyItem + * nodes, where each sublist is a set of two or more vars+sortops that have + * been identified as logically equivalent (and, therefore, we may consider + * any two in a set to be equal). As described above, we will subsequently + * use direct pointers to one of these sublists to represent any pathkey + * that involves an equijoined variable. + * + * This code would actually work fine with expressions more complex than + * a single Var, but currently it won't see any because check_mergejoinable + * won't accept such clauses as mergejoinable. 
+ */ +void +add_equijoined_keys(Query *root, RestrictInfo *restrictinfo) +{ + Expr *clause = restrictinfo->clause; + PathKeyItem *item1 = makePathKeyItem((Node *) get_leftop(clause), + restrictinfo->left_sortop); + PathKeyItem *item2 = makePathKeyItem((Node *) get_rightop(clause), + restrictinfo->right_sortop); + List *newset, + *cursetlink; + + /* We might see a clause X=X; don't make a single-element list from it */ + if (equal(item1, item2)) + return; + /* + * Our plan is to make a two-element set, then sweep through the existing + * equijoin sets looking for matches to item1 or item2. When we find one, + * we remove that set from equi_key_list and union it into our new set. + * When done, we add the new set to the front of equi_key_list. + * + * This is a standard UNION-FIND problem, for which there exist better + * data structures than simple lists. If this code ever proves to be + * a bottleneck then it could be sped up --- but for now, simple is + * beautiful. + */ + newset = lcons(item1, lcons(item2, NIL)); + + foreach(cursetlink, root->equi_key_list) + { + List *curset = lfirst(cursetlink); + + if (member(item1, curset) || member(item2, curset)) + { + /* Found a set to merge into our new set */ + newset = LispUnion(newset, curset); + /* Remove old set from equi_key_list. NOTE this does not change + * lnext(cursetlink), so the outer foreach doesn't break. + */ + root->equi_key_list = lremove(curset, root->equi_key_list); + freeList(curset); /* might as well recycle old cons cells */ + } + } + + root->equi_key_list = lcons(newset, root->equi_key_list); +} + +/* + * make_canonical_pathkey + * Given a PathKeyItem, find the equi_key_list subset it is a member of, + * if any. If so, return a pointer to that sublist, which is the + * canonical representation (for this query) of that PathKeyItem's + * equivalence set. 
If it is not found, return a single-element list + * containing the PathKeyItem (when the item has no equivalence peers, + * we just allow it to be a standalone list). + * + * Note that this function must not be used until after we have completed + * scanning the WHERE clause for equijoin operators. + */ +static List * +make_canonical_pathkey(Query *root, PathKeyItem *item) +{ + List *cursetlink; + + foreach(cursetlink, root->equi_key_list) + { + List *curset = lfirst(cursetlink); + + if (member(item, curset)) + return curset; + } + return lcons(item, NIL); +} + +/* + * canonicalize_pathkeys + * Convert a not-necessarily-canonical pathkeys list to canonical form. + * + * Note that this function must not be used until after we have completed + * scanning the WHERE clause for equijoin operators. + */ +List * +canonicalize_pathkeys(Query *root, List *pathkeys) +{ + List *new_pathkeys = NIL; + List *i; + + foreach(i, pathkeys) + { + List *pathkey = (List *) lfirst(i); + PathKeyItem *item; + + /* + * It's sufficient to look at the first entry in the sublist; + * if there are more entries, they're already part of an + * equivalence set by definition. + */ + Assert(pathkey != NIL); + item = (PathKeyItem *) lfirst(pathkey); + new_pathkeys = lappend(new_pathkeys, + make_canonical_pathkey(root, item)); + } + return new_pathkeys; +} + /**************************************************************************** * PATHKEY COMPARISONS ****************************************************************************/ @@ -126,15 +322,21 @@ makePathKeyItem(Node *key, Oid sortop) * it contains all the keys of the other plus more. For example, either * ((A) (B)) or ((A B)) is better than ((A)). * - * This gets called a lot, so it is optimized. + * Because we actually only expect to see canonicalized pathkey sublists, + * we don't have to do the full two-way-subset-inclusion test on each + * pair of sublists that is implied by the above statement. Instead we + * just do an equal(). 
In the normal case where multi-element sublists + * are pointers into the root's equi_key_list, equal() will be very fast: + * it will recognize pointer equality when the sublists are the same, + * and will fail at the first sublist element when they are not. + * + * Yes, this gets called enough to be worth coding it this tensely. */ PathKeysComparison compare_pathkeys(List *keys1, List *keys2) { List *key1, *key2; - bool key1_subsetof_key2 = true, - key2_subsetof_key1 = true; for (key1 = keys1, key2 = keys2; key1 != NIL && key2 != NIL; @@ -142,36 +344,12 @@ compare_pathkeys(List *keys1, List *keys2) { List *subkey1 = lfirst(key1); List *subkey2 = lfirst(key2); - List *i; - /* We have to do this the hard way since the ordering of the subkey - * lists is arbitrary. + /* We will never have two subkeys where one is a subset of the other, + * because of the canonicalization explained above. Either they are + * equal or they ain't. */ - if (key1_subsetof_key2) - { - foreach(i, subkey1) - { - if (! member(lfirst(i), subkey2)) - { - key1_subsetof_key2 = false; - break; - } - } - } - - if (key2_subsetof_key1) - { - foreach(i, subkey2) - { - if (! member(lfirst(i), subkey1)) - { - key2_subsetof_key1 = false; - break; - } - } - } - - if (!key1_subsetof_key2 && !key2_subsetof_key1) + if (! equal(subkey1, subkey2)) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } @@ -180,18 +358,11 @@ compare_pathkeys(List *keys1, List *keys2) * of the other list are not NIL --- no pathkey list should ever have * a NIL sublist.) 
*/ - if (key1 != NIL) - key1_subsetof_key2 = false; - if (key2 != NIL) - key2_subsetof_key1 = false; - - if (key1_subsetof_key2 && key2_subsetof_key1) + if (key1 == NIL && key2 == NIL) return PATHKEYS_EQUAL; - if (key1_subsetof_key2) - return PATHKEYS_BETTER2; - if (key2_subsetof_key1) - return PATHKEYS_BETTER1; - return PATHKEYS_DIFFERENT; + if (key1 != NIL) + return PATHKEYS_BETTER1; /* key1 is longer */ + return PATHKEYS_BETTER2; /* key2 is longer */ } /* @@ -215,16 +386,16 @@ pathkeys_contained_in(List *keys1, List *keys2) /* * get_cheapest_path_for_pathkeys - * Find the cheapest path in 'paths' that satisfies the given pathkeys. - * Return NULL if no such path. + * Find the cheapest path (according to the specified criterion) that + * satisfies the given pathkeys. Return NULL if no such path. * - * 'paths' is a list of possible paths (either inner or outer) - * 'pathkeys' represents a required ordering - * if 'indexpaths_only' is true, only IndexPaths will be considered. + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (already canonicalized!) + * 'cost_criterion' is STARTUP_COST or TOTAL_COST */ Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, - bool indexpaths_only) + CostSelector cost_criterion) { Path *matched_path = NULL; List *i; @@ -233,15 +404,55 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, { Path *path = (Path *) lfirst(i); - if (indexpaths_only && ! IsA(path, IndexPath)) + /* + * Since cost comparison is a lot cheaper than pathkey comparison, + * do that first. (XXX is that still true?) 
+ */ + if (matched_path != NULL && + compare_path_costs(matched_path, path, cost_criterion) <= 0) continue; if (pathkeys_contained_in(pathkeys, path->pathkeys)) - { - if (matched_path == NULL || - path->path_cost < matched_path->path_cost) - matched_path = path; - } + matched_path = path; + } + return matched_path; +} + +/* + * get_cheapest_fractional_path_for_pathkeys + * Find the cheapest path (for retrieving a specified fraction of all + * the tuples) that satisfies the given pathkeys. + * Return NULL if no such path. + * + * See compare_fractional_path_costs() for the interpretation of the fraction + * parameter. + * + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (already canonicalized!) + * 'fraction' is the fraction of the total tuples expected to be retrieved + */ +Path * +get_cheapest_fractional_path_for_pathkeys(List *paths, + List *pathkeys, + double fraction) +{ + Path *matched_path = NULL; + List *i; + + foreach(i, paths) + { + Path *path = (Path *) lfirst(i); + + /* + * Since cost comparison is a lot cheaper than pathkey comparison, + * do that first. + */ + if (matched_path != NULL && + compare_fractional_path_costs(matched_path, path, fraction) <= 0) + continue; + + if (pathkeys_contained_in(pathkeys, path->pathkeys)) + matched_path = path; } return matched_path; } @@ -255,18 +466,22 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, * Build a pathkeys list that describes the ordering induced by an index * scan using the given index. (Note that an unordered index doesn't * induce any ordering; such an index will have no sortop OIDS in - * its "ordering" field.) + * its "ordering" field, and we will return NIL.) * - * Vars in the resulting pathkeys list are taken from the rel's targetlist. - * If we can't find the indexkey in the targetlist, we assume that the - * ordering of that key is not interesting. 
+ * If 'scandir' is BackwardScanDirection, attempt to build pathkeys + * representing a backwards scan of the index. Return NIL if can't do it. */ List * -build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) +build_index_pathkeys(Query *root, + RelOptInfo *rel, + IndexOptInfo *index, + ScanDirection scandir) { List *retval = NIL; int *indexkeys = index->indexkeys; Oid *ordering = index->ordering; + PathKeyItem *item; + Oid sortop; if (!indexkeys || indexkeys[0] == 0 || !ordering || ordering[0] == InvalidOid) @@ -275,8 +490,6 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) if (index->indproc) { /* Functional index: build a representation of the function call */ - int relid = lfirsti(rel->relids); - Oid reloid = getrelid(relid, root->rtable); Func *funcnode = makeNode(Func); List *funcargs = NIL; @@ -291,43 +504,42 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) while (*indexkeys != 0) { - int varattno = *indexkeys; - Oid vartypeid = get_atttype(reloid, varattno); - int32 type_mod = get_atttypmod(reloid, varattno); - funcargs = lappend(funcargs, - makeVar(relid, varattno, vartypeid, - type_mod, 0)); + find_indexkey_var(root, rel, *indexkeys)); indexkeys++; } + sortop = *ordering; + if (ScanDirectionIsBackward(scandir)) + { + sortop = get_commutator(sortop); + if (sortop == InvalidOid) + return NIL; /* oops, no reverse sort operator? 
*/ + } + /* Make a one-sublist pathkeys list for the function expression */ - retval = lcons(lcons( - makePathKeyItem((Node *) make_funcclause(funcnode, funcargs), - *ordering), - NIL), NIL); + item = makePathKeyItem((Node *) make_funcclause(funcnode, funcargs), + sortop); + retval = lcons(make_canonical_pathkey(root, item), NIL); } else { /* Normal non-functional index */ - List *rel_tlist = rel->targetlist; - while (*indexkeys != 0 && *ordering != InvalidOid) { - Var *relvar = find_indexkey_var(*indexkeys, rel_tlist); + Var *relvar = find_indexkey_var(root, rel, *indexkeys); - /* If we can find no tlist entry for the n'th sort key, - * then we're done generating pathkeys; any subsequent sort keys - * no longer apply, since we can't represent the ordering properly - * even if there are tlist entries for them. - */ - if (!relvar) - break; - /* OK, make a one-element sublist for this sort key */ - retval = lappend(retval, - lcons(makePathKeyItem((Node *) relvar, - *ordering), - NIL)); + sortop = *ordering; + if (ScanDirectionIsBackward(scandir)) + { + sortop = get_commutator(sortop); + if (sortop == InvalidOid) + break; /* oops, no reverse sort operator? */ + } + + /* OK, make a sublist for this sort key */ + item = makePathKeyItem((Node *) relvar, sortop); + retval = lappend(retval, make_canonical_pathkey(root, item)); indexkeys++; ordering++; @@ -338,21 +550,37 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) } /* - * Find a var in a relation's targetlist that matches an indexkey attrnum. + * Find or make a Var node for the specified attribute of the rel. + * + * We first look for the var in the rel's target list, because that's + * easy and fast. But the var might not be there (this should normally + * only happen for vars that are used in WHERE restriction clauses, + * but not in join clauses or in the SELECT target list). In that case, + * gin up a Var node the hard way. 
*/ static Var * -find_indexkey_var(int indexkey, List *tlist) +find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno) { List *temp; + int relid; + Oid reloid, + vartypeid; + int32 type_mod; - foreach(temp, tlist) + foreach(temp, rel->targetlist) { Var *tle_var = get_expr(lfirst(temp)); - if (IsA(tle_var, Var) && tle_var->varattno == indexkey) + if (IsA(tle_var, Var) && tle_var->varattno == varattno) return tle_var; } - return NULL; + + relid = lfirsti(rel->relids); + reloid = getrelid(relid, root->rtable); + vartypeid = get_atttype(reloid, varattno); + type_mod = get_atttypmod(reloid, varattno); + + return makeVar(relid, varattno, vartypeid, type_mod, 0); } /* @@ -360,164 +588,33 @@ find_indexkey_var(int indexkey, List *tlist) * Build the path keys for a join relation constructed by mergejoin or * nestloop join. These keys should include all the path key vars of the * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are mergejoined to the outer vars. + * plus any vars of the inner path that are equijoined to the outer vars. * - * Per the discussion at the top of this file, mergejoined inner vars + * Per the discussion at the top of this file, equijoined inner vars * can be considered path keys of the result, just the same as the outer - * vars they were joined with. - * - * We can also use inner path vars as pathkeys of a nestloop join, but we - * must be careful that we only consider equijoin clauses and not general - * join clauses. For example, "t1.a < t2.b" might be a join clause of a - * nestloop, but it doesn't result in b acquiring the ordering of a! - * joinpath.c handles that problem by only passing this routine clauses - * that are marked mergejoinable, even if a nestloop join is being built. - * Therefore we only have 't1.a = t2.b' style clauses, and can expect that - * the inner var will acquire the outer's ordering no matter which join - * method is actually used. 
- * - * We drop pathkeys that are not vars of the join relation's tlist, - * on the assumption that they are not interesting to higher levels. - * (Is this correct?? To support expression pathkeys we might want to - * check that all vars mentioned in the key are in the tlist, instead.) - * - * All vars in the result are taken from the join relation's tlist, - * not from the given pathkeys or joinclauses. + * vars they were joined with; furthermore, it doesn't matter what kind + * of join algorithm is actually used. * * 'outer_pathkeys' is the list of the outer path's path keys * 'join_rel_tlist' is the target list of the join relation - * 'joinclauses' is the list of mergejoinable clauses to consider (note this - * is a list of RestrictInfos, not just bare qual clauses); can be NIL + * 'equi_key_list' is the query's list of pathkeyitem equivalence sets * * Returns the list of new path keys. - * */ List * build_join_pathkeys(List *outer_pathkeys, List *join_rel_tlist, - List *joinclauses) + List *equi_key_list) { - List *final_pathkeys = NIL; - List *i; - - foreach(i, outer_pathkeys) - { - List *outer_pathkey = lfirst(i); - List *new_pathkey; - - new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist, - joinclauses); - /* if we can find no sortable vars for the n'th sort key, - * then we're done generating pathkeys; any subsequent sort keys - * no longer apply, since we can't represent the ordering properly. - */ - if (new_pathkey == NIL) - break; - final_pathkeys = lappend(final_pathkeys, new_pathkey); - } - return final_pathkeys; -} - -/* - * build_join_pathkey - * Generate an individual pathkey sublist, consisting of the outer vars - * already mentioned in 'pathkey' plus any inner vars that are joined to - * them (and thus can now also be considered path keys, per discussion - * at the top of this file). - * - * Note that each returned pathkey uses the var node found in - * 'join_rel_tlist' rather than the input pathkey or joinclause var node. 
- * (Is this important?) - * - * Returns a new pathkey (list of PathKeyItems). - */ -static List * -build_join_pathkey(List *pathkey, - List *join_rel_tlist, - List *joinclauses) -{ - List *new_pathkey = NIL; - List *i, - *j; - - foreach(i, pathkey) - { - PathKeyItem *key = (PathKeyItem *) lfirst(i); - Node *tlist_key; - - Assert(key && IsA(key, PathKeyItem)); - - tlist_key = matching_tlist_expr(key->key, join_rel_tlist); - if (tlist_key) - new_pathkey = lcons(makePathKeyItem(tlist_key, - key->sortop), - new_pathkey); - - foreach(j, joinclauses) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - Expr *joinclause = restrictinfo->clause; - /* We assume the clause is a binary opclause... */ - Node *l = (Node *) get_leftop(joinclause); - Node *r = (Node *) get_rightop(joinclause); - Node *other_var = NULL; - Oid other_sortop = InvalidOid; - - if (equal(key->key, l)) - { - other_var = r; - other_sortop = restrictinfo->right_sortop; - } - else if (equal(key->key, r)) - { - other_var = l; - other_sortop = restrictinfo->left_sortop; - } - - if (other_var && other_sortop) - { - tlist_key = matching_tlist_expr(other_var, join_rel_tlist); - if (tlist_key) - new_pathkey = lcons(makePathKeyItem(tlist_key, - other_sortop), - new_pathkey); - } - } - } - - return new_pathkey; -} - -/* - * commute_pathkeys - * Attempt to commute the operators in a set of pathkeys, producing - * pathkeys that describe the reverse sort order (DESC instead of ASC). - * Returns TRUE if successful (all the operators have commutators). - * - * CAUTION: given pathkeys are modified in place, even if not successful!! - * Usually, caller should have just built or copied the pathkeys list to - * ensure there are no unwanted side-effects. 
- */ -bool -commute_pathkeys(List *pathkeys) -{ - List *i; - - foreach(i, pathkeys) - { - List *pathkey = lfirst(i); - List *j; - - foreach(j, pathkey) - { - PathKeyItem *key = lfirst(j); - - key->sortop = get_commutator(key->sortop); - if (key->sortop == InvalidOid) - return false; - } - } - return true; /* successful */ + /* + * This used to be quite a complex bit of code, but now that all + * pathkey sublists start out life canonicalized, we don't have to + * do a darn thing here! The inner-rel vars we used to need to add + * are *already* part of the outer pathkey! + * + * I'd remove the routine entirely, but maybe someday we'll need it... + */ + return outer_pathkeys; } /**************************************************************************** @@ -529,11 +626,18 @@ commute_pathkeys(List *pathkeys) * Generate a pathkeys list that represents the sort order specified * by a list of SortClauses (GroupClauses will work too!) * + * NB: the result is NOT in canonical form, but must be passed through + * canonicalize_pathkeys() before it can be used for comparisons or + * labeling relation sort orders. (We do things this way because + * union_planner needs to be able to construct requested pathkeys before + * the pathkey equivalence sets have been created for the query.) + * * 'sortclauses' is a list of SortClause or GroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * -make_pathkeys_for_sortclauses(List *sortclauses, List *tlist) +make_pathkeys_for_sortclauses(List *sortclauses, + List *tlist) { List *pathkeys = NIL; List *i; @@ -546,7 +650,11 @@ make_pathkeys_for_sortclauses(List *sortclauses, List *tlist) sortkey = get_sortgroupclause_expr(sortcl, tlist); pathkey = makePathKeyItem(sortkey, sortcl->sortop); - /* pathkey becomes a one-element sublist */ + /* + * The pathkey becomes a one-element sublist, for now; + * canonicalize_pathkeys() might replace it with a longer + * sublist later. 
+ */ pathkeys = lappend(pathkeys, lcons(pathkey, NIL)); } return pathkeys; @@ -599,6 +707,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) { PathKeyItem *keyitem = lfirst(j); Node *key = keyitem->key; + Oid keyop = keyitem->sortop; List *k; foreach(k, restrictinfos) @@ -607,8 +716,10 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) Assert(restrictinfo->mergejoinoperator != InvalidOid); - if ((equal(key, get_leftop(restrictinfo->clause)) || - equal(key, get_rightop(restrictinfo->clause))) && + if (((keyop == restrictinfo->left_sortop && + equal(key, get_leftop(restrictinfo->clause))) || + (keyop == restrictinfo->right_sortop && + equal(key, get_rightop(restrictinfo->clause)))) && ! member(restrictinfo, mergeclauses)) { matched_restrictinfo = restrictinfo; @@ -645,7 +756,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. * 'tlist' is a relation target list for either the inner or outer - * side of the proposed join rel. + * side of the proposed join rel. (Not actually needed anymore) * * Returns a pathkeys list that can be applied to the indicated relation. * @@ -654,7 +765,9 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) * just make the keys, eh? */ List * -make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist) +make_pathkeys_for_mergeclauses(Query *root, + List *mergeclauses, + List *tlist) { List *pathkeys = NIL; List *i; @@ -664,32 +777,24 @@ make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist) RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); Node *key; Oid sortop; + PathKeyItem *item; Assert(restrictinfo->mergejoinoperator != InvalidOid); /* * Find the key and sortop needed for this mergeclause. * - * We can use either side of the mergeclause, since we haven't yet - * committed to which side will be inner. 
+ * Both sides of the mergeclause should appear in one of the + * query's pathkey equivalence classes, so it doesn't matter + * which one we use here. */ - key = matching_tlist_expr((Node *) get_leftop(restrictinfo->clause), - tlist); + key = (Node *) get_leftop(restrictinfo->clause); sortop = restrictinfo->left_sortop; - if (! key) - { - key = matching_tlist_expr((Node *) get_rightop(restrictinfo->clause), - tlist); - sortop = restrictinfo->right_sortop; - } - if (! key) - elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key"); /* * Add a pathkey sublist for this sort item */ - pathkeys = lappend(pathkeys, - lcons(makePathKeyItem(key, sortop), - NIL)); + item = makePathKeyItem(key, sortop); + pathkeys = lappend(pathkeys, make_canonical_pathkey(root, item)); } return pathkeys; diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index ab0427ef32263d36eb468dc75af4f2dbf9dc6562..1e7dc43473b24f638a02263ae750c575ffb144b8 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.4 2000/02/07 04:40:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.5 2000/02/15 20:49:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,7 +36,7 @@ #include "parser/parsetree.h" #include "utils/lsyscache.h" -static List *create_tidscan_joinpaths(RelOptInfo *); +static void create_tidscan_joinpaths(RelOptInfo *rel); static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo); static bool isEvaluable(int varno, Node *node); static Node *TidequalClause(int varno, Expr *node); @@ -234,61 +234,54 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo) /* * create_tidscan_joinpaths - * Creates a path corresponding to a tid_direct scan, returning the - * pathnode. + * Create innerjoin paths if there are suitable joinclauses. 
* + * XXX does this actually work? */ -List * +static void create_tidscan_joinpaths(RelOptInfo *rel) { List *rlst = NIL, *lst; - TidPath *pathnode = (TidPath *) NULL; - List *restinfo, - *tideval; foreach (lst, rel->joininfo) { - JoinInfo *joininfo = (JoinInfo *)lfirst(lst); + JoinInfo *joininfo = (JoinInfo *) lfirst(lst); + List *restinfo, + *tideval; restinfo = joininfo->jinfo_restrictinfo; tideval = TidqualFromRestrictinfo(rel->relids, restinfo); if (length(tideval) == 1) { - pathnode = makeNode(TidPath); + TidPath *pathnode = makeNode(TidPath); pathnode->path.pathtype = T_TidScan; pathnode->path.parent = rel; pathnode->path.pathkeys = NIL; - pathnode->path.path_cost = cost_tidscan(rel, tideval); pathnode->tideval = tideval; pathnode->unjoined_relids = joininfo->unjoined_relids; + + cost_tidscan(&pathnode->path, rel, tideval); + rlst = lappend(rlst, pathnode); } } rel->innerjoin = nconc(rel->innerjoin, rlst); - return rlst; } /* * create_tidscan_paths - * Creates a path corresponding to a tid direct scan, returning the - * pathnode List. - * + * Creates paths corresponding to tid direct scans of the given rel. + * Candidate paths are added to the rel's pathlist (using add_path). 
*/ -List * +void create_tidscan_paths(Query *root, RelOptInfo *rel) { - List *rlst = NIL; - TidPath *pathnode = (TidPath *) NULL; List *tideval = TidqualFromRestrictinfo(rel->relids, rel->baserestrictinfo); if (tideval) - pathnode = create_tidscan_path(rel, tideval); - if (pathnode) - rlst = lcons(pathnode, rlst); + add_path(rel, (Path *) create_tidscan_path(rel, tideval)); create_tidscan_joinpaths(rel); - - return rlst; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 97a021a2dd221fbf9deeaacc765099108dc077a9..55af1426fdca3e75bb7ec953da212a2eb31a65da 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.84 2000/02/07 04:41:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.85 2000/02/15 20:49:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,7 +57,9 @@ static Node *fix_indxqual_operand(Node *node, int baserelid, Form_pg_index index, Oid *opclass); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, - List *indxid, List *indxqual, List *indxqualorig); + List *indxid, List *indxqual, + List *indxqualorig, + ScanDirection indexscandir); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, List *tideval); static NestLoop *make_nestloop(List *qptlist, List *qpqual, Plan *lefttree, @@ -427,9 +429,13 @@ create_indexscan_node(Query *root, baserelid, best_path->indexid, fixed_indxqual, - indxqual); + indxqual, + best_path->indexscandir); copy_path_costsize(&scan_node->scan.plan, &best_path->path); + /* set up rows estimate (just to make EXPLAIN output reasonable) */ + if (plan_rows < 1.0) + plan_rows = 1.0; scan_node->scan.plan.plan_rows = plan_rows; return scan_node; @@ -437,16 +443,14 @@ create_indexscan_node(Query *root, static 
TidScan * make_tidscan(List *qptlist, - List *qpqual, - Index scanrelid, - List *tideval) + List *qpqual, + Index scanrelid, + List *tideval) { TidScan *node = makeNode(TidScan); Plan *plan = &node->scan.plan; - plan->cost = 0; - plan->plan_rows = 0; - plan->plan_width = 0; + /* cost should be inserted by caller */ plan->state = (EState *) NULL; plan->targetlist = qptlist; plan->qual = qpqual; @@ -1038,13 +1042,15 @@ copy_path_costsize(Plan *dest, Path *src) { if (src) { - dest->cost = src->path_cost; + dest->startup_cost = src->startup_cost; + dest->total_cost = src->total_cost; dest->plan_rows = src->parent->rows; dest->plan_width = src->parent->width; } else { - dest->cost = 0; + dest->startup_cost = 0; + dest->total_cost = 0; dest->plan_rows = 0; dest->plan_width = 0; } @@ -1061,13 +1067,15 @@ copy_plan_costsize(Plan *dest, Plan *src) { if (src) { - dest->cost = src->cost; + dest->startup_cost = src->startup_cost; + dest->total_cost = src->total_cost; dest->plan_rows = src->plan_rows; dest->plan_width = src->plan_width; } else { - dest->cost = 0; + dest->startup_cost = 0; + dest->total_cost = 0; dest->plan_rows = 0; dest->plan_width = 0; } @@ -1130,7 +1138,7 @@ make_seqscan(List *qptlist, SeqScan *node = makeNode(SeqScan); Plan *plan = &node->plan; - copy_plan_costsize(plan, NULL); + /* cost should be inserted by caller */ plan->state = (EState *) NULL; plan->targetlist = qptlist; plan->qual = qpqual; @@ -1148,12 +1156,13 @@ make_indexscan(List *qptlist, Index scanrelid, List *indxid, List *indxqual, - List *indxqualorig) + List *indxqualorig, + ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); Plan *plan = &node->scan.plan; - copy_plan_costsize(plan, NULL); + /* cost should be inserted by caller */ plan->state = (EState *) NULL; plan->targetlist = qptlist; plan->qual = qpqual; @@ -1163,7 +1172,7 @@ make_indexscan(List *qptlist, node->indxid = indxid; node->indxqual = indxqual; node->indxqualorig = indxqualorig; - node->indxorderdir = 
NoMovementScanDirection; + node->indxorderdir = indexscandir; node->scan.scanstate = (CommonScanState *) NULL; return node; @@ -1219,6 +1228,10 @@ make_hash(List *tlist, Var *hashkey, Plan *lefttree) Plan *plan = &node->plan; copy_plan_costsize(plan, lefttree); + /* For plausibility, make startup & total costs equal total cost of + * input plan; this only affects EXPLAIN display not decisions. + */ + plan->startup_cost = plan->total_cost; plan->state = (EState *) NULL; plan->targetlist = tlist; plan->qual = NULL; @@ -1255,9 +1268,12 @@ make_sort(List *tlist, Oid nonameid, Plan *lefttree, int keycount) { Sort *node = makeNode(Sort); Plan *plan = &node->plan; + Path sort_path; /* dummy for result of cost_sort */ - copy_plan_costsize(plan, lefttree); - plan->cost += cost_sort(NIL, plan->plan_rows, plan->plan_width); + copy_plan_costsize(plan, lefttree); /* only care about copying size */ + cost_sort(&sort_path, NIL, lefttree->plan_rows, lefttree->plan_width); + plan->startup_cost = sort_path.startup_cost + lefttree->total_cost; + plan->total_cost = sort_path.total_cost + lefttree->total_cost; plan->state = (EState *) NULL; plan->targetlist = tlist; plan->qual = NIL; @@ -1279,7 +1295,11 @@ make_material(List *tlist, Plan *plan = &node->plan; copy_plan_costsize(plan, lefttree); - /* XXX shouldn't we charge some additional cost for materialization? */ + /* For plausibility, make startup & total costs equal total cost of + * input plan; this only affects EXPLAIN display not decisions. + * XXX shouldn't we charge some additional cost for materialization? 
+ */ + plan->startup_cost = plan->total_cost; plan->state = (EState *) NULL; plan->targetlist = tlist; plan->qual = NIL; @@ -1292,30 +1312,38 @@ make_material(List *tlist, } Agg * -make_agg(List *tlist, Plan *lefttree) +make_agg(List *tlist, List *qual, Plan *lefttree) { Agg *node = makeNode(Agg); + Plan *plan = &node->plan; - copy_plan_costsize(&node->plan, lefttree); + copy_plan_costsize(plan, lefttree); + /* + * Charge one cpu_operator_cost per aggregate function per input tuple. + */ + plan->total_cost += cpu_operator_cost * plan->plan_rows * + (length(pull_agg_clause((Node *) tlist)) + + length(pull_agg_clause((Node *) qual))); /* - * The tuple width from the input node is OK, as is the cost (we are - * ignoring the cost of computing the aggregate; is there any value - * in accounting for it?). But the tuple count is bogus. We will - * produce a single tuple if the input is not a Group, and a tuple - * per group otherwise. For now, estimate the number of groups as - * 10% of the number of tuples --- bogus, but how to do better? + * We will produce a single output tuple if the input is not a Group, + * and a tuple per group otherwise. For now, estimate the number of + * groups as 10% of the number of tuples --- bogus, but how to do better? * (Note we assume the input Group node is in "tuplePerGroup" mode, * so it didn't reduce its row count already.) 
*/ if (IsA(lefttree, Group)) - node->plan.plan_rows *= 0.1; + plan->plan_rows *= 0.1; else - node->plan.plan_rows = 1; - node->plan.state = (EState *) NULL; - node->plan.qual = NULL; - node->plan.targetlist = tlist; - node->plan.lefttree = lefttree; - node->plan.righttree = (Plan *) NULL; + { + plan->plan_rows = 1; + plan->startup_cost = plan->total_cost; + } + + plan->state = (EState *) NULL; + plan->qual = qual; + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = (Plan *) NULL; return node; } @@ -1328,8 +1356,14 @@ make_group(List *tlist, Plan *lefttree) { Group *node = makeNode(Group); + Plan *plan = &node->plan; - copy_plan_costsize(&node->plan, lefttree); + copy_plan_costsize(plan, lefttree); + /* + * Charge one cpu_operator_cost per comparison per input tuple. + * We assume all columns get compared at most of the tuples. + */ + plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp; /* * If tuplePerGroup (which is named exactly backwards) is true, * we will return all the input tuples, so the input node's row count @@ -1338,12 +1372,13 @@ make_group(List *tlist, * tuples --- bogus, but how to do better? */ if (! tuplePerGroup) - node->plan.plan_rows *= 0.1; - node->plan.state = (EState *) NULL; - node->plan.qual = NULL; - node->plan.targetlist = tlist; - node->plan.lefttree = lefttree; - node->plan.righttree = (Plan *) NULL; + plan->plan_rows *= 0.1; + + plan->state = (EState *) NULL; + plan->qual = NULL; + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = (Plan *) NULL; node->tuplePerGroup = tuplePerGroup; node->numCols = ngrp; node->grpColIdx = grpColIdx; @@ -1367,11 +1402,17 @@ make_unique(List *tlist, Plan *lefttree, List *distinctList) List *slitem; copy_plan_costsize(plan, lefttree); + /* + * Charge one cpu_operator_cost per comparison per input tuple. + * We assume all columns get compared at most of the tuples. 
+ */ + plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols; /* * As for Group, we make the unsupported assumption that there will be * 10% as many tuples out as in. */ plan->plan_rows *= 0.1; + plan->state = (EState *) NULL; plan->targetlist = tlist; plan->qual = NIL; diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index b94cc3e4b425e532d1f8683c62df909143fdbaa8..6b6f3971719d05b0eefec89cb022a6daa29c7998 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.44 2000/02/07 04:41:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.45 2000/02/15 20:49:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "optimizer/cost.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" @@ -31,7 +32,6 @@ static void add_restrict_and_join_to_rel(Query *root, Node *clause); static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, Relids join_relids); static void add_vars_to_targetlist(Query *root, List *vars); -static void set_restrictinfo_joininfo(RestrictInfo *restrictinfo); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -150,7 +150,9 @@ add_restrict_and_join_to_rels(Query *root, List *clauses) * Add clause information to either the 'RestrictInfo' or 'JoinInfo' field * (depending on whether the clause is a join) of each base relation * mentioned in the clause. A RestrictInfo node is created and added to - * the appropriate list for each rel. + * the appropriate list for each rel. 
Also, if the clause uses a + * mergejoinable operator, enter the left- and right-side expressions + * into the query's lists of equijoined vars. */ static void add_restrict_and_join_to_rel(Query *root, Node *clause) @@ -181,14 +183,29 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) rel->baserestrictinfo = lcons(restrictinfo, rel->baserestrictinfo); + /* + * Check for a "mergejoinable" clause even though it's not a join + * clause. This is so that we can recognize that "a.x = a.y" makes + * x and y eligible to be considered equal, even when they belong + * to the same rel. Without this, we would not recognize that + * "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to consider + * z and q equal after their rels are joined. + */ + check_mergejoinable(restrictinfo); } else { /* * 'clause' is a join clause, since there is more than one atom in * the relid list. Set additional RestrictInfo fields for joining. + * + * We need the merge info whether or not mergejoin is enabled (for + * constructing equijoined-var lists), but we don't bother setting + * hash info if hashjoin is disabled. */ - set_restrictinfo_joininfo(restrictinfo); + check_mergejoinable(restrictinfo); + if (enable_hashjoin) + check_hashjoinable(restrictinfo); /* * Add clause to the join lists of all the relevant * relations. (If, perchance, 'clause' contains NO vars, then @@ -202,6 +219,15 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) */ add_vars_to_targetlist(root, vars); } + + /* + * If the clause has a mergejoinable operator, then the two sides + * represent equivalent PathKeyItems for path keys: any path that is + * sorted by one side will also be sorted by the other (after joining, + * that is). Record the key equivalence for future use. 
+ */ + if (restrictinfo->mergejoinoperator != InvalidOid) + add_equijoined_keys(root, restrictinfo); } /* @@ -247,24 +273,10 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, /***************************************************************************** * - * JOININFO + * CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES * *****************************************************************************/ -/* - * set_restrictinfo_joininfo - * Examine a RestrictInfo that has been determined to be a join clause, - * and set the merge and hash info fields if it can be merge/hash joined. - */ -static void -set_restrictinfo_joininfo(RestrictInfo *restrictinfo) -{ - if (enable_mergejoin) - check_mergejoinable(restrictinfo); - if (enable_hashjoin) - check_hashjoinable(restrictinfo); -} - /* * check_mergejoinable * If the restrictinfo's clause is mergejoinable, set the mergejoin @@ -272,10 +284,7 @@ set_restrictinfo_joininfo(RestrictInfo *restrictinfo) * * Currently, we support mergejoin for binary opclauses where * both operands are simple Vars and the operator is a mergejoinable - * operator. (Note: since we are only examining clauses that were - * classified as joins, it is certain that the two Vars belong to - * different relations... if we accepted more general clause structures - * we might need to check that the two sides refer to different rels...) + * operator. */ static void check_mergejoinable(RestrictInfo *restrictinfo) @@ -320,10 +329,7 @@ check_mergejoinable(RestrictInfo *restrictinfo) * * Currently, we support hashjoin for binary opclauses where * both operands are simple Vars and the operator is a hashjoinable - * operator. (Note: since we are only examining clauses that were - * classified as joins, it is certain that the two Vars belong to - * different relations... if we accepted more general clause structures - * we might need to check that the two sides refer to different rels...) + * operator. 
*/ static void check_hashjoinable(RestrictInfo *restrictinfo) diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index a414a910fefe8e55fe5f3bee9e4cd5424f367307..cfa134a3889fea78419bfa7c0eab2baaee99ba48 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.51 2000/02/07 04:41:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.52 2000/02/15 20:49:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" @@ -27,10 +28,11 @@ #include "utils/lsyscache.h" -static Plan *subplanner(Query *root, List *flat_tlist, List *qual); +static Plan *subplanner(Query *root, List *flat_tlist, List *qual, + double tuple_fraction); -/* +/*-------------------- * query_planner * Routine to create a query plan. It does so by first creating a * subplan for the topmost level of attributes in the query. Then, @@ -41,25 +43,41 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual); * be placed where and any relation level qualifications to be * satisfied. * - * tlist is the target list of the query (do NOT use root->targetList!) - * qual is the qualification of the query (likewise!) + * tlist is the target list of the query (do NOT use root->targetList!) + * qual is the qualification of the query (likewise!) + * tuple_fraction is the fraction of tuples we expect will be retrieved + * + * Note: the Query node now also includes a query_pathkeys field, which + * is both an input and an output of query_planner(). The input value + * signals query_planner that the indicated sort order is wanted in the + * final output plan. 
The output value is the actual pathkeys of the + * selected path. This might not be the same as what the caller requested; + * the caller must do pathkeys_contained_in() to decide whether an + * explicit sort is still needed. (The main reason query_pathkeys is a + * Query field and not a passed parameter is that the low-level routines + * in indxpath.c need to see it.) The pathkeys value passed to query_planner + * has not yet been "canonicalized", since the necessary info does not get + * computed until subplanner() scans the qual clauses. We canonicalize it + * inside subplanner() as soon as that task is done. The output value + * will be in canonical form as well. * - * Note: the Query node now also includes a query_pathkeys field, which - * is both an input and an output of query_planner(). The input value - * signals query_planner that the indicated sort order is wanted in the - * final output plan. The output value is the actual pathkeys of the - * selected path. This might not be the same as what the caller requested; - * the caller must do pathkeys_contained_in() to decide whether an - * explicit sort is still needed. (The main reason query_pathkeys is a - * Query field and not a passed parameter is that the low-level routines - * in indxpath.c need to see it.) + * tuple_fraction is interpreted as follows: + * 0 (or less): expect all tuples to be retrieved (normal case) + * 0 < tuple_fraction < 1: expect the given fraction of tuples available + * from the plan to be retrieved + * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples + * expected to be retrieved (ie, a LIMIT specification) + * Note that while this routine and its subroutines treat a negative + * tuple_fraction the same as 0, union_planner has a different interpretation. * - * Returns a query plan. + * Returns a query plan. 
+ *-------------------- */ Plan * query_planner(Query *root, List *tlist, - List *qual) + List *qual, + double tuple_fraction) { List *constant_qual = NIL; List *var_only_tlist; @@ -149,7 +167,7 @@ query_planner(Query *root, /* * Choose the best access path and build a plan for it. */ - subplan = subplanner(root, var_only_tlist, qual); + subplan = subplanner(root, var_only_tlist, qual, tuple_fraction); /* * Build a result node to control the plan if we have constant quals. @@ -192,33 +210,50 @@ query_planner(Query *root, * Subplanner creates an entire plan consisting of joins and scans * for processing a single level of attributes. * - * flat_tlist is the flattened target list - * qual is the qualification to be satisfied + * flat_tlist is the flattened target list + * qual is the qualification to be satisfied + * tuple_fraction is the fraction of tuples we expect will be retrieved * - * Returns a subplan. + * See query_planner() comments about the interpretation of tuple_fraction. * + * Returns a subplan. */ static Plan * subplanner(Query *root, List *flat_tlist, - List *qual) + List *qual, + double tuple_fraction) { RelOptInfo *final_rel; - Cost cheapest_cost; - Path *sortedpath; + Path *cheapestpath; + Path sort_path; /* dummy for result of cost_sort */ + Path *presortedpath; /* * Initialize the targetlist and qualification, adding entries to * base_rel_list as relation references are found (e.g., in the - * qualification, the targetlist, etc.) + * qualification, the targetlist, etc.). Restrict and join clauses + * are added to appropriate lists belonging to the mentioned relations, + * and we also build lists of equijoined keys for pathkey construction. 
*/ root->base_rel_list = NIL; root->join_rel_list = NIL; + root->equi_key_list = NIL; make_var_only_tlist(root, flat_tlist); add_restrict_and_join_to_rels(root, qual); add_missing_rels_to_query(root); + /* + * We should now have all the pathkey equivalence sets built, + * so it's now possible to convert the requested query_pathkeys + * to canonical form. + */ + root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); + + /* + * Ready to do the primary planning. + */ final_rel = make_one_rel(root); if (! final_rel) @@ -258,96 +293,81 @@ subplanner(Query *root, foreach(pathnode, final_rel->pathlist) { if (xfunc_do_predmig((Path *) lfirst(pathnode))) - set_cheapest(final_rel, final_rel->pathlist); + set_cheapest(final_rel); } } #endif /* - * Determine the cheapest path and create a subplan to execute it. + * Now that we have an estimate of the final rel's size, we can convert + * a tuple_fraction specified as an absolute count (ie, a LIMIT option) + * into a fraction of the total tuples. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= final_rel->rows; + + /* + * Determine the cheapest path, independently of any ordering + * considerations. We do, however, take into account whether the + * whole plan is expected to be evaluated or not. + */ + if (tuple_fraction <= 0.0 || tuple_fraction >= 1.0) + cheapestpath = final_rel->cheapest_total_path; + else + cheapestpath = + get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, + NIL, + tuple_fraction); + + Assert(cheapestpath != NULL); + + /* + * Select the best path and create a subplan to execute it. * * If no special sort order is wanted, or if the cheapest path is - * already appropriately ordered, just use the cheapest path. + * already appropriately ordered, we use the cheapest path found above. 
*/ if (root->query_pathkeys == NIL || pathkeys_contained_in(root->query_pathkeys, - final_rel->cheapestpath->pathkeys)) + cheapestpath->pathkeys)) { - root->query_pathkeys = final_rel->cheapestpath->pathkeys; - return create_plan(root, final_rel->cheapestpath); + root->query_pathkeys = cheapestpath->pathkeys; + return create_plan(root, cheapestpath); } /* * Otherwise, look to see if we have an already-ordered path that is - * cheaper than doing an explicit sort on cheapestpath. + * cheaper than doing an explicit sort on the cheapest-total-cost path. */ - cheapest_cost = final_rel->cheapestpath->path_cost + - cost_sort(root->query_pathkeys, final_rel->rows, final_rel->width); - - sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist, - root->query_pathkeys, - false); - if (sortedpath) + cheapestpath = final_rel->cheapest_total_path; + cost_sort(&sort_path, root->query_pathkeys, + final_rel->rows, final_rel->width); + sort_path.startup_cost += cheapestpath->total_cost; + sort_path.total_cost += cheapestpath->total_cost; + + presortedpath = + get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, + root->query_pathkeys, + tuple_fraction); + if (presortedpath) { - if (sortedpath->path_cost <= cheapest_cost) + if (compare_fractional_path_costs(presortedpath, &sort_path, + tuple_fraction) <= 0) { /* Found a better presorted path, use it */ - root->query_pathkeys = sortedpath->pathkeys; - return create_plan(root, sortedpath); + root->query_pathkeys = presortedpath->pathkeys; + return create_plan(root, presortedpath); } /* otherwise, doing it the hard way is still cheaper */ } - else - { - /* - * If we found no usable presorted path at all, it is possible - * that the user asked for descending sort order. Check to see - * if we can satisfy the pathkeys by using a backwards indexscan. - * To do this, we commute all the operators in the pathkeys and - * then look for a matching path that is an IndexPath. 
- */ - List *commuted_pathkeys = copyObject(root->query_pathkeys); - - if (commute_pathkeys(commuted_pathkeys)) - { - /* pass 'true' to force only IndexPaths to be considered */ - sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist, - commuted_pathkeys, - true); - if (sortedpath && sortedpath->path_cost <= cheapest_cost) - { - /* - * Kluge here: since IndexPath has no representation for - * backwards scan, we have to convert to Plan format and - * then poke the result. - */ - Plan *sortedplan = create_plan(root, sortedpath); - List *sortedpathkeys; - - Assert(IsA(sortedplan, IndexScan)); - ((IndexScan *) sortedplan)->indxorderdir = BackwardScanDirection; - /* - * Need to generate commuted keys representing the actual - * sort order. This should succeed, probably, but just in - * case it does not, use the original root->query_pathkeys - * as a conservative approximation. - */ - sortedpathkeys = copyObject(sortedpath->pathkeys); - if (commute_pathkeys(sortedpathkeys)) - root->query_pathkeys = sortedpathkeys; - - return sortedplan; - } - } - } /* - * Nothing for it but to sort the cheapestpath --- but we let the - * caller do that. union_planner has to be able to add a sort node + * Nothing for it but to sort the cheapest-total-cost path --- but we let + * the caller do that. union_planner has to be able to add a sort node * anyway, so no need for extra code here. (Furthermore, the given - * pathkeys might involve something we can't compute here, such as - * an aggregate function...) + * pathkeys might involve something we can't compute here, such as an + * aggregate function...) 
*/ - root->query_pathkeys = final_rel->cheapestpath->pathkeys; - return create_plan(root, final_rel->cheapestpath); + root->query_pathkeys = cheapestpath->pathkeys; + return create_plan(root, cheapestpath); } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 28483fd4734966caab4ec8ee3bbeadbe6a6d099c..cf400f8df1bb84b77bc176657f3c86f96e96f005 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.74 2000/01/27 18:11:31 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.75 2000/02/15 20:49:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,7 +61,7 @@ planner(Query *parse) transformKeySetQuery(parse); - result_plan = union_planner(parse); + result_plan = union_planner(parse, -1.0 /* default case */); Assert(PlannerQueryLevel == 1); if (PlannerPlanId > 0) @@ -76,23 +76,39 @@ planner(Query *parse) return result_plan; } -/* +/*-------------------- * union_planner + * Invokes the planner on union-type queries (both regular UNIONs and + * appends produced by inheritance), recursing if necessary to get them + * all, then processes normal plans. * - * Invokes the planner on union queries if there are any left, - * recursing if necessary to get them all, then processes normal plans. + * parse is the querytree produced by the parser & rewriter. + * tuple_fraction is the fraction of tuples we expect will be retrieved * - * Returns a query plan. 
+ * tuple_fraction is interpreted as follows: + * < 0: determine fraction by inspection of query (normal case) + * 0: expect all tuples to be retrieved + * 0 < tuple_fraction < 1: expect the given fraction of tuples available + * from the plan to be retrieved + * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples + * expected to be retrieved (ie, a LIMIT specification) + * The normal case is to pass -1, but some callers pass values >= 0 to + * override this routine's determination of the appropriate fraction. * + * Returns a query plan. + *-------------------- */ Plan * -union_planner(Query *parse) +union_planner(Query *parse, + double tuple_fraction) { List *tlist = parse->targetList; List *rangetable = parse->rtable; Plan *result_plan = (Plan *) NULL; AttrNumber *groupColIdx = NULL; List *current_pathkeys = NIL; + List *group_pathkeys; + List *sort_pathkeys; Index rt_index; /* @@ -139,6 +155,12 @@ union_planner(Query *parse) * Actually, for a normal UNION we have done an explicit sort; ought * to change interface to plan_union_queries to pass that info back! */ + + /* Calculate pathkeys that represent grouping/ordering requirements */ + group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, + tlist); + sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, + tlist); } else if ((rt_index = first_inherit_rt_entry(rangetable)) != -1) { @@ -176,6 +198,12 @@ union_planner(Query *parse) * We leave current_pathkeys NIL indicating we do not know sort order * of the Append-ed results. 
*/ + + /* Calculate pathkeys that represent grouping/ordering requirements */ + group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, + tlist); + sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, + tlist); } else { @@ -229,32 +257,131 @@ union_planner(Query *parse) */ sub_tlist = make_subplanTargetList(parse, tlist, &groupColIdx); + /* Calculate pathkeys that represent grouping/ordering requirements */ + group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, + tlist); + sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, + tlist); + /* * Figure out whether we need a sorted result from query_planner. * * If we have a GROUP BY clause, then we want a result sorted * properly for grouping. Otherwise, if there is an ORDER BY clause, - * we want to sort by the ORDER BY clause. + * we want to sort by the ORDER BY clause. (Note: if we have both, + * and ORDER BY is a superset of GROUP BY, it would be tempting to + * request sort by ORDER BY --- but that might just leave us failing + * to exploit an available sort order at all. Needs more thought...) */ if (parse->groupClause) + parse->query_pathkeys = group_pathkeys; + else if (parse->sortClause) + parse->query_pathkeys = sort_pathkeys; + else + parse->query_pathkeys = NIL; + + /* + * Figure out whether we expect to retrieve all the tuples that the + * plan can generate, or to stop early due to a LIMIT or other + * factors. If the caller passed a value >= 0, believe that value, + * else do our own examination of the query context. + */ + if (tuple_fraction < 0.0) { - parse->query_pathkeys = - make_pathkeys_for_sortclauses(parse->groupClause, tlist); + /* Initial assumption is we need all the tuples */ + tuple_fraction = 0.0; + /* + * Check for a LIMIT. 
+ * + * For now, we deliberately ignore the OFFSET clause, so that + * queries with the same LIMIT and different OFFSETs will get + * the same queryplan and therefore generate consistent results + * (to the extent the planner can guarantee that, anyway). + * XXX Perhaps it would be better to use the OFFSET too, and tell + * users to specify ORDER BY if they want consistent results + * across different LIMIT queries. + */ + if (parse->limitCount != NULL) + { + if (IsA(parse->limitCount, Const)) + { + Const *ccount = (Const *) parse->limitCount; + tuple_fraction = (double) ((int) (ccount->constvalue)); + /* the constant can legally be either 0 ("ALL") or a + * positive integer; either is consistent with our + * conventions for tuple_fraction. + */ + } + else + { + /* It's a PARAM ... don't know exactly what the limit + * will be, but for lack of a better idea assume 10% + * of the plan's result is wanted. + */ + tuple_fraction = 0.10; + } + } + /* + * Check for a retrieve-into-portal, ie DECLARE CURSOR. + * + * We have no real idea how many tuples the user will ultimately + * FETCH from a cursor, but it seems a good bet that he doesn't + * want 'em all. Optimize for 10% retrieval (you gotta better + * number?) + */ + if (parse->isPortal) + tuple_fraction = 0.10; } - else if (parse->sortClause) + /* + * Adjust tuple_fraction if we see that we are going to apply + * grouping/aggregation/etc. This is not overridable by the + * caller, since it reflects plan actions that this routine + * will certainly take, not assumptions about context. + */ + if (parse->groupClause) { - parse->query_pathkeys = - make_pathkeys_for_sortclauses(parse->sortClause, tlist); + /* + * In GROUP BY mode, we have the little problem that we don't + * really know how many input tuples will be needed to make a + * group, so we can't translate an output LIMIT count into an + * input count. For lack of a better idea, assume 10% of the + * input data will be processed if there is any output limit. 
+ */ + if (tuple_fraction > 0.0) + tuple_fraction = 0.10; + /* + * If both GROUP BY and ORDER BY are specified, we will need + * two levels of sort --- and, therefore, certainly need to + * read all the input tuples --- unless ORDER BY is a subset + * of GROUP BY. (Although we are comparing non-canonicalized + * pathkeys here, it should be OK since they will both contain + * only single-element sublists at this point. See pathkeys.c.) + */ + if (parse->groupClause && parse->sortClause && + ! pathkeys_contained_in(sort_pathkeys, group_pathkeys)) + tuple_fraction = 0.0; } - else + else if (parse->hasAggs) { - parse->query_pathkeys = NIL; + /* Ungrouped aggregate will certainly want all the input tuples. */ + tuple_fraction = 0.0; + } + else if (parse->distinctClause) + { + /* + * SELECT DISTINCT, like GROUP, will absorb an unpredictable + * number of input tuples per output tuple. So, fall back to + * our same old 10% default... + */ + if (tuple_fraction > 0.0) + tuple_fraction = 0.10; } /* Generate the (sub) plan */ result_plan = query_planner(parse, sub_tlist, - (List *) parse->qual); + (List *) parse->qual, + tuple_fraction); /* query_planner returns actual sort order (which is not * necessarily what we requested) in query_pathkeys. @@ -266,6 +393,13 @@ union_planner(Query *parse) if (! result_plan) elog(ERROR, "union_planner: failed to create plan"); + /* + * We couldn't canonicalize group_pathkeys and sort_pathkeys before + * running query_planner(), so do it now. + */ + group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); + sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); + /* * If we have a GROUP BY clause, insert a group node (plus the * appropriate sort node, if necessary). 
@@ -274,7 +408,6 @@ union_planner(Query *parse) { bool tuplePerGroup; List *group_tlist; - List *group_pathkeys; bool is_sorted; /* @@ -300,8 +433,6 @@ union_planner(Query *parse) * Figure out whether the path result is already ordered the way we * need it --- if so, no need for an explicit sort step. */ - group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, - tlist); if (pathkeys_contained_in(group_pathkeys, current_pathkeys)) { is_sorted = true; /* no sort needed now */ @@ -352,15 +483,15 @@ union_planner(Query *parse) } /* - * If aggregate is present, insert the agg node + * If aggregate is present, insert the Agg node + * + * HAVING clause, if any, becomes qual of the Agg node */ if (parse->hasAggs) { - result_plan = (Plan *) make_agg(tlist, result_plan); - - /* HAVING clause, if any, becomes qual of the Agg node */ - result_plan->qual = (List *) parse->havingQual; - + result_plan = (Plan *) make_agg(tlist, + (List *) parse->havingQual, + result_plan); /* Note: Agg does not affect any existing sort order of the tuples */ } @@ -370,10 +501,6 @@ union_planner(Query *parse) */ if (parse->sortClause) { - List *sort_pathkeys; - - sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, - tlist); if (! 
pathkeys_contained_in(sort_pathkeys, current_pathkeys)) { result_plan = make_sortplan(tlist, parse->sortClause, result_plan); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index da95a2df41f6796f739848cbccf60a5e858b3d05..2790b2740b69ba1f5e0280cba6e4bb375f25db61 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.27 2000/01/26 05:56:38 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.28 2000/02/15 20:49:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -123,6 +123,7 @@ static Node * make_subplan(SubLink *slink) { SubPlan *node = makeNode(SubPlan); + double tuple_fraction; Plan *plan; List *lst; Node *result; @@ -132,7 +133,26 @@ make_subplan(SubLink *slink) PlannerQueryLevel++; /* we becomes child */ - node->plan = plan = union_planner((Query *) slink->subselect); + /* + * For an EXISTS subplan, tell lower-level planner to expect that + * only the first tuple will be retrieved. For ALL, ANY, and MULTIEXPR + * subplans, we will be able to stop evaluating if the test condition + * fails, so very often not all the tuples will be retrieved; for lack + * of a better idea, specify 50% retrieval. For EXPR_SUBLINK use default + * behavior. + * + * NOTE: if you change these numbers, also change cost_qual_eval_walker + * in costsize.c. + */ + if (slink->subLinkType == EXISTS_SUBLINK) + tuple_fraction = 1.0; /* just like a LIMIT 1 */ + else if (slink->subLinkType == EXPR_SUBLINK) + tuple_fraction = -1.0; /* default behavior */ + else + tuple_fraction = 0.5; /* 50% */ + + node->plan = plan = union_planner((Query *) slink->subselect, + tuple_fraction); /* * Assign subPlan, extParam and locParam to plan nodes. 
At the moment, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 9a86cb23488dd051012213c8e522bdee6a70324b..10a48c666e66ca7645bab4e5404237901dacec59 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.44 2000/02/15 03:37:26 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.45 2000/02/15 20:49:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -122,28 +122,35 @@ plan_union_queries(Query *parse) /* Is this a simple one */ if (!union_all_found || !union_found || - /* A trailing UNION negates the affect of earlier UNION ALLs */ + /* A trailing UNION negates the effect of earlier UNION ALLs */ !last_union_all_flag) { List *hold_unionClause = parse->unionClause; + double tuple_fraction = -1.0; /* default processing */ - /* we will do this later, so don't do it now */ + /* we will do sorting later, so don't do it now */ if (!union_all_found || !last_union_all_flag) { parse->sortClause = NIL; parse->distinctClause = NIL; + /* + * force lower-level planning to assume that all tuples will + * be retrieved, even if it sees a LIMIT in the query node. + */ + tuple_fraction = 0.0; } parse->unionClause = NIL; /* prevent recursion */ - union_plans = lcons(union_planner(parse), NIL); + union_plans = lcons(union_planner(parse, tuple_fraction), NIL); union_rts = lcons(parse->rtable, NIL); foreach(ulist, hold_unionClause) { Query *union_query = lfirst(ulist); - union_plans = lappend(union_plans, union_planner(union_query)); + union_plans = lappend(union_plans, + union_planner(union_query, tuple_fraction)); union_rts = lappend(union_rts, union_query->rtable); } } @@ -165,9 +172,12 @@ plan_union_queries(Query *parse) /* * Recursion, but UNION only. 
The last one is a UNION, so it will - * not come here in recursion, + * not come here in recursion. + * + * XXX is it OK to pass default -1 to union_planner in this path, + * or should we force a tuple_fraction value? */ - union_plans = lcons(union_planner(parse), NIL); + union_plans = lcons(union_planner(parse, -1.0), NIL); union_rts = lcons(parse->rtable, NIL); /* Append the remaining UNION ALLs */ @@ -175,7 +185,8 @@ plan_union_queries(Query *parse) { Query *union_all_query = lfirst(ulist); - union_plans = lappend(union_plans, union_planner(union_all_query)); + union_plans = lappend(union_plans, + union_planner(union_all_query, -1.0)); union_rts = lappend(union_rts, union_all_query->rtable); } } @@ -295,6 +306,7 @@ plan_inherit_query(Relids relids, List *union_plans = NIL; List *union_rtentries = NIL; List *save_tlist = root->targetList; + double tuple_fraction; List *i; /* @@ -303,6 +315,17 @@ plan_inherit_query(Relids relids, */ root->targetList = NIL; + /* + * If we are going to need sorting or grouping at the top level, + * force lower-level planners to assume that all tuples will be + * retrieved. 
+ */ + if (root->distinctClause || root->sortClause || + root->groupClause || root->hasAggs) + tuple_fraction = 0.0; /* will need all tuples from each subplan */ + else + tuple_fraction = -1.0; /* default behavior is OK (I think) */ + foreach(i, relids) { int relid = lfirsti(i); @@ -344,7 +367,8 @@ plan_inherit_query(Relids relids, relid, new_root); - union_plans = lappend(union_plans, union_planner(new_root)); + union_plans = lappend(union_plans, + union_planner(new_root, tuple_fraction)); union_rtentries = lappend(union_rtentries, new_rt_entry); } @@ -551,14 +575,17 @@ make_append(List *appendplans, node->unionrtables = unionrtables; node->inheritrelid = rt_index; node->inheritrtable = inheritrtable; - node->plan.cost = 0; + node->plan.startup_cost = 0; + node->plan.total_cost = 0; node->plan.plan_rows = 0; node->plan.plan_width = 0; foreach(subnode, appendplans) { Plan *subplan = (Plan *) lfirst(subnode); - node->plan.cost += subplan->cost; + if (subnode == appendplans) /* first node? 
*/ + node->plan.startup_cost = subplan->startup_cost; + node->plan.total_cost += subplan->total_cost; node->plan.plan_rows += subplan->plan_rows; if (node->plan.plan_width < subplan->plan_width) node->plan.plan_width = subplan->plan_width; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 7c3c20b855f0327ae097eedb25f878f64bd0d72a..ba991388de0962d7a234c2d3876e90fde93a6e62 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.59 2000/02/07 04:41:01 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.60 2000/02/15 20:49:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,67 +29,122 @@ *****************************************************************************/ /* - * path_is_cheaper - * Returns t iff 'path1' is cheaper than 'path2'. + * compare_path_costs + * Return -1, 0, or +1 according as path1 is cheaper, the same cost, + * or more expensive than path2 for the specified criterion. + */ +int +compare_path_costs(Path *path1, Path *path2, CostSelector criterion) +{ + if (criterion == STARTUP_COST) + { + if (path1->startup_cost < path2->startup_cost) + return -1; + if (path1->startup_cost > path2->startup_cost) + return +1; + /* + * If paths have the same startup cost (not at all unlikely), + * order them by total cost. + */ + if (path1->total_cost < path2->total_cost) + return -1; + if (path1->total_cost > path2->total_cost) + return +1; + } + else + { + if (path1->total_cost < path2->total_cost) + return -1; + if (path1->total_cost > path2->total_cost) + return +1; + /* + * If paths have the same total cost, order them by startup cost. 
+ */ + if (path1->startup_cost < path2->startup_cost) + return -1; + if (path1->startup_cost > path2->startup_cost) + return +1; + } + return 0; +} + +/* + * compare_path_fractional_costs + * Return -1, 0, or +1 according as path1 is cheaper, the same cost, + * or more expensive than path2 for fetching the specified fraction + * of the total tuples. * + * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the + * path with the cheaper total_cost. */ -bool -path_is_cheaper(Path *path1, Path *path2) +int +compare_fractional_path_costs(Path *path1, Path *path2, + double fraction) { - return (bool) (path1->path_cost < path2->path_cost); + Cost cost1, + cost2; + + if (fraction <= 0.0 || fraction >= 1.0) + return compare_path_costs(path1, path2, TOTAL_COST); + cost1 = path1->startup_cost + + fraction * (path1->total_cost - path1->startup_cost); + cost2 = path2->startup_cost + + fraction * (path2->total_cost - path2->startup_cost); + if (cost1 < cost2) + return -1; + if (cost1 > cost2) + return +1; + return 0; } /* * set_cheapest - * Finds the minimum cost path from among a relation's paths. + * Find the minimum-cost paths from among a relation's paths, + * and save them in the rel's cheapest-path fields. * - * 'parent_rel' is the parent relation - * 'pathlist' is a list of path nodes corresponding to 'parent_rel' - * - * Returns and sets the relation entry field with the pathnode that - * is minimum. + * This is normally called only after we've finished constructing the path + * list for the rel node. * + * If we find two paths of identical costs, try to keep the better-sorted one. + * The paths might have unrelated sort orderings, in which case we can only + * guess which might be better to keep, but if one is superior then we + * definitely should keep it. 
*/ -Path * -set_cheapest(RelOptInfo *parent_rel, List *pathlist) +void +set_cheapest(RelOptInfo *parent_rel) { + List *pathlist = parent_rel->pathlist; List *p; - Path *cheapest_so_far; + Path *cheapest_startup_path; + Path *cheapest_total_path; Assert(IsA(parent_rel, RelOptInfo)); Assert(pathlist != NIL); - cheapest_so_far = (Path *) lfirst(pathlist); + cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist); foreach(p, lnext(pathlist)) { Path *path = (Path *) lfirst(p); - - if (path_is_cheaper(path, cheapest_so_far)) - cheapest_so_far = path; + int cmp; + + cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_startup_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_startup_path = path; + + cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_total_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_total_path = path; } - parent_rel->cheapestpath = cheapest_so_far; - - return cheapest_so_far; -} - -/* - * add_pathlist - * Consider each path given in new_paths, and add it to the parent rel's - * pathlist if it seems worthy. - */ -void -add_pathlist(RelOptInfo *parent_rel, List *new_paths) -{ - List *p1; - - foreach(p1, new_paths) - { - Path *new_path = (Path *) lfirst(p1); - - add_path(parent_rel, new_path); - } + parent_rel->cheapest_startup_path = cheapest_startup_path; + parent_rel->cheapest_total_path = cheapest_total_path; } /* @@ -97,12 +152,18 @@ add_pathlist(RelOptInfo *parent_rel, List *new_paths) * Consider a potential implementation path for the specified parent rel, * and add it to the rel's pathlist if it is worthy of consideration. * A path is worthy if it has either a better sort order (better pathkeys) - * or cheaper cost than any of the existing old paths. + * or cheaper cost (on either dimension) than any of the existing old paths. 
* * Unless parent_rel->pruneable is false, we also remove from the rel's * pathlist any old paths that are dominated by new_path --- that is, * new_path is both cheaper and at least as well ordered. * + * NOTE: discarded Path objects are immediately pfree'd to reduce planner + * memory consumption. We dare not try to free the substructure of a Path, + * since much of it may be shared with other Paths or the query tree itself; + * but just recycling discarded Path nodes is a very useful savings in + * a large join tree. + * * 'parent_rel' is the relation entry to which the path corresponds. * 'new_path' is a potential path for parent_rel. * @@ -124,26 +185,40 @@ add_path(RelOptInfo *parent_rel, Path *new_path) { Path *old_path = (Path *) lfirst(p1); bool remove_old = false; /* unless new proves superior */ + int costcmp; - switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys)) + costcmp = compare_path_costs(new_path, old_path, TOTAL_COST); + /* + * If the two paths compare differently for startup and total cost, + * then we want to keep both, and we can skip the (much slower) + * comparison of pathkeys. If they compare the same, proceed with + * the pathkeys comparison. Note this test relies on the fact that + * compare_path_costs will only return 0 if both costs are equal + * (and, therefore, there's no need to call it twice in that case). 
+ */ + if (costcmp == 0 || + costcmp == compare_path_costs(new_path, old_path, STARTUP_COST)) { - case PATHKEYS_EQUAL: - if (new_path->path_cost < old_path->path_cost) - remove_old = true; /* new dominates old */ - else - accept_new = false; /* old equals or dominates new */ - break; - case PATHKEYS_BETTER1: - if (new_path->path_cost <= old_path->path_cost) - remove_old = true; /* new dominates old */ - break; - case PATHKEYS_BETTER2: - if (new_path->path_cost >= old_path->path_cost) - accept_new = false; /* old dominates new */ - break; - case PATHKEYS_DIFFERENT: - /* keep both paths, since they have different ordering */ - break; + switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys)) + { + case PATHKEYS_EQUAL: + if (costcmp < 0) + remove_old = true; /* new dominates old */ + else + accept_new = false; /* old equals or dominates new */ + break; + case PATHKEYS_BETTER1: + if (costcmp <= 0) + remove_old = true; /* new dominates old */ + break; + case PATHKEYS_BETTER2: + if (costcmp >= 0) + accept_new = false; /* old dominates new */ + break; + case PATHKEYS_DIFFERENT: + /* keep both paths, since they have different ordering */ + break; + } } /* @@ -156,6 +231,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) lnext(p1_prev) = lnext(p1); else parent_rel->pathlist = lnext(p1); + pfree(old_path); } else p1_prev = p1; @@ -174,6 +250,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path) /* Accept the path */ parent_rel->pathlist = lcons(new_path, parent_rel->pathlist); } + else + { + /* Reject and recycle the path */ + pfree(new_path); + } } @@ -195,7 +276,8 @@ create_seqscan_path(RelOptInfo *rel) pathnode->pathtype = T_SeqScan; pathnode->parent = rel; pathnode->pathkeys = NIL; /* seqscan has unordered result */ - pathnode->path_cost = cost_seqscan(rel); + + cost_seqscan(pathnode, rel); return pathnode; } @@ -208,6 +290,10 @@ create_seqscan_path(RelOptInfo *rel) * 'index' is an index on 'rel' * 'restriction_clauses' is a list of RestrictInfo nodes * to 
be used as index qual conditions in the scan. + * 'indexscandir' is ForwardScanDirection or BackwardScanDirection + * if the caller expects a specific scan direction, + * or NoMovementScanDirection if the caller is willing to accept + * an unordered index. * * Returns the new path node. */ @@ -215,14 +301,31 @@ IndexPath * create_index_path(Query *root, RelOptInfo *rel, IndexOptInfo *index, - List *restriction_clauses) + List *restriction_clauses, + ScanDirection indexscandir) { IndexPath *pathnode = makeNode(IndexPath); List *indexquals; pathnode->path.pathtype = T_IndexScan; pathnode->path.parent = rel; - pathnode->path.pathkeys = build_index_pathkeys(root, rel, index); + + pathnode->path.pathkeys = build_index_pathkeys(root, rel, index, + indexscandir); + if (pathnode->path.pathkeys == NIL) + { + /* No ordering available from index, is that OK? */ + if (! ScanDirectionIsNoMovement(indexscandir)) + elog(ERROR, "create_index_path: failed to create ordered index scan"); + } + else + { + /* The index is ordered, and build_index_pathkeys defaulted to + * forward scan, so make sure we mark the pathnode properly. 
+ */ + if (ScanDirectionIsNoMovement(indexscandir)) + indexscandir = ForwardScanDirection; + } indexquals = get_actual_clauses(restriction_clauses); /* expand special operators to indexquals the executor can handle */ @@ -234,10 +337,10 @@ create_index_path(Query *root, */ pathnode->indexid = lconsi(index->indexoid, NIL); pathnode->indexqual = lcons(indexquals, NIL); + pathnode->indexscandir = indexscandir; pathnode->joinrelids = NIL; /* no join clauses here */ - pathnode->path.path_cost = cost_index(root, rel, index, indexquals, - false); + cost_index(&pathnode->path, root, rel, index, indexquals, false); return pathnode; } @@ -256,13 +359,14 @@ create_tidscan_path(RelOptInfo *rel, List *tideval) pathnode->path.pathtype = T_TidScan; pathnode->path.parent = rel; pathnode->path.pathkeys = NIL; - pathnode->path.path_cost = cost_tidscan(rel, tideval); - /* divide selectivity for each clause to get an equal selectivity - * as IndexScan does OK ? - */ pathnode->tideval = copyObject(tideval); /* is copy really necessary? */ pathnode->unjoined_relids = NIL; + cost_tidscan(&pathnode->path, rel, tideval); + /* divide selectivity for each clause to get an equal selectivity + * as IndexScan does OK ? 
+ */ + return pathnode; } @@ -296,9 +400,8 @@ create_nestloop_path(RelOptInfo *joinrel, pathnode->joinrestrictinfo = restrict_clauses; pathnode->path.pathkeys = pathkeys; - pathnode->path.path_cost = cost_nestloop(outer_path, - inner_path, - IsA(inner_path, IndexPath)); + cost_nestloop(&pathnode->path, outer_path, inner_path, + restrict_clauses, IsA(inner_path, IndexPath)); return pathnode; } @@ -350,10 +453,13 @@ create_mergejoin_path(RelOptInfo *joinrel, pathnode->path_mergeclauses = mergeclauses; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; - pathnode->jpath.path.path_cost = cost_mergejoin(outer_path, - inner_path, - outersortkeys, - innersortkeys); + + cost_mergejoin(&pathnode->jpath.path, + outer_path, + inner_path, + restrict_clauses, + outersortkeys, + innersortkeys); return pathnode; } @@ -388,9 +494,12 @@ create_hashjoin_path(RelOptInfo *joinrel, /* A hashjoin never has pathkeys, since its ordering is unpredictable */ pathnode->jpath.path.pathkeys = NIL; pathnode->path_hashclauses = hashclauses; - pathnode->jpath.path.path_cost = cost_hashjoin(outer_path, - inner_path, - innerdisbursion); + + cost_hashjoin(&pathnode->jpath.path, + outer_path, + inner_path, + restrict_clauses, + innerdisbursion); return pathnode; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d81eebfbb1317113fd58dc293302a47e4ae4be76..8663cdb0241de0d4aaa475cbfca59a20b9514ee1 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.46 2000/01/26 05:56:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.47 2000/02/15 20:49:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,6 +118,7 @@ find_secondary_indexes(Query *root, Index relid) } else info->indpred = NIL; + info->lossy = 
index->indislossy; for (i = 0; i < INDEX_MAX_KEYS; i++) info->indexkeys[i] = index->indkey[i]; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d22daa0f638c7677abe51fa0ffebd12c69c5fe4a..f11dd60d243f3fe7f1a10a78eac2b4156a9bf7bc 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.23 2000/02/07 04:41:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.24 2000/02/15 20:49:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,12 +62,14 @@ get_base_rel(Query *root, int relid) rel->width = 0; rel->targetlist = NIL; rel->pathlist = NIL; - rel->cheapestpath = (Path *) NULL; + rel->cheapest_startup_path = NULL; + rel->cheapest_total_path = NULL; rel->pruneable = true; rel->indexed = false; rel->pages = 0; rel->tuples = 0; rel->baserestrictinfo = NIL; + rel->baserestrictcost = 0; rel->joininfo = NIL; rel->innerjoin = NIL; @@ -180,12 +182,14 @@ get_join_rel(Query *root, joinrel->width = 0; joinrel->targetlist = NIL; joinrel->pathlist = NIL; - joinrel->cheapestpath = (Path *) NULL; + joinrel->cheapest_startup_path = NULL; + joinrel->cheapest_total_path = NULL; joinrel->pruneable = true; joinrel->indexed = false; joinrel->pages = 0; joinrel->tuples = 0; joinrel->baserestrictinfo = NIL; + joinrel->baserestrictcost = 0; joinrel->joininfo = NIL; joinrel->innerjoin = NIL; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index ab41413432c5d6d5481cf0799ee60fc0ccc3d7fa..30106744ded9f3e091e5dbbb5260eb3b62f69d3a 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.54 2000/01/26 05:57:14 momjian Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.55 2000/02/15 20:49:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -756,7 +756,9 @@ getattstatistics(Oid relid, static void genericcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, Selectivity *indexSelectivity) + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity) { double numIndexTuples; double numIndexPages; @@ -771,8 +773,17 @@ genericcostestimate(Query *root, RelOptInfo *rel, /* Estimate the number of index pages that will be retrieved */ numIndexPages = *indexSelectivity * index->pages; - /* Compute the index access cost */ - *indexAccessCost = numIndexPages + cpu_index_page_weight * numIndexTuples; + /* + * Compute the index access cost. + * + * Our generic assumption is that the index pages will be read + * sequentially, so they have cost 1.0 each, not random_page_cost. + * Also, we charge for evaluation of the indexquals at each index tuple. + * All the costs are assumed to be paid incrementally during the scan. 
+ */ + *indexStartupCost = 0; + *indexTotalCost = numIndexPages + + (cpu_index_tuple_cost + cost_qual_eval(indexQuals)) * numIndexTuples; } /* @@ -782,35 +793,43 @@ genericcostestimate(Query *root, RelOptInfo *rel, void btcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, Selectivity *indexSelectivity) + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity) { genericcostestimate(root, rel, index, indexQuals, - indexAccessCost, indexSelectivity); + indexStartupCost, indexTotalCost, indexSelectivity); } void rtcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, Selectivity *indexSelectivity) + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity) { genericcostestimate(root, rel, index, indexQuals, - indexAccessCost, indexSelectivity); + indexStartupCost, indexTotalCost, indexSelectivity); } void hashcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, Selectivity *indexSelectivity) + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity) { genericcostestimate(root, rel, index, indexQuals, - indexAccessCost, indexSelectivity); + indexStartupCost, indexTotalCost, indexSelectivity); } void gistcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, Selectivity *indexSelectivity) + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity) { genericcostestimate(root, rel, index, indexQuals, - indexAccessCost, indexSelectivity); + indexStartupCost, indexTotalCost, indexSelectivity); } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 50a15fc7d0b721ad17d92505b5b3482a2fdce90a..8f8c2fad8c7dd0c94a1cb85e02c3ce529727d9f3 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3,7 +3,7 @@ * * Copyright 2000 by 
PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.10 2000/02/07 23:10:07 petere Exp $ + * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.11 2000/02/15 20:49:22 tgl Exp $ */ /*----------- @@ -172,8 +172,30 @@ char ** psql_completion(char *text, int start, int end) }; static char * pgsql_variables[] = { - "Client_Encoding", "Names", "DateStyle", "Server_Encoding", "TimeZone", - "TRANSACTION", "Cost_Heap", "Cost_Index", "GEQO", "KSQO", "Query_Limit", + /* these SET arguments are known in gram.y */ + "TRANSACTION ISOLATION LEVEL", + "NAMES", + /* rest should match table in src/backend/commands/variable.c */ + "DateStyle", + "TimeZone", + "effective_cache_size", + "random_page_cost", + "cpu_tuple_cost", + "cpu_index_tuple_cost", + "cpu_operator_cost", + "enable_seqscan", + "enable_indexscan", + "enable_tidscan", + "enable_sort", + "enable_nestloop", + "enable_mergejoin", + "enable_hashjoin", + "GEQO", + "client_encoding", + "server_encoding", + "KSQO", + "XactIsoLevel", + "PG_Options", NULL }; diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d6d8ff6ffba407164c55fa4f40c4ec0e8dcedd39..bf93830ca21ccd3ca45352e8815fa2f7fd7a2640 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.13 2000/01/27 18:11:40 tgl Exp $ + * $Id: catversion.h,v 1.14 2000/02/15 20:49:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200001271 +#define CATALOG_VERSION_NO 200002151 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 064232d62ccaaae15666415d52ab2bc1b10ac1bc..87107b1df367553325787ed1c12876d32a2f74ae 100644 --- a/src/include/catalog/pg_proc.h +++ 
b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.120 2000/02/10 19:51:46 momjian Exp $ + * $Id: pg_proc.h,v 1.121 2000/02/15 20:49:23 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -212,9 +212,9 @@ DESCR("not equal"); DATA(insert OID = 89 ( version PGUID 11 f t f 0 f 25 "" 100 0 0 100 version - )); DESCR("PostgreSQL version string"); -DATA(insert OID = 1265 ( rtcostestimate PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100 rtcostestimate - )); +DATA(insert OID = 1265 ( rtcostestimate PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100 rtcostestimate - )); DESCR("r-tree cost estimator"); -DATA(insert OID = 1268 ( btcostestimate PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100 btcostestimate - )); +DATA(insert OID = 1268 ( btcostestimate PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100 btcostestimate - )); DESCR("btree cost estimator"); /* OIDS 100 - 199 */ @@ -796,7 +796,7 @@ DESCR("convert name to char()"); DATA(insert OID = 409 ( bpchar_name PGUID 11 f t t 1 f 19 "1042" 100 0 0 100 bpchar_name - )); DESCR("convert char() to name"); -DATA(insert OID = 438 ( hashcostestimate PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100 hashcostestimate - )); +DATA(insert OID = 438 ( hashcostestimate PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100 hashcostestimate - )); DESCR("hash index cost estimator"); DATA(insert OID = 440 ( hashgettuple PGUID 11 f t f 2 f 23 "0" 100 0 0 100 hashgettuple - )); @@ -1031,7 +1031,7 @@ DESCR("larger of two"); DATA(insert OID = 771 ( int2smaller PGUID 11 f t t 2 f 21 "21 21" 100 0 0 100 int2smaller - )); DESCR("smaller of two"); -DATA(insert OID = 772 ( gistcostestimate PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100 gistcostestimate - )); +DATA(insert OID = 772 ( gistcostestimate PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100 gistcostestimate - )); DESCR("gist cost 
estimator"); DATA(insert OID = 774 ( gistgettuple PGUID 11 f t f 2 f 23 "0" 100 0 0 100 gistgettuple - )); DESCR("gist(internal)"); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c1d6c4c1ca5f427f803564de7fd2927f48bcfe5d..161b53c25af97db7ad2d96d18aaccde3f7c75f94 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodes.h,v 1.63 2000/01/26 05:58:16 momjian Exp $ + * $Id: nodes.h,v 1.64 2000/02/15 20:49:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -257,6 +257,9 @@ typedef struct Node (IsA(t, Noname) || IsA(t, Material) || IsA(t, Sort) || \ IsA(t, Unique)) +#define IsA_Value(t) \ + (IsA(t, Integer) || IsA(t, Float) || IsA(t, String)) + /* ---------------------------------------------------------------- * extern declarations follow * ---------------------------------------------------------------- diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 6eb47618c5e02ee14961e94ea4722291851ed187..df7bec10f0008912ca33a9ec008fbd73a4d4cf1e 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parsenodes.h,v 1.98 2000/02/15 03:38:14 thomas Exp $ + * $Id: parsenodes.h,v 1.99 2000/02/15 20:49:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,6 +72,7 @@ typedef struct Query /* internal to planner */ List *base_rel_list; /* list of base-relation RelOptInfos */ List *join_rel_list; /* list of join-relation RelOptInfos */ + List *equi_key_list; /* list of lists of equijoined PathKeyItems */ List *query_pathkeys; /* pathkeys for query_planner()'s result */ } Query; @@ -1124,7 +1125,6 @@ 
typedef struct RangeTblEntry { NodeTag type; char *relname; /* real name of the relation */ -// char *refname; /* reference name (given in FROM clause) */ #ifndef DISABLE_JOIN_SYNTAX Attr *ref; /* reference names (given in FROM clause) */ #endif diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index ff83431e580841812a81e70e7013392822d0776a..2731c57948cc13db433cd55f81a9b417ffc7cba2 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: plannodes.h,v 1.37 2000/01/27 18:11:44 tgl Exp $ + * $Id: plannodes.h,v 1.38 2000/02/15 20:49:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,10 +65,15 @@ typedef struct Plan { NodeTag type; - /* planner's estimates of cost and result size */ - Cost cost; - double plan_rows; - int plan_width; + /* estimated execution costs for plan (see costsize.c for more info) */ + Cost startup_cost; /* cost expended before fetching any tuples */ + Cost total_cost; /* total cost (assuming all tuples fetched) */ + + /* planner's estimate of result size (note: LIMIT, if any, is not + * considered in setting plan_rows) + */ + double plan_rows; /* number of rows plan is expected to emit */ + int plan_width; /* average row width in bytes */ EState *state; /* at execution time, state's of * individual nodes point to one EState diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 529aa5cea7a8c88d0e64c102739cc48d8946ca82..3efdaa5b32562da5c1689dcd02e3d35e541f602a 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,13 +7,14 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relation.h,v 1.43 2000/02/07 04:41:02 tgl Exp $ + * $Id: relation.h,v 1.44 2000/02/15 20:49:25 tgl 
Exp $ * *------------------------------------------------------------------------- */ #ifndef RELATION_H #define RELATION_H +#include "access/sdir.h" #include "nodes/parsenodes.h" /* @@ -25,6 +26,12 @@ typedef List *Relids; +/* + * When looking for a "cheapest path", this enum specifies whether we want + * cheapest startup cost or cheapest total cost. + */ +typedef enum CostSelector { STARTUP_COST, TOTAL_COST } CostSelector; + /* * RelOptInfo * Per-relation information for planning/optimization @@ -38,10 +45,14 @@ typedef List *Relids; * clauses have been applied (ie, output rows of a plan for it) * width - avg. number of bytes per tuple in the relation after the * appropriate projections have been done (ie, output width) - * targetlist - List of TargetList nodes + * targetlist - List of TargetEntry nodes for the attributes we need + * to output from this relation * pathlist - List of Path nodes, one for each potentially useful * method of generating the relation - * cheapestpath - least expensive Path (regardless of ordering) + * cheapest_startup_path - the pathlist member with lowest startup cost + * (regardless of its ordering) + * cheapest_total_path - the pathlist member with lowest total cost + * (regardless of its ordering) * pruneable - flag to let the planner know whether it can prune the * pathlist of this RelOptInfo or not. * @@ -57,6 +68,8 @@ typedef List *Relids; * baserestrictinfo - List of RestrictInfo nodes, containing info about * each qualification clause in which this relation * participates (only used for base rels) + * baserestrictcost - Estimated cost of evaluating the baserestrictinfo + * clauses at a single tuple (only used for base rels) * joininfo - List of JoinInfo nodes, containing info about each join * clause in which this relation participates * innerjoin - List of Path nodes that represent indices that may be used @@ -74,6 +87,10 @@ typedef List *Relids; * (field joinrestrictinfo), not in the parent relation. 
But it's OK for * the RelOptInfo to store the joininfo lists, because those are the same * for a given rel no matter how we form it. + * + * We store baserestrictcost in the RelOptInfo (for base relations) because + * we know we will need it at least once (to price the sequential scan) + * and may need it multiple times to price index scans. */ typedef struct RelOptInfo @@ -90,7 +107,8 @@ typedef struct RelOptInfo /* materialization information */ List *targetlist; List *pathlist; /* Path structures */ - struct Path *cheapestpath; + struct Path *cheapest_startup_path; + struct Path *cheapest_total_path; bool pruneable; /* statistics from pg_class (only valid if it's a base rel!) */ @@ -100,6 +118,7 @@ typedef struct RelOptInfo /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base rel) */ + Cost baserestrictcost; /* cost of evaluating the above */ List *joininfo; /* JoinInfo structures */ List *innerjoin; /* potential indexscans for nestloop joins */ /* innerjoin indexscans are not in the main pathlist because they are @@ -126,6 +145,7 @@ typedef struct RelOptInfo * amcostestimate - OID of the relam's cost estimator * indproc - OID of the function if a functional index, else 0 * indpred - index predicate if a partial index, else NULL + * lossy - true if index is lossy (may return non-matching tuples) * * NB. the last element of the arrays classlist, indexkeys and ordering * is always 0. 
@@ -151,6 +171,7 @@ typedef struct IndexOptInfo Oid indproc; /* if a functional index */ List *indpred; /* if a partial index */ + bool lossy; /* if a lossy index */ } IndexOptInfo; /* @@ -190,7 +211,9 @@ typedef struct Path RelOptInfo *parent; /* the relation this path can build */ - Cost path_cost; /* estimated execution cost of path */ + /* estimated execution costs for path (see costsize.c for more info) */ + Cost startup_cost; /* cost expended before fetching any tuples */ + Cost total_cost; /* total cost (assuming all tuples fetched) */ NodeTag pathtype; /* tag identifying scan/join method */ /* XXX why is pathtype separate from the NodeTag? */ @@ -207,27 +230,34 @@ typedef struct Path * the same tuple more than once, even if it is matched in multiple scans.) * * 'indexid' is a list of index relation OIDs, one per scan to be performed. + * * 'indexqual' is a list of index qualifications, also one per scan. * Each entry in 'indexqual' is a sublist of qualification expressions with * implicit AND semantics across the sublist items. Only expressions that * are usable as indexquals (as determined by indxpath.c) may appear here. - * * NOTE that the semantics of the top-level list in 'indexqual' is OR * combination, while the sublists are implicitly AND combinations! + * + * 'indexscandir' is one of: + * ForwardScanDirection: forward scan of an ordered index + * BackwardScanDirection: backward scan of an ordered index + * NoMovementScanDirection: scan of an unordered index, or don't care + * (The executor doesn't care whether it gets ForwardScanDirection or + * NoMovementScanDirection for an indexscan, but the planner wants to + * distinguish ordered from unordered indexes for building pathkeys.) + * + * 'joinrelids' is only used in IndexPaths that are constructed for use + * as the inner path of a nestloop join. 
These paths have indexquals + * that refer to values of other rels, so those other rels must be + * included in the outer joinrel in order to make a usable join. *---------- */ - typedef struct IndexPath { Path path; List *indexid; List *indexqual; - /* - * joinrelids is only used in IndexPaths that are constructed for use - * as the inner path of a nestloop join. These paths have indexquals - * that refer to values of other rels, so those other rels must be - * included in the outer joinrel in order to make a usable join. - */ + ScanDirection indexscandir; Relids joinrelids; /* other rels mentioned in indexqual */ } IndexPath; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 79153c01d83e421466cf4b4487e1e6d392861b03..960a2ea9e9aafd72d9a79689ff6581cf0e6c276b 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.29 2000/02/07 04:41:04 tgl Exp $ + * $Id: cost.h,v 1.30 2000/02/15 20:49:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,9 +17,12 @@ #include "nodes/relation.h" /* defaults for costsize.c's Cost parameters */ -/* NB: cost-estimation code should use the variables, not the constants! */ -#define CPU_PAGE_WEIGHT 0.033 -#define CPU_INDEX_PAGE_WEIGHT 0.017 +/* NB: cost-estimation code should use the variables, not these constants! 
*/ +#define DEFAULT_EFFECTIVE_CACHE_SIZE 1000.0 /* measured in pages */ +#define DEFAULT_RANDOM_PAGE_COST 4.0 +#define DEFAULT_CPU_TUPLE_COST 0.01 +#define DEFAULT_CPU_INDEX_TUPLE_COST 0.001 +#define DEFAULT_CPU_OPERATOR_COST 0.0025 /* defaults for function attributes used for expensive function calculations */ #define BYTE_PCT 100 @@ -33,8 +36,12 @@ * routines to compute costs and sizes */ -extern Cost cpu_page_weight; -extern Cost cpu_index_page_weight; +/* parameter variables and flags */ +extern double effective_cache_size; +extern Cost random_page_cost; +extern Cost cpu_tuple_cost; +extern Cost cpu_index_tuple_cost; +extern Cost cpu_operator_cost; extern Cost disable_cost; extern bool enable_seqscan; extern bool enable_indexscan; @@ -44,17 +51,20 @@ extern bool enable_nestloop; extern bool enable_mergejoin; extern bool enable_hashjoin; -extern Cost cost_seqscan(RelOptInfo *baserel); -extern Cost cost_index(Query *root, RelOptInfo *baserel, IndexOptInfo *index, +extern void cost_seqscan(Path *path, RelOptInfo *baserel); +extern void cost_index(Path *path, Query *root, + RelOptInfo *baserel, IndexOptInfo *index, List *indexQuals, bool is_injoin); -extern Cost cost_tidscan(RelOptInfo *baserel, List *tideval); -extern Cost cost_sort(List *pathkeys, double tuples, int width); -extern Cost cost_nestloop(Path *outer_path, Path *inner_path, - bool is_indexjoin); -extern Cost cost_mergejoin(Path *outer_path, Path *inner_path, +extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval); +extern void cost_sort(Path *path, List *pathkeys, double tuples, int width); +extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path, + List *restrictlist, bool is_indexjoin); +extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path, + List *restrictlist, List *outersortkeys, List *innersortkeys); -extern Cost cost_hashjoin(Path *outer_path, Path *inner_path, - Selectivity innerdisbursion); +extern void cost_hashjoin(Path *path, Path 
*outer_path, Path *inner_path, + List *restrictlist, Selectivity innerdisbursion); +extern Cost cost_qual_eval(List *quals); extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel); extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel, RelOptInfo *outer_rel, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index eefb2553b3d6649aefad807b2404693189980f89..e59848278f4f626c011a0277e984e8451c6bb5b0 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pathnode.h,v 1.25 2000/02/07 04:41:04 tgl Exp $ + * $Id: pathnode.h,v 1.26 2000/02/15 20:49:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,15 +19,18 @@ /* * prototypes for pathnode.c */ -extern bool path_is_cheaper(Path *path1, Path *path2); -extern Path *set_cheapest(RelOptInfo *parent_rel, List *pathlist); +extern int compare_path_costs(Path *path1, Path *path2, + CostSelector criterion); +extern int compare_fractional_path_costs(Path *path1, Path *path2, + double fraction); +extern void set_cheapest(RelOptInfo *parent_rel); extern void add_path(RelOptInfo *parent_rel, Path *new_path); -extern void add_pathlist(RelOptInfo *parent_rel, List *new_paths); extern Path *create_seqscan_path(RelOptInfo *rel); extern IndexPath *create_index_path(Query *root, RelOptInfo *rel, IndexOptInfo *index, - List *restriction_clauses); + List *restriction_clauses, + ScanDirection indexscandir); extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval); extern NestPath *create_nestloop_path(RelOptInfo *joinrel, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 256aac90d754c2dd6db3ac1784f3788bd9b787dd..d7a0cc2d54602f4c6b21977d2adb13f2314cc3fc 100644 --- a/src/include/optimizer/paths.h +++ 
b/src/include/optimizer/paths.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: paths.h,v 1.42 2000/02/07 04:41:04 tgl Exp $ + * $Id: paths.h,v 1.43 2000/02/15 20:49:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,9 +33,9 @@ extern RelOptInfo *make_one_rel(Query *root); * indxpath.c * routines to generate index paths */ -extern List *create_index_paths(Query *root, RelOptInfo *rel, List *indices, - List *restrictinfo_list, - List *joininfo_list); +extern void create_index_paths(Query *root, RelOptInfo *rel, List *indices, + List *restrictinfo_list, + List *joininfo_list); extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam, bool indexkey_on_left); extern List *extract_or_indexqual_conditions(RelOptInfo *rel, @@ -47,14 +47,14 @@ extern List *expand_indexqual_conditions(List *indexquals); * orindxpath.c * additional routines for indexable OR clauses */ -extern List *create_or_index_paths(Query *root, RelOptInfo *rel, - List *clauses); +extern void create_or_index_paths(Query *root, RelOptInfo *rel, + List *clauses); /* * tidpath.h * routines to generate tid paths */ -extern List *create_tidscan_paths(Query *root, RelOptInfo *rel); +extern void create_tidscan_paths(Query *root, RelOptInfo *rel); /* * joinpath.c @@ -89,20 +89,27 @@ typedef enum PATHKEYS_DIFFERENT /* neither pathkey includes the other */ } PathKeysComparison; +extern void add_equijoined_keys(Query *root, RestrictInfo *restrictinfo); +extern List *canonicalize_pathkeys(Query *root, List *pathkeys); extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, - bool indexpaths_only); + CostSelector cost_criterion); +extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, + List 
*pathkeys, + double fraction); extern List *build_index_pathkeys(Query *root, RelOptInfo *rel, - IndexOptInfo *index); + IndexOptInfo *index, + ScanDirection scandir); extern List *build_join_pathkeys(List *outer_pathkeys, - List *join_rel_tlist, List *joinclauses); -extern bool commute_pathkeys(List *pathkeys); + List *join_rel_tlist, + List *equi_key_list); extern List *make_pathkeys_for_sortclauses(List *sortclauses, List *tlist); extern List *find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos); -extern List *make_pathkeys_for_mergeclauses(List *mergeclauses, +extern List *make_pathkeys_for_mergeclauses(Query *root, + List *mergeclauses, List *tlist); #endif /* PATHS_H */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 340f54485cb7c0470cd24d2328d5d90a4530104b..2d61a035bf86ea72d77a42f5a17aa6873938dc46 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planmain.h,v 1.37 2000/01/27 18:11:45 tgl Exp $ + * $Id: planmain.h,v 1.38 2000/02/15 20:49:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,7 +20,8 @@ /* * prototypes for plan/planmain.c */ -extern Plan *query_planner(Query *root, List *tlist, List *qual); +extern Plan *query_planner(Query *root, List *tlist, List *qual, + double tuple_fraction); /* * prototypes for plan/createplan.c @@ -29,7 +30,7 @@ extern Plan *create_plan(Query *root, Path *best_path); extern SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); extern Sort *make_sort(List *tlist, Oid nonameid, Plan *lefttree, int keycount); -extern Agg *make_agg(List *tlist, Plan *lefttree); +extern Agg *make_agg(List *tlist, List *qual, Plan *lefttree); extern Group *make_group(List *tlist, bool tuplePerGroup, int ngrp, AttrNumber *grpColIdx, Plan *lefttree); 
extern Noname *make_noname(List *tlist, List *pathkeys, Plan *subplan); diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 00a6e55dfd0affdb4ed3891a9601bf0a1a14776b..c06f41b852ebd022ceec1bf135559783d7b767b1 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planner.h,v 1.13 2000/01/26 05:58:21 momjian Exp $ + * $Id: planner.h,v 1.14 2000/02/15 20:49:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,7 @@ #include "nodes/plannodes.h" extern Plan *planner(Query *parse); -extern Plan *union_planner(Query *parse); +extern Plan *union_planner(Query *parse, double tuple_fraction); extern void pg_checkretval(Oid rettype, List *querytree_list); #endif /* PLANNER_H */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index cb7ab0e802736f18a0e0f968da9c920510af9c58..7f9dcc6c4696757a93532358dadb7c947f717a95 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.100 2000/02/10 19:51:52 momjian Exp $ + * $Id: builtins.h,v 1.101 2000/02/15 20:49:27 tgl Exp $ * * NOTES * This should normally only be included by fmgr.h. 
@@ -403,19 +403,23 @@ extern bool convert_to_scalar(Datum value, Oid typid, double *scaleval); extern void btcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, + Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity); extern void rtcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, + Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity); extern void hashcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, + Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity); extern void gistcostestimate(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, - Cost *indexAccessCost, + Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity); /* tid.c */ diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 94bf6bf0b866fdfbb437c81bb9d3bdf063e2bf89..9202cb23c965431b284de3b0026b55a519a186ac 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.118 2000/02/07 23:10:09 petere Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.119 2000/02/15 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -172,12 +172,6 @@ static struct EnvironmentOptions }, #endif /* internal performance-related settings */ - { - "PGCOSTHEAP", "cost_heap" - }, - { - "PGCOSTINDEX", "cost_index" - }, { "PGGEQO", "geqo" }, diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 53561ab33c8d94903750c89d0354ae6781da8859..174925df849c77702856a39183d120f827ce1cf5 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out 
@@ -93,8 +93,11 @@ SELECT * FROM tmp; DROP TABLE tmp; -- -- rename - --- should preserve indices +-- should preserve indices, which we can check by seeing if a SELECT +-- chooses an indexscan; however, in the absence of vacuum statistics +-- it might not. Therefore, vacuum first. -- +VACUUM ANALYZE tenk1; ALTER TABLE tenk1 RENAME TO ten_k; -- 20 values, sorted SELECT unique1 FROM ten_k WHERE unique1 < 20; diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out index d6953ab5a571b6a54a16bee590e8182a299292ec..ed6cbac1df06d61e884e60393f32683154d17373 100644 --- a/src/test/regress/expected/select.out +++ b/src/test/regress/expected/select.out @@ -4,7 +4,8 @@ -- btree index -- awk '{if($1<10){print;}else{next;}}' onek.data | sort +0n -1 -- -SELECT onek.* WHERE onek.unique1 < 10; +SELECT onek.* WHERE onek.unique1 < 10 + ORDER BY onek.unique1; unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- 0 | 998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | KMBAAA | OOOOxx diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index bef443b9302553e9017a37a2acfdd92e30bc6d17..5ba66c46e8f62d3d4cd3acdfe6b5e2eb6b8cf1f9 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -141,8 +141,12 @@ DROP TABLE tmp; -- -- rename - --- should preserve indices +-- should preserve indices, which we can check by seeing if a SELECT +-- chooses an indexscan; however, in the absence of vacuum statistics +-- it might not. Therefore, vacuum first. 
-- +VACUUM ANALYZE tenk1; + ALTER TABLE tenk1 RENAME TO ten_k; -- 20 values, sorted diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql index 3d5e66c98cd3d6709069a4e31aa0667506313960..42b664eaaee200cc465d70a32817a359474564cd 100644 --- a/src/test/regress/sql/select.sql +++ b/src/test/regress/sql/select.sql @@ -5,7 +5,8 @@ -- btree index -- awk '{if($1<10){print;}else{next;}}' onek.data | sort +0n -1 -- -SELECT onek.* WHERE onek.unique1 < 10; +SELECT onek.* WHERE onek.unique1 < 10 + ORDER BY onek.unique1; -- -- awk '{if($1<20){print $1,$14;}else{next;}}' onek.data | sort +0nr -1