diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index f81c920d26d3afcf6a8b6da30810b2270c912533..ad4a6f98456f5f1fab34def3ce18c7127c67b3e5 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.231 2004/01/21 23:33:34 tgl Exp $ +$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.232 2004/01/23 23:54:20 tgl Exp $ --> <Chapter Id="runtime"> @@ -1396,7 +1396,7 @@ SET ENABLE_SEQSCAN TO OFF; Use genetic query optimization to plan queries with at least this many <literal>FROM</> items involved. (Note that an outer <literal>JOIN</> construct counts as only one <literal>FROM</> - item.) The default is 11. For simpler queries it is usually best + item.) The default is 12. For simpler queries it is usually best to use the deterministic, exhaustive planner, but for queries with many tables the deterministic planner takes too long. </para> @@ -1404,25 +1404,33 @@ SET ENABLE_SEQSCAN TO OFF; </varlistentry> <varlistentry> + <term><varname>geqo_effort</varname> (<type>integer</type>)</term> <term><varname>geqo_pool_size</varname> (<type>integer</type>)</term> <term><varname>geqo_generations</varname> (<type>integer</type>)</term> - <term><varname>geqo_effort</varname> (<type>integer</type>)</term> <term><varname>geqo_selection_bias</varname> (<type>floating point</type>)</term> <listitem> <para> Various tuning parameters for the genetic query optimization - algorithm. The pool size is the number of individuals in one - population. Valid values are between 128 and 1024. If it is set - to 0 (the default) a pool size of 2^(QS+1), where QS is the - number of <literal>FROM</> items in the query, is used. + algorithm. The recommended one to modify is + <varname>geqo_effort</varname>, which can range from 1 to 10 with + a default of 5. Larger values increase the time spent in planning + but make it more likely that a good plan will be found. + <varname>geqo_effort</varname> doesn't actually do anything directly, + it is just used to compute the default values for the other + parameters. If you prefer, you can set the other parameters by hand + instead. + The pool size is the number of individuals in the genetic population. + It must be at least two, and useful values are typically 100 to 1000. + If it is set to zero (the default setting) then a suitable default + is chosen based on <varname>geqo_effort</varname> and the number of + tables in the query. Generations specifies the number of iterations of the algorithm. - The value must be a positive integer. If 0 is specified then - <literal>Effort * Log2(PoolSize)</literal> is used. + It must be at least one, and useful values are in the same range + as the pool size. + If it is set to zero (the default setting) then a suitable default + is chosen based on the pool size. The run time of the algorithm is roughly proportional to the sum of pool size and generations. - <varname>geqo_effort</varname> is only used in computing the default - generations setting, as just described. The default value is 40, - and the allowed range 1 to 100. The selection bias is the selective pressure within the population. Values can be from 1.50 to 2.00; the latter is the default. diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 33a98ba6a854e8b89df547bc9dc3573d5e65160a..ccb095aad03bd528e34bd344a0b92a4697ba611a 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.66 2003/11/29 19:51:50 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.67 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,13 +31,17 @@ #include "utils/memutils.h" +static bool desirable_join(Query *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel); + + /* * geqo_eval * * Returns cost of a query tree as an individual of the population. */ Cost -geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) +geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata) { MemoryContext mycontext; MemoryContext oldcxt; @@ -52,9 +56,9 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) * redundant cost calculations, we simply reject tours where tour[0] > * tour[1], assigning them an artificially bad fitness. * - * (It would be better to tweak the GEQO logic to not generate such tours - * in the first place, but I'm not sure of all the implications in the - * mutation logic.) + * init_tour() is aware of this rule and so we should never reject a + * tour during the initial filling of the pool. It seems difficult to + * persuade the recombination logic never to break the rule, however. */ if (num_gene >= 2 && tour[0] > tour[1]) return DBL_MAX; @@ -80,10 +84,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) * this, it'll be pointing at recycled storage after the * MemoryContextDelete below. */ - savelist = root->join_rel_list; + savelist = evaldata->root->join_rel_list; /* construct the best path for the given combination of relations */ - joinrel = gimme_tree(root, initial_rels, tour, num_gene); + joinrel = gimme_tree(tour, num_gene, evaldata); /* * compute fitness @@ -97,7 +101,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) fitness = DBL_MAX; /* restore join_rel_list */ - root->join_rel_list = savelist; + evaldata->root->join_rel_list = savelist; /* release all the memory acquired within gimme_tree */ MemoryContextSwitchTo(oldcxt); @@ -111,63 +115,156 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) * Form planner estimates for a join tree constructed in the specified * order. * - * 'root' is the Query - * 'initial_rels' is the list of initial relations (FROM-list items) * 'tour' is the proposed join order, of length 'num_gene' + * 'evaldata' contains the context we need * * Returns a new join relation whose cheapest path is the best plan for * this join order. NB: will return NULL if join order is invalid. * - * Note that at each step we consider using the next rel as both left and - * right side of a join. However, we cannot build general ("bushy") plan - * trees this way, only left-sided and right-sided trees. + * The original implementation of this routine always joined in the specified + * order, and so could only build left-sided plans (and right-sided and + * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). + * It could never produce a "bushy" plan. This had a couple of big problems, + * of which the worst was that as of 7.4, there are situations involving IN + * subqueries where the only valid plans are bushy. + * + * The present implementation takes the given tour as a guideline, but + * postpones joins that seem unsuitable according to some heuristic rules. + * This allows correct bushy plans to be generated at need, and as a nice + * side-effect it seems to materially improve the quality of the generated + * plans. */ RelOptInfo * -gimme_tree(Query *root, List *initial_rels, - Gene *tour, int num_gene) +gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata) { + RelOptInfo **stack; + int stack_depth; RelOptInfo *joinrel; - int cur_rel_index; int rel_count; /* - * Start with the first relation ... + * Create a stack to hold not-yet-joined relations. */ - cur_rel_index = (int) tour[0]; - - joinrel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels); + stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *)); + stack_depth = 0; /* - * And add on each relation in the specified order ... + * Push each relation onto the stack in the specified order. After + * pushing each relation, see whether the top two stack entries are + * joinable according to the desirable_join() heuristics. If so, + * join them into one stack entry, and try again to combine with the + * next stack entry down (if any). When the stack top is no longer + * joinable, continue to the next input relation. After we have pushed + * the last input relation, the heuristics are disabled and we force + * joining all the remaining stack entries. + * + * If desirable_join() always returns true, this produces a straight + * left-to-right join just like the old code. Otherwise we may produce + * a bushy plan or a left/right-sided plan that really corresponds to + * some tour other than the one given. To the extent that the heuristics + * are helpful, however, this will be a better plan than the raw tour. + * + * Also, when a join attempt fails (because of IN-clause constraints), + * we may be able to recover and produce a workable plan, where the old + * code just had to give up. This case acts the same as a false result + * from desirable_join(). */ - for (rel_count = 1; rel_count < num_gene; rel_count++) + for (rel_count = 0; rel_count < num_gene; rel_count++) { - RelOptInfo *inner_rel; - RelOptInfo *new_rel; + int cur_rel_index; + /* Get the next input relation and push it */ cur_rel_index = (int) tour[rel_count]; - - inner_rel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels); + stack[stack_depth] = (RelOptInfo *) nth(cur_rel_index - 1, + evaldata->initial_rels); + stack_depth++; /* - * Construct a RelOptInfo representing the previous joinrel joined - * to inner_rel. These are always inner joins. Note that we - * expect the joinrel not to exist in root->join_rel_list yet, and - * so the paths constructed for it will only include the ones we - * want. + * While it's feasible, pop the top two stack entries and replace + * with their join. */ - new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER); + while (stack_depth >= 2) + { + RelOptInfo *outer_rel = stack[stack_depth - 2]; + RelOptInfo *inner_rel = stack[stack_depth - 1]; + + /* + * Don't pop if heuristics say not to join now. However, + * once we have exhausted the input, the heuristics can't + * prevent popping. + */ + if (rel_count < num_gene - 1 && + !desirable_join(evaldata->root, outer_rel, inner_rel)) + break; - /* Fail if join order is not valid */ - if (new_rel == NULL) - return NULL; + /* + * Construct a RelOptInfo representing the join of these + * two input relations. These are always inner joins. + * Note that we expect the joinrel not to exist in + * root->join_rel_list yet, and so the paths constructed for it + * will only include the ones we want. + */ + joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel, + JOIN_INNER); - /* Find and save the cheapest paths for this rel */ - set_cheapest(new_rel); + /* Can't pop stack here if join order is not valid */ + if (!joinrel) + break; - /* and repeat... */ - joinrel = new_rel; + /* Find and save the cheapest paths for this rel */ + set_cheapest(joinrel); + + /* Pop the stack and replace the inputs with their join */ + stack_depth--; + stack[stack_depth - 1] = joinrel; + } } + /* Did we succeed in forming a single join relation? */ + if (stack_depth == 1) + joinrel = stack[0]; + else + joinrel = NULL; + + pfree(stack); + return joinrel; } + +/* + * Heuristics for gimme_tree: do we want to join these two relations? + */ +static bool +desirable_join(Query *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel) +{ + List *i; + + /* + * Join if there is an applicable join clause. + */ + foreach(i, outer_rel->joininfo) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(i); + + if (bms_is_subset(joininfo->unjoined_relids, inner_rel->relids)) + return true; + } + + /* + * Join if the rels are members of the same IN sub-select. This is + * needed to improve the odds that we will find a valid solution in + * a case where an IN sub-select has a clauseless join. + */ + foreach(i, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(i); + + if (bms_is_subset(outer_rel->relids, ininfo->righthand) && + bms_is_subset(inner_rel->relids, ininfo->righthand)) + return true; + } + + /* Otherwise postpone the join till later. */ + return false; +} diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index 6a883fa313799ac8e26e570967deea764a61d414..caba3b593f802e6f09b4a81830da79f87d49d4b8 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.42 2004/01/21 23:33:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.43 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,9 +37,9 @@ /* * Configuration options */ +int Geqo_effort; int Geqo_pool_size; int Geqo_generations; -int Geqo_effort; double Geqo_selection_bias; @@ -66,6 +66,7 @@ static int gimme_number_generations(int pool_size); RelOptInfo * geqo(Query *root, int number_of_rels, List *initial_rels) { + GeqoEvalData evaldata; int generation; Chromosome *momma; Chromosome *daddy; @@ -90,6 +91,10 @@ geqo(Query *root, int number_of_rels, List *initial_rels) int mutations = 0; #endif +/* set up evaldata */ + evaldata.root = root; + evaldata.initial_rels = initial_rels; + /* set GA parameters */ pool_size = gimme_pool_size(number_of_rels); number_generations = gimme_number_generations(pool_size); @@ -99,7 +104,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels) pool = alloc_pool(pool_size, number_of_rels); /* random initialization of the pool */ - random_init_pool(root, initial_rels, pool, 0, pool->size); + random_init_pool(pool, &evaldata); /* sort the pool according to cheapest path as fitness */ sort_pool(pool); /* we have to do it only one time, since @@ -107,41 +112,54 @@ geqo(Query *root, int number_of_rels, List *initial_rels) * in future (-> geqo_pool.c:spread_chromo * ) */ +#ifdef GEQO_DEBUG + elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f", + pool_size, + pool->data[0].worth, + pool->data[pool_size - 1].worth); +#endif + /* allocate chromosome momma and daddy memory */ momma = alloc_chromo(pool->string_length); daddy = alloc_chromo(pool->string_length); #if defined (ERX) - ereport(DEBUG2, - (errmsg_internal("using edge recombination crossover [ERX]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using edge recombination crossover [ERX]"); +#endif /* allocate edge table memory */ edge_table = alloc_edge_table(pool->string_length); #elif defined(PMX) - ereport(DEBUG2, - (errmsg_internal("using partially matched crossover [PMX]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using partially matched crossover [PMX]"); +#endif /* allocate chromosome kid memory */ kid = alloc_chromo(pool->string_length); #elif defined(CX) - ereport(DEBUG2, - (errmsg_internal("using cycle crossover [CX]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using cycle crossover [CX]"); +#endif /* allocate city table memory */ kid = alloc_chromo(pool->string_length); city_table = alloc_city_table(pool->string_length); #elif defined(PX) - ereport(DEBUG2, - (errmsg_internal("using position crossover [PX]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using position crossover [PX]"); +#endif /* allocate city table memory */ kid = alloc_chromo(pool->string_length); city_table = alloc_city_table(pool->string_length); #elif defined(OX1) - ereport(DEBUG2, - (errmsg_internal("using order crossover [OX1]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using order crossover [OX1]"); +#endif /* allocate city table memory */ kid = alloc_chromo(pool->string_length); city_table = alloc_city_table(pool->string_length); #elif defined(OX2) - ereport(DEBUG2, - (errmsg_internal("using order crossover [OX2]"))); +#ifdef GEQO_DEBUG + elog(DEBUG2, "using order crossover [OX2]"); +#endif /* allocate city table memory */ kid = alloc_chromo(pool->string_length); city_table = alloc_city_table(pool->string_length); @@ -189,8 +207,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels) /* EVALUATE FITNESS */ - kid->worth = geqo_eval(root, initial_rels, - kid->string, pool->string_length); + kid->worth = geqo_eval(kid->string, pool->string_length, &evaldata); /* push the kid into the wilderness of life according to its worth */ spread_chromo(kid, pool); @@ -207,24 +224,28 @@ geqo(Query *root, int number_of_rels, List *initial_rels) #if defined(ERX) && defined(GEQO_DEBUG) if (edge_failures != 0) elog(LOG, "[GEQO] failures: %d, average: %d", - edge_failures, (int) generation / edge_failures); + edge_failures, (int) number_generations / edge_failures); else elog(LOG, "[GEQO] no edge failures detected"); #endif - #if defined(CX) && defined(GEQO_DEBUG) if (mutations != 0) - elog(LOG, "[GEQO] mutations: %d, generations: %d", mutations, generation); + elog(LOG, "[GEQO] mutations: %d, generations: %d", + mutations, number_generations); else elog(LOG, "[GEQO] no mutations processed"); #endif - #ifdef GEQO_DEBUG print_pool(stdout, pool, 0, pool_size - 1); #endif +#ifdef GEQO_DEBUG + elog(DEBUG1, "GEQO best is %.2f after %d generations", + pool->data[0].worth, number_generations); +#endif + /* * got the cheapest query tree processed by geqo; first element of the @@ -233,8 +254,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels) best_tour = (Gene *) pool->data[0].string; /* root->join_rel_list will be modified during this ! */ - best_rel = gimme_tree(root, initial_rels, - best_tour, pool->string_length); + best_rel = gimme_tree(best_tour, pool->string_length, &evaldata); if (best_rel == NULL) elog(ERROR, "failed to make a valid plan"); @@ -272,53 +292,49 @@ geqo(Query *root, int number_of_rels, List *initial_rels) } - /* - * Return either configured pool size or - * a good default based on query size (no. of relations) - * = 2^(QS+1) - * also constrain between 128 and 1024 + * Return either configured pool size or a good default + * + * The default is based on query size (no. of relations) = 2^(QS+1), + * but constrained to a range based on the effort value. */ static int gimme_pool_size(int nr_rel) { double size; + int minsize; + int maxsize; - if (Geqo_pool_size > 0) + /* Legal pool size *must* be at least 2, so ignore attempt to select 1 */ + if (Geqo_pool_size >= 2) return Geqo_pool_size; size = pow(2.0, nr_rel + 1.0); - if (size < MIN_GEQO_POOL_SIZE) - return MIN_GEQO_POOL_SIZE; - else if (size > MAX_GEQO_POOL_SIZE) - return MAX_GEQO_POOL_SIZE; - else - return (int) ceil(size); -} + maxsize = 50 * Geqo_effort; /* 50 to 500 individuals */ + if (size > maxsize) + return maxsize; + minsize = 10 * Geqo_effort; /* 10 to 100 individuals */ + if (size < minsize) + return minsize; + + return (int) ceil(size); +} /* - * Return either configured number of generations or - * some reasonable default calculated on the fly. - * = Effort * Log2(PoolSize) + * Return either configured number of generations or a good default + * + * The default is the same as the pool size, which allows us to be + * sure that less-fit individuals get pushed out of the breeding + * population before the run finishes. */ static int gimme_number_generations(int pool_size) { - double gens; - if (Geqo_generations > 0) return Geqo_generations; - gens = Geqo_effort * log((double) pool_size) / log(2.0); - - /* bound it to a sane range */ - if (gens <= 0) - gens = 1; - else if (gens > 10000) - gens = 10000; - - return (int) ceil(gens); + return pool_size; } diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c index d1ee094b359dc585bf040d75db7f41b41f8cf29c..258622edc4cb010298737bc0d07e082e360bc6fe 100644 --- a/src/backend/optimizer/geqo/geqo_pool.c +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_pool.c,v 1.22 2003/11/29 22:39:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_pool.c,v 1.23 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,11 @@ /* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ #include "postgres.h" + +#include <float.h> +#include <limits.h> +#include <math.h> + #include "optimizer/geqo.h" #include "optimizer/geqo_copy.h" #include "optimizer/geqo_pool.h" @@ -84,19 +89,42 @@ free_pool(Pool *pool) * initialize genetic pool */ void -random_init_pool(Query *root, List *initial_rels, - Pool *pool, int strt, int stp) +random_init_pool(Pool *pool, GeqoEvalData *evaldata) { Chromosome *chromo = (Chromosome *) pool->data; int i; + int bad = 0; - for (i = strt; i < stp; i++) + /* + * We immediately discard any invalid individuals (those that geqo_eval + * returns DBL_MAX for), thereby not wasting pool space on them. + * + * If we fail to make any valid individuals after 10000 tries, give up; + * this probably means something is broken, and we shouldn't just let + * ourselves get stuck in an infinite loop. + */ + i = 0; + while (i < pool->size) { init_tour(chromo[i].string, pool->string_length); - pool->data[i].worth = geqo_eval(root, initial_rels, - chromo[i].string, - pool->string_length); + pool->data[i].worth = geqo_eval(chromo[i].string, + pool->string_length, + evaldata); + if (pool->data[i].worth < DBL_MAX) + i++; + else + { + bad++; + if (i == 0 && bad >= 10000) + elog(ERROR, "failed to make a valid plan"); + } } + +#ifdef GEQO_DEBUG + if (bad > 0) + elog(DEBUG1, "%d invalid tours found while selecting %d pool entries", + bad, pool->size); +#endif } /* @@ -113,20 +141,17 @@ sort_pool(Pool *pool) /* * compare - * static input function for pg_sort - * - * return values for sort from smallest to largest are prooved! - * don't change them! + * qsort comparison function for sort_pool */ static int compare(const void *arg1, const void *arg2) { - Chromosome chromo1 = *(Chromosome *) arg1; - Chromosome chromo2 = *(Chromosome *) arg2; + const Chromosome *chromo1 = (const Chromosome *) arg1; + const Chromosome *chromo2 = (const Chromosome *) arg2; - if (chromo1.worth == chromo2.worth) + if (chromo1->worth == chromo2->worth) return 0; - else if (chromo1.worth > chromo2.worth) + else if (chromo1->worth > chromo2->worth) return 1; else return -1; diff --git a/src/backend/optimizer/geqo/geqo_recombination.c b/src/backend/optimizer/geqo/geqo_recombination.c index d438a32f9afa56c16c60ee03609b8650843cdc17..f018902bed7dd79c6da15f3363ceff976cb54c3a 100644 --- a/src/backend/optimizer/geqo/geqo_recombination.c +++ b/src/backend/optimizer/geqo/geqo_recombination.c @@ -3,7 +3,7 @@ * geqo_recombination.c * misc recombination procedures * -* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_recombination.c,v 1.12 2003/11/29 22:39:49 pgsql Exp $ +* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_recombination.c,v 1.13 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ /* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ #include "postgres.h" + #include "optimizer/geqo_random.h" #include "optimizer/geqo_recombination.h" @@ -32,7 +33,6 @@ * points on the tour and randomly chooses the 'next' city from * this array. When a city is chosen, the array is shortened * and the procedure repeated. - * */ void init_tour(Gene *tour, int num_gene) @@ -42,31 +42,43 @@ init_tour(Gene *tour, int num_gene) int next, i; + /* Fill a temp array with the IDs of all not-yet-visited cities */ tmp = (Gene *) palloc(num_gene * sizeof(Gene)); for (i = 0; i < num_gene; i++) - { - tmp[i] = (Gene) i + 1; /* builds tours "1 - 2 - 3" etc. */ - } + tmp[i] = (Gene) (i + 1); remainder = num_gene - 1; for (i = 0; i < num_gene; i++) { - next = (int) geqo_randint(remainder, 0); /* choose city between 0 - * and remainder */ + /* choose value between 0 and remainder inclusive */ + next = (int) geqo_randint(remainder, 0); + /* output that element of the tmp array */ tour[i] = tmp[next]; + /* and delete it */ tmp[next] = tmp[remainder]; remainder--; } + /* + * Since geqo_eval() will reject tours where tour[0] > tour[1], + * we may as well switch the two to make it a valid tour. + */ + if (num_gene >= 2 && tour[0] > tour[1]) + { + Gene gtmp = tour[0]; + + tour[0] = tour[1]; + tour[1] = gtmp; + } + pfree(tmp); } /* alloc_city_table * * allocate memory for city table - * */ City * alloc_city_table(int num_gene) @@ -77,7 +89,6 @@ alloc_city_table(int num_gene) * palloc one extra location so that nodes numbered 1..n can be * indexed directly; 0 will not be used */ - city_table = (City *) palloc((num_gene + 1) * sizeof(City)); return city_table; @@ -86,7 +97,6 @@ alloc_city_table(int num_gene) /* free_city_table * * deallocate memory of city table - * */ void free_city_table(City *city_table) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2633bf9e4d3ec0573560de0eca5f1e60d92cdb4d..04a7e7287b1b625ad1c275d743bd16e1a856e30e 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.178 2004/01/21 23:33:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.179 2004/01/23 23:54:21 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -921,33 +921,32 @@ static struct config_int ConfigureNamesInt[] = NULL }, &geqo_threshold, - 11, 2, INT_MAX, NULL, NULL + 12, 2, INT_MAX, NULL, NULL }, { - {"geqo_pool_size", PGC_USERSET, QUERY_TUNING_GEQO, - gettext_noop("GEQO: number of individuals in one population."), + {"geqo_effort", PGC_USERSET, QUERY_TUNING_GEQO, + gettext_noop("GEQO: effort is used to set the default for other GEQO parameters."), NULL }, + &Geqo_effort, + DEFAULT_GEQO_EFFORT, MIN_GEQO_EFFORT, MAX_GEQO_EFFORT, NULL, NULL + }, + { + {"geqo_pool_size", PGC_USERSET, QUERY_TUNING_GEQO, + gettext_noop("GEQO: number of individuals in the population."), + gettext_noop("Zero selects a suitable default value.") + }, &Geqo_pool_size, - DEFAULT_GEQO_POOL_SIZE, 0, MAX_GEQO_POOL_SIZE, NULL, NULL + 0, 0, INT_MAX, NULL, NULL }, { {"geqo_generations", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: number of iterations of the algorithm."), - gettext_noop("The value must be a positive integer. If 0 is " - "specified then effort * log2(poolsize) is used.") + gettext_noop("Zero selects a suitable default value.") }, &Geqo_generations, 0, 0, INT_MAX, NULL, NULL }, - { - {"geqo_effort", PGC_USERSET, QUERY_TUNING_GEQO, - gettext_noop("GEQO: effort is used to set the default for generations."), - NULL - }, - &Geqo_effort, - DEFAULT_GEQO_EFFORT, MIN_GEQO_EFFORT, MAX_GEQO_EFFORT, NULL, NULL - }, { {"deadlock_timeout", PGC_SIGHUP, LOCK_MANAGEMENT, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 0fdf6b2e99fd2e46534ee75dd3a3d69704078bec..30e7ebe1670d1b8f8d168402cbdbcfdb32438cd7 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -122,11 +122,10 @@ # - Genetic Query Optimizer - #geqo = true -#geqo_threshold = 11 -#geqo_pool_size = 0 # default based on tables in statement, - # range 128-1024 -#geqo_generations = 0 # use default: effort * log2(pool_size) -#geqo_effort = 40 # range 1-100 +#geqo_threshold = 12 +#geqo_effort = 5 # range 1-10 +#geqo_pool_size = 0 # selects default based on effort +#geqo_generations = 0 # selects default based on effort #geqo_selection_bias = 2.0 # range 1.5-2.0 # - Other Planner Options - diff --git a/src/include/optimizer/geqo.h b/src/include/optimizer/geqo.h index caa05f52ee971f9e53fd924440258ef22becf56d..e9bfdfcff66aa784f16cbb8687f2e9d8cb8d6c23 100644 --- a/src/include/optimizer/geqo.h +++ b/src/include/optimizer/geqo.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/geqo.h,v 1.34 2004/01/21 23:33:34 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/geqo.h,v 1.35 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,7 @@ #include "nodes/relation.h" #include "optimizer/geqo_gene.h" + /* GEQO debug flag */ /* #define GEQO_DEBUG @@ -47,21 +48,15 @@ * * If you change these, update backend/utils/misc/postgresql.sample.conf */ -extern int Geqo_pool_size; - -#define DEFAULT_GEQO_POOL_SIZE 0 /* = default based on no. of relations. */ -#define MIN_GEQO_POOL_SIZE 128 -#define MAX_GEQO_POOL_SIZE 1024 +extern int Geqo_effort; /* 1 .. 10, knob for adjustment of defaults */ -extern int Geqo_generations; /* 1 .. inf, or 0 to use default based on - * pool size */ +#define DEFAULT_GEQO_EFFORT 5 +#define MIN_GEQO_EFFORT 1 +#define MAX_GEQO_EFFORT 10 -extern int Geqo_effort; /* only used to calculate default for - * generations */ +extern int Geqo_pool_size; /* 2 .. inf, or 0 to use default */ -#define DEFAULT_GEQO_EFFORT 40 -#define MIN_GEQO_EFFORT 1 -#define MAX_GEQO_EFFORT 100 +extern int Geqo_generations; /* 1 .. inf, or 0 to use default */ extern double Geqo_selection_bias; @@ -70,13 +65,23 @@ extern double Geqo_selection_bias; #define MAX_GEQO_SELECTION_BIAS 2.0 +/* + * Data structure to encapsulate information needed for building plan trees + * (i.e., geqo_eval and gimme_tree). + */ +typedef struct +{ + Query *root; /* the query we are planning */ + List *initial_rels; /* the base relations */ +} GeqoEvalData; + + /* routines in geqo_main.c */ extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels); /* routines in geqo_eval.c */ -extern Cost geqo_eval(Query *root, List *initial_rels, - Gene *tour, int num_gene); -extern RelOptInfo *gimme_tree(Query *root, List *initial_rels, - Gene *tour, int num_gene); +extern Cost geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata); +extern RelOptInfo *gimme_tree(Gene *tour, int num_gene, + GeqoEvalData *evaldata); #endif /* GEQO_H */ diff --git a/src/include/optimizer/geqo_pool.h b/src/include/optimizer/geqo_pool.h index 4028b2c104df50334c1dc87702a8d0c2b128ab67..a4f40c98897728d0a8e4bfb3e22d1c8dd5ac94aa 100644 --- a/src/include/optimizer/geqo_pool.h +++ b/src/include/optimizer/geqo_pool.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/geqo_pool.h,v 1.18 2003/11/29 22:41:07 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/geqo_pool.h,v 1.19 2004/01/23 23:54:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,14 +23,13 @@ #ifndef GEQO_POOL_H #define GEQO_POOL_H -#include "optimizer/geqo_gene.h" -#include "nodes/parsenodes.h" +#include "optimizer/geqo.h" + extern Pool *alloc_pool(int pool_size, int string_length); extern void free_pool(Pool *pool); -extern void random_init_pool(Query *root, List *initial_rels, - Pool *pool, int strt, int stop); +extern void random_init_pool(Pool *pool, GeqoEvalData *evaldata); extern Chromosome *alloc_chromo(int string_length); extern void free_chromo(Chromosome *chromo);