From ba2ea6e0f5f270571e7f661cd2c7645160a9562a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 19 Sep 2000 18:42:34 +0000
Subject: [PATCH] Fix GEQO optimizer to work correctly with new
 outer-join-capable query representation.  Note that the GEQO_RELS setting is now
 interpreted as the number of top-level items in the FROM list, not
 necessarily the number of relations in the query.  This seems appropriate
 since we are only doing join-path searching over the top-level items.

---
 src/backend/optimizer/geqo/geqo_eval.c | 50 +++++++++++++++-----------
 src/backend/optimizer/geqo/geqo_main.c | 20 +++++------
 src/backend/optimizer/geqo/geqo_pool.c | 14 ++++----
 src/backend/optimizer/path/allpaths.c  |  4 +--
 src/include/optimizer/geqo.h           | 12 ++++---
 src/include/optimizer/geqo_pool.h      |  5 +--
 6 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index f32b0d64eeb..1970ca9a43a 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo_eval.c,v 1.54 2000/09/12 21:06:50 tgl Exp $
+ * $Id: geqo_eval.c,v 1.55 2000/09/19 18:42:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,7 +36,7 @@
  * Returns cost of a query tree as an individual of the population.
  */
 Cost
-geqo_eval(Query *root, Gene *tour, int num_gene)
+geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
 {
 	MemoryContext mycontext;
 	MemoryContext oldcxt;
@@ -64,7 +64,7 @@ geqo_eval(Query *root, Gene *tour, int num_gene)
 	savelist = root->join_rel_list;
 
 	/* construct the best path for the given combination of relations */
-	joinrel = gimme_tree(root, tour, 0, num_gene, NULL);
+	joinrel = gimme_tree(root, initial_rels, tour, num_gene, 0, NULL);
 
 	/*
 	 * compute fitness
@@ -86,35 +86,42 @@ geqo_eval(Query *root, Gene *tour, int num_gene)
 
 /*
  * gimme_tree
- *	  this program presumes that only LEFT-SIDED TREES are considered!
+ *	  this routine considers only LEFT-SIDED TREES!
  *
- * 'old_rel' is the preceding join
+ *	 'root' is the Query
+ *	 'initial_rels' is the list of initial relations (FROM-list items)
+ *	 'tour' is the proposed join order, of length 'num_gene'
+ *	 'rel_count' is number of initial_rels items already joined (initially 0)
+ *	 'old_rel' is the preceding join (initially NULL)
  *
  * Returns a new join relation incorporating all joins in a left-sided tree.
  */
 RelOptInfo *
-gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene,
-		   RelOptInfo *old_rel)
+gimme_tree(Query *root, List *initial_rels,
+		   Gene *tour, int num_gene,
+		   int rel_count, RelOptInfo *old_rel)
 {
 	RelOptInfo *inner_rel;		/* current relation */
-	int			base_rel_index;
+	int			init_rel_index;
 
 	if (rel_count < num_gene)
-	{							/* tree not yet finished */
+	{
+		/* tree not yet finished */
+		init_rel_index = (int) tour[rel_count];
 
-		/* tour[0] = 3; tour[1] = 1; tour[2] = 2 */
-		base_rel_index = (int) tour[rel_count];
-
-		inner_rel = (RelOptInfo *) nth(base_rel_index - 1, root->base_rel_list);
+		inner_rel = (RelOptInfo *) nth(init_rel_index - 1, initial_rels);
 
 		if (rel_count == 0)
-		{						/* processing first join with
-								 * base_rel_index = (int) tour[0] */
+		{
+			/* processing first join with init_rel_index = (int) tour[0] */
 			rel_count++;
-			return gimme_tree(root, tour, rel_count, num_gene, inner_rel);
+			return gimme_tree(root, initial_rels,
+							  tour, num_gene,
+							  rel_count, inner_rel);
 		}
 		else
-		{						/* tree main part */
+		{
+			/* tree main part */
 			List	   *acceptable_rels = lcons(inner_rel, NIL);
 			List	   *new_rels;
 			RelOptInfo *new_rel;
@@ -133,13 +140,14 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene,
 			}
 			new_rel = (RelOptInfo *) lfirst(new_rels);
 
-			rel_count++;
-			Assert(length(new_rel->relids) == rel_count);
-
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(new_rel);
 
-			return gimme_tree(root, tour, rel_count, num_gene, new_rel);
+			/* and recurse... */
+			rel_count++;
+			return gimme_tree(root, initial_rels,
+							  tour, num_gene,
+							  rel_count, new_rel);
 		}
 	}
 
diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c
index eb99c2478f8..755c33168d6 100644
--- a/src/backend/optimizer/geqo/geqo_main.c
+++ b/src/backend/optimizer/geqo/geqo_main.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo_main.c,v 1.23 2000/08/07 00:51:23 tgl Exp $
+ * $Id: geqo_main.c,v 1.24 2000/09/19 18:42:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,13 +65,12 @@ static int	gimme_number_generations(int pool_size, int effort);
  */
 
 RelOptInfo *
-geqo(Query *root)
+geqo(Query *root, int number_of_rels, List *initial_rels)
 {
 	int			generation;
 	Chromosome *momma;
 	Chromosome *daddy;
 	Chromosome *kid;
-	int			number_of_rels;
 	Pool	   *pool;
 	int			pool_size,
 				number_generations,
@@ -95,9 +94,6 @@ geqo(Query *root)
 
 #endif
 
-/* set tour size */
-	number_of_rels = length(root->base_rel_list);
-
 /* set GA parameters */
 	pool_size = gimme_pool_size(number_of_rels);
 	number_generations = gimme_number_generations(pool_size, Geqo_effort);
@@ -114,7 +110,7 @@ geqo(Query *root)
 	pool = alloc_pool(pool_size, number_of_rels);
 
 /* random initialization of the pool */
-	random_init_pool(root, pool, 0, pool->size);
+	random_init_pool(root, initial_rels, pool, 0, pool->size);
 
 /* sort the pool according to cheapest path as fitness */
 	sort_pool(pool);			/* we have to do it only one time, since
@@ -204,7 +200,8 @@ geqo(Query *root)
 
 
 		/* EVALUATE FITNESS */
-		kid->worth = geqo_eval(root, kid->string, pool->string_length);
+		kid->worth = geqo_eval(root, initial_rels,
+							   kid->string, pool->string_length);
 
 		/* push the kid into the wilderness of life according to its worth */
 		spread_chromo(kid, pool);
@@ -247,9 +244,10 @@ geqo(Query *root)
 
 	best_tour = (Gene *) pool->data[0].string;
 
-/* root->join_relation_list_ will be modified during this ! */
-	best_rel = (RelOptInfo *) gimme_tree(root, best_tour, 0,
-										 pool->string_length, NULL);
+/* root->join_rel_list will be modified during this ! */
+	best_rel = (RelOptInfo *) gimme_tree(root, initial_rels,
+										 best_tour, pool->string_length,
+										 0, NULL);
 
 /* DBG: show the query plan
 print_plan(best_plan, root);
diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c
index c8a970373ab..a149632dfa5 100644
--- a/src/backend/optimizer/geqo/geqo_pool.c
+++ b/src/backend/optimizer/geqo/geqo_pool.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo_pool.c,v 1.17 2000/01/26 05:56:33 momjian Exp $
+ * $Id: geqo_pool.c,v 1.18 2000/09/19 18:42:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,18 +84,18 @@ free_pool(Pool *pool)
  *		initialize genetic pool
  */
 void
-random_init_pool(Query *root, Pool *pool, int strt, int stp)
+random_init_pool(Query *root, List *initial_rels,
+				 Pool *pool, int strt, int stp)
 {
 	Chromosome *chromo = (Chromosome *) pool->data;
 	int			i;
 
 	for (i = strt; i < stp; i++)
 	{
-		init_tour(chromo[i].string, pool->string_length);		/* from
-																 * "geqo_recombination.c"
-																 * */
-
-		pool->data[i].worth = geqo_eval(root, chromo[i].string, pool->string_length);	/* "from geqo_eval.c" */
+		init_tour(chromo[i].string, pool->string_length);
+		pool->data[i].worth = geqo_eval(root, initial_rels,
+										chromo[i].string,
+										pool->string_length);
 	}
 }
 
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 605b60b5845..be4a5ca56a2 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.63 2000/09/12 21:06:52 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.64 2000/09/19 18:42:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -188,7 +188,7 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
 	 * rest will be skipped in case of GEQO    *
 	 *******************************************/
 	if (enable_geqo && levels_needed >= geqo_rels)
-		return geqo(root);
+		return geqo(root, levels_needed, initial_rels);
 
 	/*
 	 * We employ a simple "dynamic programming" algorithm: we first find
diff --git a/src/include/optimizer/geqo.h b/src/include/optimizer/geqo.h
index 1c6182f8d5f..e22d70badd9 100644
--- a/src/include/optimizer/geqo.h
+++ b/src/include/optimizer/geqo.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo.h,v 1.20 2000/06/28 03:33:22 tgl Exp $
+ * $Id: geqo.h,v 1.21 2000/09/19 18:42:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,11 +62,13 @@ extern int          Geqo_random_seed; /* or negative to use current time */
 
 
 /* routines in geqo_main.c */
-extern RelOptInfo *geqo(Query *root);
+extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels);
 
 /* routines in geqo_eval.c */
-extern Cost geqo_eval(Query *root, Gene *tour, int num_gene);
-extern RelOptInfo *gimme_tree(Query *root, Gene *tour, int rel_count,
-		   int num_gene, RelOptInfo *old_rel);
+extern Cost geqo_eval(Query *root, List *initial_rels,
+					  Gene *tour, int num_gene);
+extern RelOptInfo *gimme_tree(Query *root, List *initial_rels,
+							  Gene *tour, int num_gene,
+							  int rel_count, RelOptInfo *old_rel);
 
 #endif	 /* GEQO_H */
diff --git a/src/include/optimizer/geqo_pool.h b/src/include/optimizer/geqo_pool.h
index f0791039fb7..d96e67139b1 100644
--- a/src/include/optimizer/geqo_pool.h
+++ b/src/include/optimizer/geqo_pool.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: geqo_pool.h,v 1.9 2000/01/26 05:58:20 momjian Exp $
+ * $Id: geqo_pool.h,v 1.10 2000/09/19 18:42:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,7 +29,8 @@
 extern Pool *alloc_pool(int pool_size, int string_length);
 extern void free_pool(Pool *pool);
 
-extern void random_init_pool(Query *root, Pool *pool, int strt, int stop);
+extern void random_init_pool(Query *root, List *initial_rels,
+							 Pool *pool, int strt, int stop);
 extern Chromosome *alloc_chromo(int string_length);
 extern void free_chromo(Chromosome *chromo);
 
-- 
GitLab