From 0b8b1fe3aababb9fbdf2d0e15f70ca41e1392fe4 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 17 May 1999 00:26:33 +0000 Subject: [PATCH] Tighten coding in new_join_pathkey, which seems to be a hotspot for GEQO ... --- src/backend/optimizer/path/pathkeys.c | 149 +++++++++----------------- 1 file changed, 53 insertions(+), 96 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1cde4a686cf..09a5b8a51f1 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.8 1999/04/30 03:59:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,30 +29,34 @@ static int match_pathkey_joinkeys(List *pathkey, List *joinkeys, int outer_or_inner); static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist, List *joinclauses); -static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys, - List *join_rel_tlist, List *joinclauses); -/* +/*-------------------- * Explanation of Path.pathkeys * * Path.pathkeys is a List of List of Var nodes that represent the sort * order of the result generated by the Path. * * In single/base relation RelOptInfo's, the Path's represent various ways - * of generate the relation. Sequential scan Paths have a NIL pathkeys. - * Index scans have Path.pathkeys that represent the chosen index. A - * single-key index pathkeys would be { {tab1_indexkey1} }. The pathkeys - * entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }. + * of generating the relation and the resulting ordering of the tuples. + * Sequential scan Paths have NIL pathkeys, indicating no known ordering. + * Index scans have Path.pathkeys that represent the chosen index. + * A single-key index pathkeys would be { {tab1_indexkey1} }. For a + * multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} }, + * indicating major sort by indexkey1 and minor sort by indexkey2. * * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are - * only performed with equajoins("="). Because of this, the multi-relation + * only performed with equijoins ("="). Because of this, the multi-relation * path actually has more than one primary Var key. For example, a * mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of - * { {tab1.col1, tab2.col1} }. This allows future joins to use either Var - * as a pre-sorted key to prevent Mergejoins from having to re-sort the Path. - * They are equal, so they are both primary sort keys. This is why pathkeys - * is a List of Lists. + * { {tab1.col1, tab2.col1} }, indicating that the major sort order of the + * Path can be taken to be *either* tab1.col1 or tab2.col1. + * They are equal, so they are both primary sort keys. This allows future + * joins to use either Var as a pre-sorted key to prevent upper Mergejoins + * from having to re-sort the Path. This is why pathkeys is a List of Lists. + * + * Note that while the order of the top list is meaningful (primary vs. + * secondary sort key), the order of each sublist is arbitrary. * * For multi-key sorts, if the outer is sorted by a multi-key index, the * multi-key index remains after the join. If the inner has a multi-key @@ -60,11 +64,16 @@ static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys, * Mergejoins only join on the primary key. Currently, non-primary keys * in the pathkeys List are of limited value. * - * HashJoin has similar functionality. NestJoin does not perform sorting, - * and allows non-equajoins, so it does not allow useful pathkeys. + * Although Hashjoins also work only with equijoin operators, it is *not* + * safe to consider the output of a Hashjoin to be sorted in any particular + * order --- not even the outer path's order. This is true because the + * executor might have to split the join into multiple batches. + * + * NestJoin does not perform sorting, and allows non-equijoins, so it does + * not allow useful pathkeys. (But couldn't we use the outer path's order?) * * -- bjm - * + *-------------------- */ /**************************************************************************** @@ -228,7 +237,7 @@ get_cheapest_path_for_joinkeys(List *joinkeys, int outer_or_inner) { Path *matched_path = NULL; - List *i = NIL; + List *i; foreach(i, paths) { @@ -337,16 +346,16 @@ new_join_pathkeys(List *outer_pathkeys, List *join_rel_tlist, List *joinclauses) { - List *outer_pathkey = NIL; List *final_pathkeys = NIL; - List *new_pathkey; - List *i = NIL; + List *i; foreach(i, outer_pathkeys) { - outer_pathkey = lfirst(i); + List *outer_pathkey = lfirst(i); + List *new_pathkey; + new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist, - joinclauses); + joinclauses); if (new_pathkey != NIL) final_pathkeys = lappend(final_pathkeys, new_pathkey); } @@ -355,19 +364,18 @@ new_join_pathkeys(List *outer_pathkeys, /* * new_join_pathkey - * Finds new vars that become pathkeys due to qualification clauses that - * contain any previously considered pathkeys. These new pathkeys plus the - * pathkeys from 'pathkeys' form a new pathkey for the join relation. + * Generate an individual pathkey sublist, consisting of the outer vars + * already mentioned in 'pathkey' plus any inner vars that are joined to + * them (and thus can now also be considered path keys, per discussion + * at the top of this file). * * Note that each returned pathkey is the var node found in * 'join_rel_tlist' rather than the joinclause var node. + * (Is this important?) Also, we return a fully copied list + * that does not share any subnodes with existing data structures. + * (Is that important, either?) * - * 'pathkeys' is a list of pathkeys for which matching pathkeys are to be - * found - * 'considered_pathkeys' is the current list of all pathkeys corresponding - * to a given pathkey - * - * Returns a new pathkey(list of pathkeys). + * Returns a new pathkey (list of pathkey variables). * */ static List * @@ -375,84 +383,33 @@ new_join_pathkey(List *pathkey, List *join_rel_tlist, List *joinclauses) { - List *final_pathkey = NIL; - List *i = NIL; - List *considered_pathkeys = NIL; + List *new_pathkey = NIL; + List *i, + *j; foreach(i, pathkey) { Var *key = (Var *) lfirst(i); - List *joined_keys; Expr *tlist_key; Assert(key); - joined_keys = get_joinvars_for_var(key, &considered_pathkeys, - join_rel_tlist, joinclauses); - if (joined_keys) - { - considered_pathkeys = nconc(considered_pathkeys, joined_keys); - final_pathkey = nconc(final_pathkey, joined_keys); - } tlist_key = matching_tlist_var(key, join_rel_tlist); - if (tlist_key && !member(tlist_key, considered_pathkeys)) - { - /* - * If pathkey is in the target list, and not considered, - * add it - */ - considered_pathkeys = lcons(tlist_key, considered_pathkeys); - final_pathkey = lcons(tlist_key, final_pathkey); - } - } - return copyObject(final_pathkey); -} + if (tlist_key && !member(tlist_key, new_pathkey)) + new_pathkey = lcons(copyObject(tlist_key), new_pathkey); -/* - * get_joinvars_for_var - * Returns a list of new pathkeys: - * (1) which are not listed in 'considered_pathkeys' - * (2) for which the "other" variable in some clause in 'joinclauses' is - * 'pathkey' - * (3) which are mentioned in 'join_rel_tlist' - * - * Note that each returned pathkey is the var node found in - * 'join_rel_tlist' rather than the joinclause var node. - * - * 'pathkey' is the var node for which we are trying to find matching - * clauses - * - * Returns a list of new pathkeys. - * - */ -static List * -get_joinvars_for_var(Var *key, - List **considered_pathkeys, - List *join_rel_tlist, - List *joinclauses) -{ - List *final_pathkey = NIL; - Expr *joinclause; - List *i; - Expr *tlist_other_var; - - foreach(i, joinclauses) - { - joinclause = lfirst(i); + foreach(j, joinclauses) + { + Expr *joinclause = lfirst(j); + Expr *tlist_other_var; - tlist_other_var = matching_tlist_var( + tlist_other_var = matching_tlist_var( other_join_clause_var(key, joinclause), join_rel_tlist); - if (tlist_other_var && - !member(tlist_other_var, *considered_pathkeys)) - { - /* - * The key has a join variable that is in the target list, - * and has not been considered. - */ - *considered_pathkeys = lcons(tlist_other_var, *considered_pathkeys); - final_pathkey = lcons(tlist_other_var, final_pathkey); + if (tlist_other_var && !member(tlist_other_var, new_pathkey)) + new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey); } } - return final_pathkey; + + return new_pathkey; } -- GitLab