From bdfbfde1b168b3332c4cdac34ac86a80aaf4d442 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 20 Jan 2003 18:55:07 +0000 Subject: [PATCH] IN clauses appearing at top level of WHERE can now be handled as joins. There are two implementation techniques: the executor understands a new JOIN_IN jointype, which emits at most one matching row per left-hand row, or the result of the IN's sub-select can be fed through a DISTINCT filter and then joined as an ordinary relation. Along the way, some minor code cleanup in the optimizer; notably, break out most of the jointree-rearrangement preprocessing in planner.c and put it in a new file prep/prepjointree.c. --- doc/src/sgml/release.sgml | 3 +- src/backend/executor/nodeHashjoin.c | 12 +- src/backend/executor/nodeMergejoin.c | 16 +- src/backend/executor/nodeNestloop.c | 12 +- src/backend/nodes/copyfuncs.c | 26 +- src/backend/nodes/equalfuncs.c | 21 +- src/backend/nodes/list.c | 10 +- src/backend/nodes/outfuncs.c | 30 +- src/backend/optimizer/README | 1 + src/backend/optimizer/geqo/geqo_eval.c | 15 +- src/backend/optimizer/geqo/geqo_main.c | 23 +- src/backend/optimizer/path/allpaths.c | 6 +- src/backend/optimizer/path/costsize.c | 46 +- src/backend/optimizer/path/indxpath.c | 6 +- src/backend/optimizer/path/joinpath.c | 287 ++++----- src/backend/optimizer/path/joinrels.c | 151 ++++- src/backend/optimizer/plan/createplan.c | 145 ++++- src/backend/optimizer/plan/initsplan.c | 13 +- src/backend/optimizer/plan/planmain.c | 4 +- src/backend/optimizer/plan/planner.c | 529 ++--------------- src/backend/optimizer/plan/setrefs.c | 121 +++- src/backend/optimizer/plan/subselect.c | 147 ++++- src/backend/optimizer/prep/Makefile | 4 +- src/backend/optimizer/prep/prepjointree.c | 680 ++++++++++++++++++++++ src/backend/optimizer/prep/prepunion.c | 70 ++- src/backend/optimizer/util/clauses.c | 24 +- src/backend/optimizer/util/joininfo.c | 35 +- src/backend/optimizer/util/pathnode.c | 110 +++- 
src/backend/optimizer/util/relnode.c | 18 +- src/backend/optimizer/util/tlist.c | 24 +- src/backend/optimizer/util/var.c | 153 ++++- src/backend/rewrite/rewriteManip.c | 60 +- src/backend/utils/adt/selfuncs.c | 15 +- src/include/nodes/nodes.h | 19 +- src/include/nodes/parsenodes.h | 3 +- src/include/nodes/pg_list.h | 5 +- src/include/nodes/relation.h | 43 +- src/include/optimizer/joininfo.h | 3 +- src/include/optimizer/pathnode.h | 5 +- src/include/optimizer/planmain.h | 4 +- src/include/optimizer/planner.h | 5 +- src/include/optimizer/prep.h | 12 +- src/include/optimizer/subselect.h | 8 +- src/include/optimizer/tlist.h | 4 +- src/include/optimizer/var.h | 8 +- src/include/utils/selfuncs.h | 4 +- src/test/regress/expected/subselect.out | 12 +- 47 files changed, 2076 insertions(+), 876 deletions(-) create mode 100644 src/backend/optimizer/prep/prepjointree.c diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml index 77fed8d8b02..2911e1828f9 100644 --- a/doc/src/sgml/release.sgml +++ b/doc/src/sgml/release.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.178 2003/01/11 21:02:49 momjian Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.179 2003/01/20 18:54:44 tgl Exp $ --> <appendix id="release"> @@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without worries about funny characters. 
--> <literallayout><![CDATA[ +Performance of "foo IN (SELECT ...)" queries has been considerably improved FETCH 0 now re-fetches cursor's current row, per SQL spec Revised executor state representation; plan trees are read-only to executor now Information schema diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 48cf30c21f4..d452d3865f5 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.47 2003/01/20 18:54:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -95,6 +95,15 @@ ExecHashJoin(HashJoinState *node) node->js.ps.ps_TupFromTlist = false; } + /* + * If we're doing an IN join, we want to return at most one row per + * outer tuple; so we can stop scanning the inner scan if we matched on + * the previous try. + */ + if (node->js.jointype == JOIN_IN && + node->hj_MatchedOuter) + node->hj_NeedNewOuter = true; + /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. 
Note this can't @@ -353,6 +362,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate) switch (node->join.jointype) { case JOIN_INNER: + case JOIN_IN: break; case JOIN_LEFT: hjstate->hj_NullInnerTupleSlot = diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index af6cd8d6f3f..d5dc7f421aa 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.55 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.56 2003/01/20 18:54:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -381,6 +381,7 @@ ExecMergeJoin(MergeJoinState *node) switch (node->js.jointype) { case JOIN_INNER: + case JOIN_IN: doFillOuter = false; doFillInner = false; break; @@ -581,9 +582,15 @@ ExecMergeJoin(MergeJoinState *node) * the econtext's tuple pointers were set up before * checking the merge qual, so we needn't do it again. 
*/ - qualResult = (joinqual == NIL || - ExecQual(joinqual, econtext, false)); - MJ_DEBUG_QUAL(joinqual, qualResult); + if (node->js.jointype == JOIN_IN && + node->mj_MatchedOuter) + qualResult = false; + else + { + qualResult = (joinqual == NIL || + ExecQual(joinqual, econtext, false)); + MJ_DEBUG_QUAL(joinqual, qualResult); + } if (qualResult) { @@ -1452,6 +1459,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate) switch (node->join.jointype) { case JOIN_INNER: + case JOIN_IN: break; case JOIN_LEFT: mergestate->mj_NullInnerTupleSlot = diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 917a7011cbf..1bae9805898 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.29 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.30 2003/01/20 18:54:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,6 +101,15 @@ ExecNestLoop(NestLoopState *node) node->js.ps.ps_TupFromTlist = false; } + /* + * If we're doing an IN join, we want to return at most one row per + * outer tuple; so we can stop scanning the inner scan if we matched on + * the previous try. + */ + if (node->js.jointype == JOIN_IN && + node->nl_MatchedOuter) + node->nl_NeedNewOuter = true; + /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. 
Note this can't @@ -312,6 +321,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate) switch (node->join.jointype) { case JOIN_INNER: + case JOIN_IN: break; case JOIN_LEFT: nlstate->nl_NullInnerTupleSlot = diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 8663c6c4a14..f8e81431ec0 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.236 2003/01/15 19:35:35 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.237 2003/01/20 18:54:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1095,6 +1095,21 @@ _copyJoinInfo(JoinInfo *from) return newnode; } +/* + * _copyInClauseInfo + */ +static InClauseInfo * +_copyInClauseInfo(InClauseInfo *from) +{ + InClauseInfo *newnode = makeNode(InClauseInfo); + + COPY_INTLIST_FIELD(lefthand); + COPY_INTLIST_FIELD(righthand); + COPY_NODE_FIELD(sub_targetlist); + + return newnode; +} + /* **************************************************************** * parsenodes.h copy functions * **************************************************************** @@ -1424,9 +1439,9 @@ _copyQuery(Query *from) /* * We do not copy the planner internal fields: base_rel_list, - * other_rel_list, join_rel_list, equi_key_list, query_pathkeys, - * hasJoinRTEs. That would get us into copying RelOptInfo/Path - * trees, which we don't want to do. + * other_rel_list, join_rel_list, equi_key_list, in_info_list, + * query_pathkeys, hasJoinRTEs. That would get us into copying + * RelOptInfo/Path trees, which we don't want to do. 
*/ return newnode; @@ -2490,6 +2505,9 @@ copyObject(void *from) case T_JoinInfo: retval = _copyJoinInfo(from); break; + case T_InClauseInfo: + retval = _copyInClauseInfo(from); + break; /* * VALUE NODES diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index a4e9e1092d8..5d3e194e3c2 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.180 2003/01/15 19:35:37 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.181 2003/01/20 18:54:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -486,6 +486,16 @@ _equalJoinInfo(JoinInfo *a, JoinInfo *b) return true; } +static bool +_equalInClauseInfo(InClauseInfo *a, InClauseInfo *b) +{ + COMPARE_INTLIST_FIELD(lefthand); + COMPARE_INTLIST_FIELD(righthand); + COMPARE_NODE_FIELD(sub_targetlist); + + return true; +} + /* * Stuff from parsenodes.h @@ -518,9 +528,9 @@ _equalQuery(Query *a, Query *b) /* * We do not check the internal-to-the-planner fields: base_rel_list, - * other_rel_list, join_rel_list, equi_key_list, query_pathkeys, - * hasJoinRTEs. They might not be set yet, and in any case they should - * be derivable from the other fields. + * other_rel_list, join_rel_list, equi_key_list, in_info_list, + * query_pathkeys, hasJoinRTEs. They might not be set yet, and in any + * case they should be derivable from the other fields. 
*/ return true; } @@ -1618,6 +1628,9 @@ equal(void *a, void *b) case T_JoinInfo: retval = _equalJoinInfo(a, b); break; + case T_InClauseInfo: + retval = _equalInClauseInfo(a, b); + break; /* * LIST NODES diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index b3c6a18496f..e896b479018 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.43 2002/12/17 01:18:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.44 2003/01/20 18:54:47 tgl Exp $ * * NOTES * XXX a few of the following functions are duplicated to handle @@ -638,10 +638,10 @@ lreverse(List *l) } /* - * Return t if two integer lists have no members in common. + * Return t if two integer lists have any members in common. */ bool -nonoverlap_setsi(List *list1, List *list2) +overlap_setsi(List *list1, List *list2) { List *x; @@ -650,9 +650,9 @@ nonoverlap_setsi(List *list1, List *list2) int e = lfirsti(x); if (intMember(e, list2)) - return false; + return true; } - return true; + return false; } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e72b52570e5..fd18c957d9b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.193 2003/01/15 19:35:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.194 2003/01/20 18:54:47 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -905,6 +905,18 @@ _outMaterialPath(StringInfo str, MaterialPath *node) WRITE_NODE_FIELD(subpath); } +static void +_outUniquePath(StringInfo str, UniquePath *node) +{ + WRITE_NODE_TYPE("UNIQUEPATH"); + + _outPathInfo(str, (Path *) node); + + WRITE_NODE_FIELD(subpath); + WRITE_BOOL_FIELD(use_hash); + WRITE_FLOAT_FIELD(rows, "%.0f"); +} + static void _outNestPath(StringInfo str, NestPath *node) { @@ 
-969,6 +981,16 @@ _outJoinInfo(StringInfo str, JoinInfo *node) WRITE_NODE_FIELD(jinfo_restrictinfo); } +static void +_outInClauseInfo(StringInfo str, InClauseInfo *node) +{ + WRITE_NODE_TYPE("INCLAUSEINFO"); + + WRITE_INTLIST_FIELD(lefthand); + WRITE_INTLIST_FIELD(righthand); + WRITE_NODE_FIELD(sub_targetlist); +} + /***************************************************************************** * * Stuff from parsenodes.h. @@ -1563,6 +1585,9 @@ _outNode(StringInfo str, void *obj) case T_MaterialPath: _outMaterialPath(str, obj); break; + case T_UniquePath: + _outUniquePath(str, obj); + break; case T_NestPath: _outNestPath(str, obj); break; @@ -1581,6 +1606,9 @@ _outNode(StringInfo str, void *obj) case T_JoinInfo: _outJoinInfo(str, obj); break; + case T_InClauseInfo: + _outInClauseInfo(str, obj); + break; case T_CreateStmt: _outCreateStmt(str, obj); diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 955e022d8f6..ef086992e8b 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -263,6 +263,7 @@ RelOptInfo - a relation or joined relations AppendPath - append multiple subpaths together ResultPath - a Result plan node (used for variable-free tlist or qual) MaterialPath - a Material plan node + UniquePath - remove duplicate rows NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 91b6c75e8e2..d53a160a4eb 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.61 2003/01/20 18:54:49 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -22,8 +22,8 @@ #include "postgres.h" #include <float.h> -#include <math.h> #include <limits.h> +#include <math.h> #include "optimizer/geqo.h" #include "optimizer/pathnode.h" @@ -91,7 +91,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) * XXX geqo does not currently support optimization for partial result * retrieval --- how to fix? */ - fitness = joinrel->cheapest_total_path->total_cost; + if (joinrel) + fitness = joinrel->cheapest_total_path->total_cost; + else + fitness = DBL_MAX; /* restore join_rel_list */ root->join_rel_list = savelist; @@ -113,7 +116,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) * 'tour' is the proposed join order, of length 'num_gene' * * Returns a new join relation whose cheapest path is the best plan for - * this join order. + * this join order. NB: will return NULL if join order is invalid. * * Note that at each step we consider using the next rel as both left and * right side of a join. 
However, we cannot build general ("bushy") plan @@ -154,6 +157,10 @@ gimme_tree(Query *root, List *initial_rels, */ new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER); + /* Fail if join order is not valid */ + if (new_rel == NULL) + return NULL; + /* Find and save the cheapest paths for this rel */ set_cheapest(new_rel); diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index c9993680b50..c517652dab6 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.33 2002/12/16 21:30:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.34 2003/01/20 18:54:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -228,20 +228,25 @@ geqo(Query *root, int number_of_rels, List *initial_rels) #endif -/* got the cheapest query tree processed by geqo; - first element of the population indicates the best query tree */ - + /* + * got the cheapest query tree processed by geqo; + * first element of the population indicates the best query tree + */ best_tour = (Gene *) pool->data[0].string; -/* root->join_rel_list will be modified during this ! */ + /* root->join_rel_list will be modified during this ! */ best_rel = gimme_tree(root, initial_rels, best_tour, pool->string_length); -/* DBG: show the query plan -print_plan(best_plan, root); - DBG */ + if (best_rel == NULL) + elog(ERROR, "geqo: failed to make a valid plan"); + + /* DBG: show the query plan */ +#ifdef NOT_USED + print_plan(best_plan, root); +#endif -/* ... free memory stuff */ + /* ... 
free memory stuff */ free_chromo(momma); free_chromo(daddy); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c0b3ab40da1..f85144b8184 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.94 2003/01/20 18:54:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -750,6 +750,10 @@ print_path(Query *root, Path *path, int indent) ptype = "Material"; subpath = ((MaterialPath *) path)->subpath; break; + case T_UniquePath: + ptype = "Unique"; + subpath = ((UniquePath *) path)->subpath; + break; case T_NestPath: ptype = "NestLoop"; join = true; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index efd80dff1ed..5146517132f 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.100 2003/01/15 19:35:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1024,12 +1024,17 @@ cost_hashjoin(Path *path, Query *root, * Bias against putting larger relation on inside. We don't want an * absolute prohibition, though, since larger relation might have * better bucketsize --- and we can't trust the size estimates - * unreservedly, anyway. Instead, inflate the startup cost by the + * unreservedly, anyway. Instead, inflate the run cost by the * square root of the size ratio. (Why square root? No real good * reason, but it seems reasonable...) 
+ * + * Note: before 7.4 we implemented this by inflating startup cost; + * but if there's a disable_cost component in the input paths' + * startup cost, that unfairly penalizes the hash. Probably it'd + * be better to keep track of disable penalty separately from cost. */ if (innerbytes > outerbytes && outerbytes > 0) - startup_cost *= sqrt(innerbytes / outerbytes); + run_cost *= sqrt(innerbytes / outerbytes); path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost; @@ -1492,22 +1497,26 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, JoinType jointype, List *restrictlist) { + Selectivity selec; double temp; - - /* Start with the Cartesian product */ - temp = outer_rel->rows * inner_rel->rows; + UniquePath *upath; /* - * Apply join restrictivity. Note that we are only considering + * Compute joinclause selectivity. Note that we are only considering * clauses that become restriction clauses at this join level; we are * not double-counting them because they were not considered in * estimating the sizes of the component rels. */ - temp *= restrictlist_selectivity(root, + selec = restrictlist_selectivity(root, restrictlist, 0); /* + * Normally, we multiply size of Cartesian product by selectivity. + * But for JOIN_IN, we just multiply the lefthand size by the selectivity + * (is that really right?). For UNIQUE_OUTER or UNIQUE_INNER, use + * the estimated number of distinct rows (again, is that right?) + * * If we are doing an outer join, take that into account: the output * must be at least as large as the non-nullable input. (Is there any * chance of being even smarter?) 
@@ -1515,24 +1524,45 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, switch (jointype) { case JOIN_INNER: + temp = outer_rel->rows * inner_rel->rows * selec; break; case JOIN_LEFT: + temp = outer_rel->rows * inner_rel->rows * selec; if (temp < outer_rel->rows) temp = outer_rel->rows; break; case JOIN_RIGHT: + temp = outer_rel->rows * inner_rel->rows * selec; if (temp < inner_rel->rows) temp = inner_rel->rows; break; case JOIN_FULL: + temp = outer_rel->rows * inner_rel->rows * selec; if (temp < outer_rel->rows) temp = outer_rel->rows; if (temp < inner_rel->rows) temp = inner_rel->rows; break; + case JOIN_IN: + temp = outer_rel->rows * selec; + break; + case JOIN_REVERSE_IN: + temp = inner_rel->rows * selec; + break; + case JOIN_UNIQUE_OUTER: + upath = create_unique_path(root, outer_rel, + outer_rel->cheapest_total_path); + temp = upath->rows * inner_rel->rows * selec; + break; + case JOIN_UNIQUE_INNER: + upath = create_unique_path(root, inner_rel, + inner_rel->cheapest_total_path); + temp = outer_rel->rows * upath->rows * selec; + break; default: elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d", (int) jointype); + temp = 0; /* keep compiler quiet */ break; } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 7e68c41ef37..02a92fd9960 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.131 2003/01/15 19:35:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.132 2003/01/20 18:54:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1401,11 +1401,13 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, MemoryContext oldcontext; /* - * Nestloop only supports inner and left joins. + * Nestloop only supports inner, left, and IN joins. 
*/ switch (jointype) { case JOIN_INNER: + case JOIN_IN: + case JOIN_UNIQUE_OUTER: isouterjoin = false; break; case JOIN_LEFT: diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 8a6fcd3f060..0cbe7bbf83b 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.75 2003/01/15 19:35:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.76 2003/01/20 18:54:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,13 +32,6 @@ static void match_unsorted_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype); - -#ifdef NOT_USED -static void match_unsorted_inner(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list, - JoinType jointype); -#endif static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); @@ -149,6 +142,8 @@ sort_inner_and_outer(Query *root, JoinType jointype) { bool useallclauses; + Path *outer_path; + Path *inner_path; List *all_pathkeys; List *i; @@ -160,6 +155,9 @@ sort_inner_and_outer(Query *root, { case JOIN_INNER: case JOIN_LEFT: + case JOIN_IN: + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: useallclauses = false; break; case JOIN_RIGHT: @@ -173,6 +171,28 @@ sort_inner_and_outer(Query *root, break; } + /* + * We only consider the cheapest-total-cost input paths, since we are + * assuming here that a sort is required. We will consider + * cheapest-startup-cost input paths later, and only if they don't + * need a sort. + * + * If unique-ification is requested, do it and then handle as a plain + * inner join. 
+ */ + outer_path = outerrel->cheapest_total_path; + inner_path = innerrel->cheapest_total_path; + if (jointype == JOIN_UNIQUE_OUTER) + { + outer_path = (Path *) create_unique_path(root, outerrel, outer_path); + jointype = JOIN_INNER; + } + else if (jointype == JOIN_UNIQUE_INNER) + { + inner_path = (Path *) create_unique_path(root, innerrel, inner_path); + jointype = JOIN_INNER; + } + /* * Each possible ordering of the available mergejoin clauses will * generate a differently-sorted result path at essentially the same @@ -254,17 +274,14 @@ sort_inner_and_outer(Query *root, merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys); /* - * And now we can make the path. We only consider the cheapest- - * total-cost input paths, since we are assuming here that a sort - * is required. We will consider cheapest-startup-cost input - * paths later, and only if they don't need a sort. + * And now we can make the path. */ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, - outerrel->cheapest_total_path, - innerrel->cheapest_total_path, + outer_path, + inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, @@ -314,15 +331,18 @@ match_unsorted_outer(Query *root, List *mergeclause_list, JoinType jointype) { + JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; + Path *inner_cheapest_startup = innerrel->cheapest_startup_path; + Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; Path *bestinnerjoin = NULL; List *i; /* - * Nestloop only supports inner and left joins. Also, if we are doing - * a right or full join, we must use *all* the mergeclauses as join + * Nestloop only supports inner, left, and IN joins. Also, if we are + * doing a right or full join, we must use *all* the mergeclauses as join * clauses, else we will not have a valid plan. (Although these two * flags are currently inverses, keep them separate for clarity and * possible future changes.) 
@@ -331,6 +351,9 @@ match_unsorted_outer(Query *root, { case JOIN_INNER: case JOIN_LEFT: + case JOIN_IN: + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: nestjoinOK = true; useallclauses = false; break; @@ -347,18 +370,28 @@ match_unsorted_outer(Query *root, break; } - if (nestjoinOK) + /* + * If we need to unique-ify the inner path, we will consider only + * the cheapest inner. + */ + if (jointype == JOIN_UNIQUE_INNER) + { + inner_cheapest_total = (Path *) + create_unique_path(root, innerrel, inner_cheapest_total); + inner_cheapest_startup = inner_cheapest_total; + jointype = JOIN_INNER; + } + else if (nestjoinOK) { /* * If the cheapest inner path is a join or seqscan, we should consider * materializing it. (This is a heuristic: we could consider it * always, but for inner indexscans it's probably a waste of time.) */ - if (!(IsA(innerrel->cheapest_total_path, IndexPath) || - IsA(innerrel->cheapest_total_path, TidPath))) + if (!(IsA(inner_cheapest_total, IndexPath) || + IsA(inner_cheapest_total, TidPath))) matpath = (Path *) - create_material_path(innerrel, - innerrel->cheapest_total_path); + create_material_path(innerrel, inner_cheapest_total); /* * Get the best innerjoin indexpath (if any) for this outer rel. It's @@ -380,6 +413,18 @@ match_unsorted_outer(Query *root, int num_sortkeys; int sortkeycnt; + /* + * If we need to unique-ify the outer path, it's pointless to consider + * any but the cheapest outer. 
+ */ + if (save_jointype == JOIN_UNIQUE_OUTER) + { + if (outerpath != outerrel->cheapest_total_path) + continue; + outerpath = (Path *) create_unique_path(root, outerrel, outerpath); + jointype = JOIN_INNER; + } + /* * The result will have this sort order (even if it is implemented * as a nestloop, and even if some of the mergeclauses are @@ -402,7 +447,7 @@ match_unsorted_outer(Query *root, joinrel, jointype, outerpath, - innerrel->cheapest_total_path, + inner_cheapest_total, restrictlist, merge_pathkeys)); if (matpath != NULL) @@ -414,14 +459,13 @@ match_unsorted_outer(Query *root, matpath, restrictlist, merge_pathkeys)); - if (innerrel->cheapest_startup_path != - innerrel->cheapest_total_path) + if (inner_cheapest_startup != inner_cheapest_total) add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, outerpath, - innerrel->cheapest_startup_path, + inner_cheapest_startup, restrictlist, merge_pathkeys)); if (bestinnerjoin != NULL) @@ -435,6 +479,10 @@ match_unsorted_outer(Query *root, merge_pathkeys)); } + /* Can't do anything else if outer path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_OUTER) + continue; + /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, @@ -455,27 +503,30 @@ match_unsorted_outer(Query *root, * Generate a mergejoin on the basis of sorting the cheapest * inner. Since a sort will be needed, only cheapest total cost * matters. (But create_mergejoin_path will do the right thing if - * innerrel->cheapest_total_path is already correctly sorted.) + * inner_cheapest_total is already correctly sorted.) 
*/ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, outerpath, - innerrel->cheapest_total_path, + inner_cheapest_total, restrictlist, merge_pathkeys, mergeclauses, NIL, innersortkeys)); + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + continue; + /* * Look for presorted inner paths that satisfy the innersortkey * list --- or any truncation thereof, if we are allowed to build * a mergejoin using a subset of the merge clauses. Here, we * consider both cheap startup cost and cheap total cost. Ignore - * innerrel->cheapest_total_path, since we already made a path - * with it. + * inner_cheapest_total, since we already made a path with it. */ num_sortkeys = length(innersortkeys); if (num_sortkeys > 1 && !useallclauses) @@ -500,7 +551,7 @@ match_unsorted_outer(Query *root, trialsortkeys, TOTAL_COST); if (innerpath != NULL && - innerpath != innerrel->cheapest_total_path && + innerpath != inner_cheapest_total && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) @@ -535,7 +586,7 @@ match_unsorted_outer(Query *root, trialsortkeys, STARTUP_COST); if (innerpath != NULL && - innerpath != innerrel->cheapest_total_path && + innerpath != inner_cheapest_total && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) @@ -584,146 +635,6 @@ match_unsorted_outer(Query *root, } } -#ifdef NOT_USED - -/* - * match_unsorted_inner - * Generate mergejoin paths that use an explicit sort of the outer path - * with an already-ordered inner path. 
- * - * 'joinrel' is the join result relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses in this join - * 'jointype' is the type of join to do - */ -static void -match_unsorted_inner(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - List *mergeclause_list, - JoinType jointype) -{ - bool useallclauses; - List *i; - - switch (jointype) - { - case JOIN_INNER: - case JOIN_LEFT: - useallclauses = false; - break; - case JOIN_RIGHT: - case JOIN_FULL: - useallclauses = true; - break; - default: - elog(ERROR, "match_unsorted_inner: unexpected join type %d", - (int) jointype); - useallclauses = false; /* keep compiler quiet */ - break; - } - - foreach(i, innerrel->pathlist) - { - Path *innerpath = (Path *) lfirst(i); - List *mergeclauses; - List *outersortkeys; - List *merge_pathkeys; - Path *totalouterpath; - Path *startupouterpath; - - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - innerpath->pathkeys, - mergeclause_list); - - /* Done with this inner path if no chance for a mergejoin */ - if (mergeclauses == NIL) - continue; - if (useallclauses && length(mergeclauses) != length(mergeclause_list)) - continue; - - /* Compute the required ordering of the outer path */ - outersortkeys = make_pathkeys_for_mergeclauses(root, - mergeclauses, - outerrel); - - /* - * Generate a mergejoin on the basis of sorting the cheapest - * outer. Since a sort will be needed, only cheapest total cost - * matters. 
- */ - merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerrel->cheapest_total_path, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - outersortkeys, - NIL)); - - /* - * Now generate mergejoins based on already-sufficiently-ordered - * outer paths. There's likely to be some redundancy here with - * paths already generated by merge_unsorted_outer ... but since - * merge_unsorted_outer doesn't consider all permutations of the - * mergeclause list, it may fail to notice that this particular - * innerpath could have been used with this outerpath. - */ - totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, - outersortkeys, - TOTAL_COST); - if (totalouterpath == NULL) - continue; /* there won't be a startup-cost path - * either */ - - merge_pathkeys = build_join_pathkeys(root, joinrel, - totalouterpath->pathkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - totalouterpath, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - NIL)); - - startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, - outersortkeys, - STARTUP_COST); - if (startupouterpath != NULL && startupouterpath != totalouterpath) - { - merge_pathkeys = build_join_pathkeys(root, joinrel, - startupouterpath->pathkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - startupouterpath, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - NIL)); - } - } -} -#endif - /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and @@ -749,11 +660,14 @@ hash_inner_and_outer(Query *root, List *i; /* - * Hashjoin only supports inner and left joins. + * Hashjoin only supports inner, left, and IN joins. 
*/ switch (jointype) { case JOIN_INNER: + case JOIN_IN: + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: isouterjoin = false; break; case JOIN_LEFT: @@ -813,21 +727,40 @@ hash_inner_and_outer(Query *root, * cheapest-startup-cost outer paths. There's no need to consider * any but the cheapest-total-cost inner path, however. */ + Path *cheapest_startup_outer = outerrel->cheapest_startup_path; + Path *cheapest_total_outer = outerrel->cheapest_total_path; + Path *cheapest_total_inner = innerrel->cheapest_total_path; + + /* Unique-ify if need be */ + if (jointype == JOIN_UNIQUE_OUTER) + { + cheapest_total_outer = (Path *) + create_unique_path(root, outerrel, cheapest_total_outer); + cheapest_startup_outer = cheapest_total_outer; + jointype = JOIN_INNER; + } + else if (jointype == JOIN_UNIQUE_INNER) + { + cheapest_total_inner = (Path *) + create_unique_path(root, innerrel, cheapest_total_inner); + jointype = JOIN_INNER; + } + add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, - outerrel->cheapest_total_path, - innerrel->cheapest_total_path, + cheapest_total_outer, + cheapest_total_inner, restrictlist, hashclauses)); - if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) + if (cheapest_startup_outer != cheapest_total_outer) add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, - outerrel->cheapest_startup_path, - innerrel->cheapest_total_path, + cheapest_startup_outer, + cheapest_total_inner, restrictlist, hashclauses)); } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 037733d5d72..704afda37f8 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.59 2003/01/20 18:54:51 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -172,7 +172,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels) jrel = make_join_rel(root, old_rel, new_rel, JOIN_INNER); /* Avoid making duplicate entries ... */ - if (!ptrMember(jrel, result_rels)) + if (jrel && !ptrMember(jrel, result_rels)) result_rels = lcons(jrel, result_rels); break; /* need not consider more * joininfos */ @@ -276,10 +276,9 @@ make_rels_by_clause_joins(Query *root, /* * Avoid entering same joinrel into our output list more - * than once. (make_rels_by_joins doesn't really care, - * but GEQO does.) + * than once. */ - if (!ptrMember(jrel, result)) + if (jrel && !ptrMember(jrel, result)) result = lcons(jrel, result); } } @@ -323,7 +322,8 @@ make_rels_by_clauseless_joins(Query *root, * As long as given other_rels are distinct, don't need to * test to see if jrel is already part of output list. */ - result = lcons(jrel, result); + if (jrel) + result = lcons(jrel, result); } } @@ -367,6 +367,9 @@ make_jointree_rel(Query *root, Node *jtnode) /* Make this join rel */ rel = make_join_rel(root, lrel, rrel, j->jointype); + if (rel == NULL) + elog(ERROR, "make_jointree_rel: invalid join order!?"); + /* * Since we are only going to consider this one way to do it, * we're done generating Paths for this joinrel and can now select @@ -395,19 +398,121 @@ make_jointree_rel(Query *root, Node *jtnode) * created with the two rels as outer and inner rel. * (The join rel may already contain paths generated from other * pairs of rels that add up to the same set of base rels.) + * + * NB: will return NULL if attempted join is not valid. This can only + * happen when working with IN clauses that have been turned into joins. */ RelOptInfo * make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype) { + List *joinrelids; RelOptInfo *joinrel; List *restrictlist; + /* We should never try to join two overlapping sets of rels. 
*/ + Assert(nonoverlap_setsi(rel1->relids, rel2->relids)); + + /* Construct Relids set that identifies the joinrel. */ + joinrelids = nconc(listCopy(rel1->relids), listCopy(rel2->relids)); + + /* + * If we are implementing IN clauses as joins, there are some joins + * that are illegal. Check to see if the proposed join is trouble. + * We can skip the work if looking at an outer join, however, because + * only top-level joins might be affected. + */ + if (jointype == JOIN_INNER) + { + List *l; + + foreach(l, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + /* + * Cannot join if proposed join contains part, but only + * part, of the RHS, *and* it contains rels not in the RHS. + * + * Singleton RHS cannot be a problem, so skip expensive tests. + */ + if (length(ininfo->righthand) > 1 && + overlap_setsi(ininfo->righthand, joinrelids) && + !is_subseti(ininfo->righthand, joinrelids) && + !is_subseti(joinrelids, ininfo->righthand)) + { + freeList(joinrelids); + return NULL; + } + + /* + * No issue unless we are looking at a join of the IN's RHS + * to other stuff. + */ + if (! (length(ininfo->righthand) < length(joinrelids) && + is_subseti(ininfo->righthand, joinrelids))) + continue; + /* + * If we already joined IN's RHS to any part of its LHS in either + * input path, then this join is not constrained (the necessary + * work was done at a lower level). + */ + if (overlap_setsi(ininfo->lefthand, rel1->relids) && + is_subseti(ininfo->righthand, rel1->relids)) + continue; + if (overlap_setsi(ininfo->lefthand, rel2->relids) && + is_subseti(ininfo->righthand, rel2->relids)) + continue; + /* + * JOIN_IN technique will work if outerrel includes LHS and + * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles + * RHS/LHS. + * + * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; + * conversely JOIN_UNIQUE_INNER will work if innerrel is + * exactly RHS. 
+ * + * But none of these will work if we already found another IN + * that needs to trigger here. + */ + if (jointype != JOIN_INNER) + { + freeList(joinrelids); + return NULL; + } + if (is_subseti(ininfo->lefthand, rel1->relids) && + sameseti(ininfo->righthand, rel2->relids)) + { + jointype = JOIN_IN; + } + else if (is_subseti(ininfo->lefthand, rel2->relids) && + sameseti(ininfo->righthand, rel1->relids)) + { + jointype = JOIN_REVERSE_IN; + } + else if (sameseti(ininfo->righthand, rel1->relids)) + { + jointype = JOIN_UNIQUE_OUTER; + } + else if (sameseti(ininfo->righthand, rel2->relids)) + { + jointype = JOIN_UNIQUE_INNER; + } + else + { + /* invalid join path */ + freeList(joinrelids); + return NULL; + } + } + } + /* * Find or build the join RelOptInfo, and compute the restrictlist * that goes with this particular joining. */ - joinrel = build_join_rel(root, rel1, rel2, jointype, &restrictlist); + joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype, + &restrictlist); /* * Consider paths using each rel as both outer and inner. 
@@ -438,11 +543,43 @@ make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT, restrictlist); break; + case JOIN_IN: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN, + restrictlist); + /* REVERSE_IN isn't supported by joinpath.c */ + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER, + restrictlist); + break; + case JOIN_REVERSE_IN: + /* REVERSE_IN isn't supported by joinpath.c */ + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER, + restrictlist); + break; + case JOIN_UNIQUE_OUTER: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER, + restrictlist); + break; + case JOIN_UNIQUE_INNER: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER, + restrictlist); + break; default: elog(ERROR, "make_join_rel: unsupported join type %d", (int) jointype); break; } + freeList(joinrelids); + return joinrel; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f6e51d0d52f..b7b1204e76e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.131 2003/01/15 23:10:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,6 +26,7 @@ #include "optimizer/restrictinfo.h" #include 
"optimizer/tlist.h" #include "optimizer/var.h" +#include "parser/parse_clause.h" #include "parser/parse_expr.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -36,6 +37,7 @@ static Join *create_join_plan(Query *root, JoinPath *best_path); static Append *create_append_plan(Query *root, AppendPath *best_path); static Result *create_result_plan(Query *root, ResultPath *best_path); static Material *create_material_plan(Query *root, MaterialPath *best_path); +static Plan *create_unique_plan(Query *root, UniquePath *best_path); static SeqScan *create_seqscan_plan(Path *best_path, List *tlist, List *scan_clauses); static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path, @@ -146,6 +148,10 @@ create_plan(Query *root, Path *best_path) plan = (Plan *) create_material_plan(root, (MaterialPath *) best_path); break; + case T_Unique: + plan = (Plan *) create_unique_plan(root, + (UniquePath *) best_path); + break; default: elog(ERROR, "create_plan: unknown pathtype %d", best_path->pathtype); @@ -399,6 +405,97 @@ create_material_plan(Query *root, MaterialPath *best_path) return plan; } +/* + * create_unique_plan + * Create a Unique plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Plan * +create_unique_plan(Query *root, UniquePath *best_path) +{ + Plan *plan; + Plan *subplan; + List *sub_targetlist; + List *l; + + subplan = create_plan(root, best_path->subpath); + + /* + * If the subplan came from an IN subselect (currently always the case), + * we need to instantiate the correct output targetlist for the subselect, + * rather than using the flattened tlist. 
+ */ + sub_targetlist = NIL; + foreach(l, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + if (sameseti(ininfo->righthand, best_path->path.parent->relids)) + { + sub_targetlist = ininfo->sub_targetlist; + break; + } + } + + if (sub_targetlist) + { + /* + * Transform list of plain Vars into targetlist + */ + List *newtlist = NIL; + int resno = 1; + + foreach(l, sub_targetlist) + { + Node *tlexpr = lfirst(l); + TargetEntry *tle; + + tle = makeTargetEntry(makeResdom(resno, + exprType(tlexpr), + exprTypmod(tlexpr), + NULL, + false), + (Expr *) tlexpr); + newtlist = lappend(newtlist, tle); + resno++; + } + /* + * If the top plan node can't do projections, we need to add a + * Result node to help it along. + * + * Currently, the only non-projection-capable plan type + * we can see here is Append. + */ + if (IsA(subplan, Append)) + subplan = (Plan *) make_result(newtlist, NULL, subplan); + else + subplan->targetlist = newtlist; + } + + if (best_path->use_hash) + { + elog(ERROR, "create_unique_plan: hash case not implemented yet"); + plan = NULL; + } + else + { + List *sort_tlist; + List *sortList; + + sort_tlist = new_unsorted_tlist(subplan->targetlist); + sortList = addAllTargetsToSortList(NIL, sort_tlist); + plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist, + subplan, sortList); + plan = (Plan *) make_unique(sort_tlist, plan, sortList); + } + + plan->plan_rows = best_path->rows; + + return plan; +} + /***************************************************************************** * @@ -1548,6 +1645,52 @@ make_sort_from_pathkeys(Query *root, Plan *lefttree, return make_sort(root, sort_tlist, lefttree, numsortkeys); } +/* + * make_sort_from_sortclauses + * Create sort plan to sort according to given sortclauses + * + * 'tlist' is the targetlist + * 'lefttree' is the node which yields input tuples + * 'sortcls' is a list of SortClauses + */ +Sort * +make_sort_from_sortclauses(Query *root, List *tlist, + Plan *lefttree, List 
*sortcls) +{ + List *sort_tlist; + List *i; + int keyno = 0; + + /* + * First make a copy of the tlist so that we don't corrupt the + * original. + */ + sort_tlist = new_unsorted_tlist(tlist); + + foreach(i, sortcls) + { + SortClause *sortcl = (SortClause *) lfirst(i); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, sort_tlist); + Resdom *resdom = tle->resdom; + + /* + * Check for the possibility of duplicate order-by clauses --- the + * parser should have removed 'em, but the executor will get + * terribly confused if any get through! + */ + if (resdom->reskey == 0) + { + /* OK, insert the ordering info needed by the executor. */ + resdom->reskey = ++keyno; + resdom->reskeyop = sortcl->sortop; + } + } + + Assert(keyno > 0); + + return make_sort(root, sort_tlist, lefttree, keyno); +} + Material * make_material(List *tlist, Plan *lefttree) { diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 87c77e52fc3..037ed3314cf 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,13 +8,12 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.81 2003/01/15 19:35:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.82 2003/01/20 18:54:52 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" - #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -579,6 +578,11 @@ distribute_qual_to_rels(Query *root, Node *clause, * the appropriate joininfo list (creating a new list and adding it to the * appropriate rel node if necessary). * + * Note that the same copy of the restrictinfo node is linked to by all the + * lists it is in. This allows us to exploit caching of information about + * the restriction clause (but we must be careful that the information does + * not depend on context). 
+ * * 'restrictinfo' describes the join clause * 'join_relids' is the list of relations participating in the join clause */ @@ -602,12 +606,13 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, if (lfirsti(otherrel) != cur_relid) unjoined_relids = lappendi(unjoined_relids, lfirsti(otherrel)); } + Assert(unjoined_relids != NIL); /* * Find or make the joininfo node for this combination of rels, * and add the restrictinfo node to it. */ - joininfo = find_joininfo_node(find_base_rel(root, cur_relid), + joininfo = make_joininfo_node(find_base_rel(root, cur_relid), unjoined_relids); joininfo->jinfo_restrictinfo = lappend(joininfo->jinfo_restrictinfo, restrictinfo); @@ -731,7 +736,7 @@ exprs_known_equal(Query *root, Node *item1, Node *item2) { JoinInfo *joininfo = find_joininfo_node(rel1, relids); - restrictlist = joininfo->jinfo_restrictinfo; + restrictlist = joininfo ? joininfo->jinfo_restrictinfo : NIL; } /* diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 6e265931eb2..daa840f789e 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.73 2003/01/15 19:35:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.74 2003/01/20 18:54:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction, /* * init planner lists to empty + * + * NOTE: in_info_list was set up by subquery_planner, do not touch here */ root->base_rel_list = NIL; root->other_rel_list = NIL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index cd5d266e07a..388380f8843 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: 
/cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.140 2003/01/17 03:25:03 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.141 2003/01/20 18:54:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,24 +38,17 @@ #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "parser/parse_oper.h" -#include "rewrite/rewriteManip.h" -#include "utils/lsyscache.h" #include "utils/selfuncs.h" #include "utils/syscache.h" /* Expression kind codes for preprocess_expression */ -#define EXPRKIND_TARGET 0 -#define EXPRKIND_WHERE 1 -#define EXPRKIND_HAVING 2 +#define EXPRKIND_QUAL 0 +#define EXPRKIND_TARGET 1 +#define EXPRKIND_RTFUNC 2 +#define EXPRKIND_ININFO 3 -static Node *pull_up_subqueries(Query *parse, Node *jtnode, - bool below_outer_join); -static bool is_simple_subquery(Query *subquery); -static bool has_nullable_targetlist(Query *subquery); -static void resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist); -static Node *preprocess_jointree(Query *parse, Node *jtnode); static Node *preprocess_expression(Query *parse, Node *expr, int kind); static void preprocess_qual_conditions(Query *parse, Node *jtnode); static Plan *inheritance_planner(Query *parse, List *inheritlist); @@ -155,6 +148,17 @@ subquery_planner(Query *parse, double tuple_fraction) PlannerQueryLevel++; PlannerInitPlan = NIL; + /* + * Look for IN clauses at the top level of WHERE, and transform them + * into joins. Note that this step only handles IN clauses originally + * at top level of WHERE; if we pull up any subqueries in the next step, + * their INs are processed just before pulling them up. + */ + parse->in_info_list = NIL; + if (parse->hasSubLinks) + parse->jointree->quals = pull_up_IN_clauses(parse, + parse->jointree->quals); + /* * Check to see if any subqueries in the rangetable can be merged into * this query. 
@@ -195,7 +199,11 @@ subquery_planner(Query *parse, double tuple_fraction) preprocess_qual_conditions(parse, (Node *) parse->jointree); parse->havingQual = preprocess_expression(parse, parse->havingQual, - EXPRKIND_HAVING); + EXPRKIND_QUAL); + + parse->in_info_list = (List *) + preprocess_expression(parse, (Node *) parse->in_info_list, + EXPRKIND_ININFO); /* Also need to preprocess expressions for function RTEs */ foreach(lst, parse->rtable) @@ -204,8 +212,7 @@ subquery_planner(Query *parse, double tuple_fraction) if (rte->rtekind == RTE_FUNCTION) rte->funcexpr = preprocess_expression(parse, rte->funcexpr, - EXPRKIND_TARGET); - /* These are not targetlist items, but close enough... */ + EXPRKIND_RTFUNC); } /* @@ -295,427 +302,6 @@ subquery_planner(Query *parse, double tuple_fraction) return plan; } -/* - * pull_up_subqueries - * Look for subqueries in the rangetable that can be pulled up into - * the parent query. If the subquery has no special features like - * grouping/aggregation then we can merge it into the parent's jointree. - * - * below_outer_join is true if this jointree node is within the nullable - * side of an outer join. This restricts what we can do. - * - * A tricky aspect of this code is that if we pull up a subquery we have - * to replace Vars that reference the subquery's outputs throughout the - * parent query, including quals attached to jointree nodes above the one - * we are currently processing! We handle this by being careful not to - * change the jointree structure while recursing: no nodes other than - * subquery RangeTblRef entries will be replaced. Also, we can't turn - * ResolveNew loose on the whole jointree, because it'll return a mutated - * copy of the tree; we have to invoke it just on the quals, instead. 
- */ -static Node * -pull_up_subqueries(Query *parse, Node *jtnode, bool below_outer_join) -{ - if (jtnode == NULL) - return NULL; - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - RangeTblEntry *rte = rt_fetch(varno, parse->rtable); - Query *subquery = rte->subquery; - - /* - * Is this a subquery RTE, and if so, is the subquery simple - * enough to pull up? (If not, do nothing at this node.) - * - * If we are inside an outer join, only pull up subqueries whose - * targetlists are nullable --- otherwise substituting their tlist - * entries for upper Var references would do the wrong thing (the - * results wouldn't become NULL when they're supposed to). XXX - * This could be improved by generating pseudo-variables for such - * expressions; we'd have to figure out how to get the pseudo- - * variables evaluated at the right place in the modified plan - * tree. Fix it someday. - * - * Note: even if the subquery itself is simple enough, we can't pull - * it up if there is a reference to its whole tuple result. - * Perhaps a pseudo-variable is the answer here too. - */ - if (rte->rtekind == RTE_SUBQUERY && is_simple_subquery(subquery) && - (!below_outer_join || has_nullable_targetlist(subquery)) && - !contain_whole_tuple_var((Node *) parse, varno, 0)) - { - int rtoffset; - List *subtlist; - List *rt; - - /* - * First, recursively pull up the subquery's subqueries, so - * that this routine's processing is complete for its jointree - * and rangetable. NB: if the same subquery is referenced - * from multiple jointree items (which can't happen normally, - * but might after rule rewriting), then we will invoke this - * processing multiple times on that subquery. OK because - * nothing will happen after the first time. We do have to be - * careful to copy everything we pull up, however, or risk - * having chunks of structure multiply linked. 
- * - * Note: 'false' is correct here even if we are within an outer - * join in the upper query; the lower query starts with a clean - * slate for outer-join semantics. - */ - subquery->jointree = (FromExpr *) - pull_up_subqueries(subquery, (Node *) subquery->jointree, - false); - - /* - * Now make a modifiable copy of the subquery that we can run - * OffsetVarNodes and IncrementVarSublevelsUp on. - */ - subquery = copyObject(subquery); - - /* - * Adjust level-0 varnos in subquery so that we can append its - * rangetable to upper query's. - */ - rtoffset = length(parse->rtable); - OffsetVarNodes((Node *) subquery, rtoffset, 0); - - /* - * Upper-level vars in subquery are now one level closer to their - * parent than before. - */ - IncrementVarSublevelsUp((Node *) subquery, -1, 1); - - /* - * Replace all of the top query's references to the subquery's - * outputs with copies of the adjusted subtlist items, being - * careful not to replace any of the jointree structure. - * (This'd be a lot cleaner if we could use - * query_tree_mutator.) - */ - subtlist = subquery->targetList; - parse->targetList = (List *) - ResolveNew((Node *) parse->targetList, - varno, 0, subtlist, CMD_SELECT, 0); - resolvenew_in_jointree((Node *) parse->jointree, varno, subtlist); - Assert(parse->setOperations == NULL); - parse->havingQual = - ResolveNew(parse->havingQual, - varno, 0, subtlist, CMD_SELECT, 0); - - foreach(rt, parse->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); - - if (rte->rtekind == RTE_JOIN) - rte->joinaliasvars = (List *) - ResolveNew((Node *) rte->joinaliasvars, - varno, 0, subtlist, CMD_SELECT, 0); - } - - /* - * Now append the adjusted rtable entries to upper query. (We - * hold off until after fixing the upper rtable entries; no - * point in running that code on the subquery ones too.) - */ - parse->rtable = nconc(parse->rtable, subquery->rtable); - - /* - * Pull up any FOR UPDATE markers, too. 
(OffsetVarNodes - * already adjusted the marker values, so just nconc the - * list.) - */ - parse->rowMarks = nconc(parse->rowMarks, subquery->rowMarks); - - /* - * Miscellaneous housekeeping. - */ - parse->hasSubLinks |= subquery->hasSubLinks; - /* subquery won't be pulled up if it hasAggs, so no work there */ - - /* - * Return the adjusted subquery jointree to replace the - * RangeTblRef entry in my jointree. - */ - return (Node *) subquery->jointree; - } - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - lfirst(l) = pull_up_subqueries(parse, lfirst(l), - below_outer_join); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - /* Recurse, being careful to tell myself when inside outer join */ - switch (j->jointype) - { - case JOIN_INNER: - j->larg = pull_up_subqueries(parse, j->larg, - below_outer_join); - j->rarg = pull_up_subqueries(parse, j->rarg, - below_outer_join); - break; - case JOIN_LEFT: - j->larg = pull_up_subqueries(parse, j->larg, - below_outer_join); - j->rarg = pull_up_subqueries(parse, j->rarg, - true); - break; - case JOIN_FULL: - j->larg = pull_up_subqueries(parse, j->larg, - true); - j->rarg = pull_up_subqueries(parse, j->rarg, - true); - break; - case JOIN_RIGHT: - j->larg = pull_up_subqueries(parse, j->larg, - true); - j->rarg = pull_up_subqueries(parse, j->rarg, - below_outer_join); - break; - case JOIN_UNION: - - /* - * This is where we fail if upper levels of planner - * haven't rewritten UNION JOIN as an Append ... - */ - elog(ERROR, "UNION JOIN is not implemented yet"); - break; - default: - elog(ERROR, "pull_up_subqueries: unexpected join type %d", - j->jointype); - break; - } - } - else - elog(ERROR, "pull_up_subqueries: unexpected node type %d", - nodeTag(jtnode)); - return jtnode; -} - -/* - * is_simple_subquery - * Check a subquery in the range table to see if it's simple enough - * to pull up into the parent query. 
- */ -static bool -is_simple_subquery(Query *subquery) -{ - /* - * Let's just make sure it's a valid subselect ... - */ - if (!IsA(subquery, Query) || - subquery->commandType != CMD_SELECT || - subquery->resultRelation != 0 || - subquery->into != NULL || - subquery->isPortal) - elog(ERROR, "is_simple_subquery: subquery is bogus"); - - /* - * Can't currently pull up a query with setops. Maybe after querytree - * redesign... - */ - if (subquery->setOperations) - return false; - - /* - * Can't pull up a subquery involving grouping, aggregation, sorting, - * or limiting. - */ - if (subquery->hasAggs || - subquery->groupClause || - subquery->havingQual || - subquery->sortClause || - subquery->distinctClause || - subquery->limitOffset || - subquery->limitCount) - return false; - - /* - * Don't pull up a subquery that has any set-returning functions in - * its targetlist. Otherwise we might well wind up inserting - * set-returning functions into places where they mustn't go, such as - * quals of higher queries. - */ - if (expression_returns_set((Node *) subquery->targetList)) - return false; - - /* - * Hack: don't try to pull up a subquery with an empty jointree. - * query_planner() will correctly generate a Result plan for a - * jointree that's totally empty, but I don't think the right things - * happen if an empty FromExpr appears lower down in a jointree. Not - * worth working hard on this, just to collapse SubqueryScan/Result - * into Result... - */ - if (subquery->jointree->fromlist == NIL) - return false; - - return true; -} - -/* - * has_nullable_targetlist - * Check a subquery in the range table to see if all the non-junk - * targetlist items are simple variables (and, hence, will correctly - * go to NULL when examined above the point of an outer join). - * - * A possible future extension is to accept strict functions of simple - * variables, eg, "x + 1". 
- */ -static bool -has_nullable_targetlist(Query *subquery) -{ - List *l; - - foreach(l, subquery->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - /* ignore resjunk columns */ - if (tle->resdom->resjunk) - continue; - - /* Okay if tlist item is a simple Var */ - if (tle->expr && IsA(tle->expr, Var)) - continue; - - return false; - } - return true; -} - -/* - * Helper routine for pull_up_subqueries: do ResolveNew on every expression - * in the jointree, without changing the jointree structure itself. Ugly, - * but there's no other way... - */ -static void -resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist) -{ - if (jtnode == NULL) - return; - if (IsA(jtnode, RangeTblRef)) - { - /* nothing to do here */ - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - resolvenew_in_jointree(lfirst(l), varno, subtlist); - f->quals = ResolveNew(f->quals, - varno, 0, subtlist, CMD_SELECT, 0); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - resolvenew_in_jointree(j->larg, varno, subtlist); - resolvenew_in_jointree(j->rarg, varno, subtlist); - j->quals = ResolveNew(j->quals, - varno, 0, subtlist, CMD_SELECT, 0); - - /* - * We don't bother to update the colvars list, since it won't be - * used again ... - */ - } - else - elog(ERROR, "resolvenew_in_jointree: unexpected node type %d", - nodeTag(jtnode)); -} - -/* - * preprocess_jointree - * Attempt to simplify a query's jointree. - * - * If we succeed in pulling up a subquery then we might form a jointree - * in which a FromExpr is a direct child of another FromExpr. In that - * case we can consider collapsing the two FromExprs into one. This is - * an optional conversion, since the planner will work correctly either - * way. But we may find a better plan (at the cost of more planning time) - * if we merge the two nodes. 
- * - * NOTE: don't try to do this in the same jointree scan that does subquery - * pullup! Since we're changing the jointree structure here, that wouldn't - * work reliably --- see comments for pull_up_subqueries(). - */ -static Node * -preprocess_jointree(Query *parse, Node *jtnode) -{ - if (jtnode == NULL) - return NULL; - if (IsA(jtnode, RangeTblRef)) - { - /* nothing to do here... */ - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *newlist = NIL; - List *l; - - foreach(l, f->fromlist) - { - Node *child = (Node *) lfirst(l); - - /* Recursively simplify the child... */ - child = preprocess_jointree(parse, child); - /* Now, is it a FromExpr? */ - if (child && IsA(child, FromExpr)) - { - /* - * Yes, so do we want to merge it into parent? Always do - * so if child has just one element (since that doesn't - * make the parent's list any longer). Otherwise we have - * to be careful about the increase in planning time - * caused by combining the two join search spaces into - * one. Our heuristic is to merge if the merge will - * produce a join list no longer than GEQO_RELS/2. - * (Perhaps need an additional user parameter?) 
- */ - FromExpr *subf = (FromExpr *) child; - int childlen = length(subf->fromlist); - int myothers = length(newlist) + length(lnext(l)); - - if (childlen <= 1 || (childlen + myothers) <= geqo_rels / 2) - { - newlist = nconc(newlist, subf->fromlist); - f->quals = make_and_qual(subf->quals, f->quals); - } - else - newlist = lappend(newlist, child); - } - else - newlist = lappend(newlist, child); - } - f->fromlist = newlist; - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - /* Can't usefully change the JoinExpr, but recurse on children */ - j->larg = preprocess_jointree(parse, j->larg); - j->rarg = preprocess_jointree(parse, j->rarg); - } - else - elog(ERROR, "preprocess_jointree: unexpected node type %d", - nodeTag(jtnode)); - return jtnode; -} - /* * preprocess_expression * Do subquery_planner's preprocessing work for an expression, @@ -731,7 +317,7 @@ preprocess_expression(Query *parse, Node *expr, int kind) * else sublinks expanded out from join aliases wouldn't get processed. */ if (parse->hasJoinRTEs) - expr = flatten_join_alias_vars(expr, parse->rtable); + expr = flatten_join_alias_vars(parse, expr); /* * Simplify constant expressions. @@ -748,7 +334,7 @@ preprocess_expression(Query *parse, Node *expr, int kind) * XXX Is there any value in re-applying eval_const_expressions after * canonicalize_qual? 
*/ - if (kind != EXPRKIND_TARGET) + if (kind == EXPRKIND_QUAL) { expr = (Node *) canonicalize_qual((Expr *) expr, true); @@ -760,7 +346,7 @@ preprocess_expression(Query *parse, Node *expr, int kind) /* Expand SubLinks to SubPlans */ if (parse->hasSubLinks) - expr = SS_process_sublinks(expr, (kind != EXPRKIND_TARGET)); + expr = SS_process_sublinks(expr, (kind == EXPRKIND_QUAL)); /* Replace uplevel vars with Param nodes */ if (PlannerQueryLevel > 1) @@ -791,7 +377,7 @@ preprocess_qual_conditions(Query *parse, Node *jtnode) foreach(l, f->fromlist) preprocess_qual_conditions(parse, lfirst(l)); - f->quals = preprocess_expression(parse, f->quals, EXPRKIND_WHERE); + f->quals = preprocess_expression(parse, f->quals, EXPRKIND_QUAL); } else if (IsA(jtnode, JoinExpr)) { @@ -800,7 +386,7 @@ preprocess_qual_conditions(Query *parse, Node *jtnode) preprocess_qual_conditions(parse, j->larg); preprocess_qual_conditions(parse, j->rarg); - j->quals = preprocess_expression(parse, j->quals, EXPRKIND_WHERE); + j->quals = preprocess_expression(parse, j->quals, EXPRKIND_QUAL); } else elog(ERROR, "preprocess_qual_conditions: unexpected node type %d", @@ -1251,12 +837,16 @@ grouping_planner(Query *parse, double tuple_fraction) */ if (parse->groupClause) { + List *groupExprs; + /* * Always estimate the number of groups. We can't do this until * after running query_planner(), either. */ + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); dNumGroups = estimate_num_groups(parse, - parse->groupClause, + groupExprs, cheapest_path->parent->rows); /* Also want it as a long int --- but 'ware overflow! 
*/ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); @@ -1552,8 +1142,10 @@ grouping_planner(Query *parse, double tuple_fraction) if (parse->sortClause) { if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) - result_plan = make_sortplan(parse, tlist, result_plan, - parse->sortClause); + result_plan = (Plan *) make_sort_from_sortclauses(parse, + tlist, + result_plan, + parse->sortClause); } /* @@ -1570,9 +1162,15 @@ grouping_planner(Query *parse, double tuple_fraction) * comparable to GROUP BY. */ if (!parse->groupClause && !parse->hasAggs) + { + List *distinctExprs; + + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); result_plan->plan_rows = estimate_num_groups(parse, - parse->distinctClause, + distinctExprs, result_plan->plan_rows); + } } /* @@ -1773,47 +1371,6 @@ make_groupsortplan(Query *parse, return (Plan *) make_sort(parse, sort_tlist, subplan, keyno); } -/* - * make_sortplan - * Add a Sort node to implement an explicit ORDER BY clause. - */ -Plan * -make_sortplan(Query *parse, List *tlist, Plan *plannode, List *sortcls) -{ - List *sort_tlist; - List *i; - int keyno = 0; - - /* - * First make a copy of the tlist so that we don't corrupt the - * original. - */ - sort_tlist = new_unsorted_tlist(tlist); - - foreach(i, sortcls) - { - SortClause *sortcl = (SortClause *) lfirst(i); - TargetEntry *tle = get_sortgroupclause_tle(sortcl, sort_tlist); - Resdom *resdom = tle->resdom; - - /* - * Check for the possibility of duplicate order-by clauses --- the - * parser should have removed 'em, but the executor will get - * terribly confused if any get through! - */ - if (resdom->reskey == 0) - { - /* OK, insert the ordering info needed by the executor. */ - resdom->reskey = ++keyno; - resdom->reskeyop = sortcl->sortop; - } - } - - Assert(keyno > 0); - - return (Plan *) make_sort(parse, sort_tlist, plannode, keyno); -} - /* * postprocess_setop_tlist * Fix up targetlist returned by plan_set_operations(). 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 513480c4e20..123b96f1880 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.90 2003/01/15 23:10:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.91 2003/01/20 18:54:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,7 @@ typedef struct List *outer_tlist; List *inner_tlist; Index acceptable_rel; + bool tlists_have_non_vars; } join_references_context; typedef struct @@ -44,11 +45,13 @@ static void fix_expr_references(Plan *plan, Node *node); static bool fix_expr_references_walker(Node *node, void *context); static void set_join_references(Join *join, List *rtable); static void set_uppernode_references(Plan *plan, Index subvarno); +static bool targetlist_has_non_vars(List *tlist); static List *join_references(List *clauses, List *rtable, List *outer_tlist, List *inner_tlist, - Index acceptable_rel); + Index acceptable_rel, + bool tlists_have_non_vars); static Node *join_references_mutator(Node *node, join_references_context *context); static Node *replace_vars_with_subplan_refs(Node *node, @@ -175,7 +178,10 @@ set_plan_references(Plan *plan, List *rtable) rtable, NIL, plan->lefttree->targetlist, - (Index) 0); + (Index) 0, + targetlist_has_non_vars(plan->lefttree->targetlist)); + fix_expr_references(plan, + (Node *) ((Hash *) plan)->hashkeys); break; case T_Material: case T_Sort: @@ -308,23 +314,30 @@ set_join_references(Join *join, List *rtable) Plan *inner_plan = join->plan.righttree; List *outer_tlist = outer_plan->targetlist; List *inner_tlist = inner_plan->targetlist; + bool tlists_have_non_vars; + + tlists_have_non_vars = targetlist_has_non_vars(outer_tlist) || + targetlist_has_non_vars(inner_tlist); /* All join plans have tlist, qual, 
and joinqual */ join->plan.targetlist = join_references(join->plan.targetlist, rtable, outer_tlist, inner_tlist, - (Index) 0); + (Index) 0, + tlists_have_non_vars); join->plan.qual = join_references(join->plan.qual, rtable, outer_tlist, inner_tlist, - (Index) 0); + (Index) 0, + tlists_have_non_vars); join->joinqual = join_references(join->joinqual, rtable, outer_tlist, inner_tlist, - (Index) 0); + (Index) 0, + tlists_have_non_vars); /* Now do join-type-specific stuff */ if (IsA(join, NestLoop)) @@ -350,12 +363,14 @@ set_join_references(Join *join, List *rtable) rtable, outer_tlist, NIL, - innerrel); + innerrel, + tlists_have_non_vars); innerscan->indxqual = join_references(innerscan->indxqual, rtable, outer_tlist, NIL, - innerrel); + innerrel, + tlists_have_non_vars); /* * We must fix the inner qpqual too, if it has join clauses * (this could happen if the index is lossy: some indxquals @@ -366,7 +381,8 @@ set_join_references(Join *join, List *rtable) rtable, outer_tlist, NIL, - innerrel); + innerrel, + tlists_have_non_vars); } } else if (IsA(inner_plan, TidScan)) @@ -378,7 +394,8 @@ set_join_references(Join *join, List *rtable) rtable, outer_tlist, NIL, - innerrel); + innerrel, + tlists_have_non_vars); } } else if (IsA(join, MergeJoin)) @@ -389,7 +406,8 @@ set_join_references(Join *join, List *rtable) rtable, outer_tlist, inner_tlist, - (Index) 0); + (Index) 0, + tlists_have_non_vars); } else if (IsA(join, HashJoin)) { @@ -399,7 +417,8 @@ set_join_references(Join *join, List *rtable) rtable, outer_tlist, inner_tlist, - (Index) 0); + (Index) 0, + tlists_have_non_vars); } } @@ -433,22 +452,7 @@ set_uppernode_references(Plan *plan, Index subvarno) else subplan_targetlist = NIL; - /* - * Detect whether subplan tlist has any non-Vars (typically it won't - * because it's been flattened). This allows us to save comparisons - * in common cases. 
- */ - tlist_has_non_vars = false; - foreach(l, subplan_targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->expr && !IsA(tle->expr, Var)) - { - tlist_has_non_vars = true; - break; - } - } + tlist_has_non_vars = targetlist_has_non_vars(subplan_targetlist); output_targetlist = NIL; foreach(l, plan->targetlist) @@ -473,6 +477,27 @@ set_uppernode_references(Plan *plan, Index subvarno) tlist_has_non_vars); } +/* + * targetlist_has_non_vars --- are there any non-Var entries in tlist? + * + * In most cases, subplan tlists will be "flat" tlists with only Vars. + * Checking for this allows us to save comparisons in common cases. + */ +static bool +targetlist_has_non_vars(List *tlist) +{ + List *l; + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->expr && !IsA(tle->expr, Var)) + return true; + } + return false; +} + /* * join_references * Creates a new set of targetlist entries or join qual clauses by @@ -505,7 +530,8 @@ join_references(List *clauses, List *rtable, List *outer_tlist, List *inner_tlist, - Index acceptable_rel) + Index acceptable_rel, + bool tlists_have_non_vars) { join_references_context context; @@ -513,6 +539,7 @@ join_references(List *clauses, context.outer_tlist = outer_tlist; context.inner_tlist = inner_tlist; context.acceptable_rel = acceptable_rel; + context.tlists_have_non_vars = tlists_have_non_vars; return (List *) join_references_mutator((Node *) clauses, &context); } @@ -554,6 +581,42 @@ join_references_mutator(Node *node, /* No referent found for Var */ elog(ERROR, "join_references: variable not in subplan target lists"); } + /* Try matching more complex expressions too, if tlists have any */ + if (context->tlists_have_non_vars) + { + Resdom *resdom; + + resdom = tlist_member(node, context->outer_tlist); + if (resdom) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVar(OUTER, + resdom->resno, + resdom->restype, + resdom->restypmod, + 0); + 
newvar->varnoold = 0; /* wasn't ever a plain Var */ + newvar->varoattno = 0; + return (Node *) newvar; + } + resdom = tlist_member(node, context->inner_tlist); + if (resdom) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVar(INNER, + resdom->resno, + resdom->restype, + resdom->restypmod, + 0); + newvar->varnoold = 0; /* wasn't ever a plain Var */ + newvar->varoattno = 0; + return (Node *) newvar; + } + } return expression_tree_mutator(node, join_references_mutator, (void *) context); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index b30454dcae2..5f420f37250 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.67 2003/01/17 02:01:11 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,9 +23,11 @@ #include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/subselect.h" +#include "optimizer/var.h" #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "parser/parse_oper.h" +#include "parser/parse_relation.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -62,7 +64,8 @@ typedef struct finalize_primnode_results static List *convert_sublink_opers(List *lefthand, List *operOids, - List *targetlist, List **paramIds); + List *targetlist, int rtindex, + List **righthandIds); static bool subplan_is_hashable(SubLink *slink, SubPlan *node); static Node *replace_correlation_vars_mutator(Node *node, void *context); static Node *process_sublinks_mutator(Node *node, bool *isTopQual); @@ -289,6 +292,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual) exprs = 
convert_sublink_opers(lefthand, slink->operOids, plan->targetlist, + 0, &node->paramIds); node->setParam = nconc(node->setParam, listCopy(node->paramIds)); PlannerInitPlan = lappend(PlannerInitPlan, node); @@ -393,6 +397,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual) node->exprs = convert_sublink_opers(lefthand, slink->operOids, plan->targetlist, + 0, &node->paramIds); /* @@ -424,26 +429,32 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual) /* * convert_sublink_opers: given a lefthand-expressions list and a list of * operator OIDs, build a list of actually executable expressions. The - * righthand sides of the expressions are Params representing the results - * of the sub-select. + * righthand sides of the expressions are Params or Vars representing the + * results of the sub-select. * - * The paramids of the Params created are returned in the *paramIds list. + * If rtindex is 0, we build Params to represent the sub-select outputs. + * The paramids of the Params created are returned in the *righthandIds list. + * + * If rtindex is not 0, we build Vars using that rtindex as varno. The + * Vars themselves are returned in *righthandIds (this is a bit of a type + * cheat, but we can get away with it). 
*/ static List * convert_sublink_opers(List *lefthand, List *operOids, - List *targetlist, List **paramIds) + List *targetlist, int rtindex, + List **righthandIds) { List *result = NIL; List *lst; - *paramIds = NIL; + *righthandIds = NIL; foreach(lst, operOids) { Oid opid = (Oid) lfirsti(lst); Node *leftop = lfirst(lefthand); TargetEntry *te = lfirst(targetlist); - Param *prm; + Node *rightop; Operator tup; Form_pg_operator opform; Node *left, @@ -451,12 +462,28 @@ convert_sublink_opers(List *lefthand, List *operOids, Assert(!te->resdom->resjunk); - /* Make the Param node representing the subplan's result */ - prm = generate_new_param(te->resdom->restype, - te->resdom->restypmod); - - /* Record its ID */ - *paramIds = lappendi(*paramIds, prm->paramid); + if (rtindex) + { + /* Make the Var node representing the subplan's result */ + rightop = (Node *) makeVar(rtindex, + te->resdom->resno, + te->resdom->restype, + te->resdom->restypmod, + 0); + /* Record it for caller */ + *righthandIds = lappend(*righthandIds, rightop); + } + else + { + /* Make the Param node representing the subplan's result */ + Param *prm; + + prm = generate_new_param(te->resdom->restype, + te->resdom->restypmod); + /* Record its ID */ + *righthandIds = lappendi(*righthandIds, prm->paramid); + rightop = (Node *) prm; + } /* Look up the operator to get its declared input types */ tup = SearchSysCache(OPEROID, @@ -473,7 +500,7 @@ convert_sublink_opers(List *lefthand, List *operOids, * function calls must be inserted for this operator! */ left = make_operand(leftop, exprType(leftop), opform->oprleft); - right = make_operand((Node *) prm, prm->paramtype, opform->oprright); + right = make_operand(rightop, te->resdom->restype, opform->oprright); result = lappend(result, make_opclause(opid, opform->oprresult, @@ -564,6 +591,96 @@ subplan_is_hashable(SubLink *slink, SubPlan *node) return true; } +/* + * convert_IN_to_join: can we convert an IN SubLink to join style? 
+ * + * The caller has found a SubLink at the top level of WHERE, but has not + * checked the properties of the SubLink at all. Decide whether it is + * appropriate to process this SubLink in join style. If not, return NULL. + * If so, build the qual clause(s) to replace the SubLink, and return them. + * + * Side effects of a successful conversion include adding the SubLink's + * subselect to the query's rangetable and adding an InClauseInfo node to + * its in_info_list. + */ +Node * +convert_IN_to_join(Query *parse, SubLink *sublink) +{ + Query *subselect = (Query *) sublink->subselect; + List *left_varnos; + int rtindex; + RangeTblEntry *rte; + RangeTblRef *rtr; + InClauseInfo *ininfo; + List *exprs; + + /* + * The sublink type must be "= ANY" --- that is, an IN operator. + * (We require the operator name to be unqualified, which may be + * overly paranoid, or may not be.) + */ + if (sublink->subLinkType != ANY_SUBLINK) + return NULL; + if (length(sublink->operName) != 1 || + strcmp(strVal(lfirst(sublink->operName)), "=") != 0) + return NULL; + /* + * The sub-select must not refer to any Vars of the parent query. + * (Vars of higher levels should be okay, though.) + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + /* + * The left-hand expressions must contain some Vars of the current + * query, else it's not gonna be a join. + */ + left_varnos = pull_varnos((Node *) sublink->lefthand); + if (left_varnos == NIL) + return NULL; + /* + * The left-hand expressions mustn't be volatile. (Perhaps we should + * test the combining operators, too? We'd only need to point the + * function directly at the sublink ...) + */ + if (contain_volatile_functions((Node *) sublink->lefthand)) + return NULL; + /* + * Okay, pull up the sub-select into top range table and jointree. + * + * We rely here on the assumption that the outer query has no references + * to the inner (necessarily true, other than the Vars that we build + * below). 
Therefore this is a lot easier than what pull_up_subqueries + * has to go through. + */ + rte = addRangeTableEntryForSubquery(NULL, + subselect, + makeAlias("IN_subquery", NIL), + false); + parse->rtable = lappend(parse->rtable, rte); + rtindex = length(parse->rtable); + rtr = makeNode(RangeTblRef); + rtr->rtindex = rtindex; + parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr); + /* + * Now build the InClauseInfo node. + */ + ininfo = makeNode(InClauseInfo); + ininfo->lefthand = left_varnos; + ininfo->righthand = makeListi1(rtindex); + parse->in_info_list = lcons(ininfo, parse->in_info_list); + /* + * Build the result qual expressions. As a side effect, + * ininfo->sub_targetlist is filled with a list of the Vars + * representing the subselect outputs. + */ + exprs = convert_sublink_opers(sublink->lefthand, + sublink->operOids, + subselect->targetList, + rtindex, + &ininfo->sub_targetlist); + return (Node *) make_ands_explicit(exprs); +} + /* * Replace correlation vars (uplevel vars) with Params. */ diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile index 60925de441f..05d4dd2de3e 100644 --- a/src/backend/optimizer/prep/Makefile +++ b/src/backend/optimizer/prep/Makefile @@ -4,7 +4,7 @@ # Makefile for optimizer/prep # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/prep/Makefile,v 1.13 2002/06/16 00:09:11 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/optimizer/prep/Makefile,v 1.14 2003/01/20 18:54:54 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/prep top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = prepqual.o preptlist.o prepunion.o +OBJS = prepjointree.o prepqual.o preptlist.o prepunion.o all: SUBSYS.o diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c new file mode 100644 index 00000000000..083528c0490 --- /dev/null +++ b/src/backend/optimizer/prep/prepjointree.c @@ -0,0 +1,680 @@ +/*------------------------------------------------------------------------- + * + * prepjointree.c + * Planner preprocessing for subqueries and join tree manipulation. + * + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.1 2003/01/20 18:54:54 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/clauses.h" +#include "optimizer/paths.h" +#include "optimizer/prep.h" +#include "optimizer/subselect.h" +#include "optimizer/var.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" + + +static bool is_simple_subquery(Query *subquery); +static bool has_nullable_targetlist(Query *subquery); +static void resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist); +static void fix_in_clause_relids(List *in_info_list, int varno, + Relids subrelids); +static Node *find_jointree_node_for_rel(Node *jtnode, int relid); + + +/* + * pull_up_IN_clauses + * Attempt to pull up top-level IN clauses to be treated like joins. + * + * A clause "foo IN (sub-SELECT)" appearing at the top level of WHERE can + * be processed by pulling the sub-SELECT up to become a rangetable entry + * and handling the implied equality comparisons as join operators (with + * special join rules). 
+ * This optimization *only* works at the top level of WHERE, because + * it cannot distinguish whether the IN ought to return FALSE or NULL in + * cases involving NULL inputs. This routine searches for such clauses + * and does the necessary parsetree transformations if any are found. + * + * This routine has to run before preprocess_expression(), so the WHERE + * clause is not yet reduced to implicit-AND format. That means we need + * to recursively search through explicit AND clauses, which are + * probably only binary ANDs. We stop as soon as we hit a non-AND item. + * + * Returns the possibly-modified version of the given qual-tree node. + */ +Node * +pull_up_IN_clauses(Query *parse, Node *node) +{ + if (node == NULL) + return NULL; + if (IsA(node, SubLink)) + { + SubLink *sublink = (SubLink *) node; + Node *subst; + + /* Is it a convertible IN clause? If not, return it as-is */ + subst = convert_IN_to_join(parse, sublink); + if (subst == NULL) + return node; + return subst; + } + if (and_clause(node)) + { + List *newclauses = NIL; + List *oldclauses; + + foreach(oldclauses, ((BoolExpr *) node)->args) + { + Node *oldclause = lfirst(oldclauses); + + newclauses = lappend(newclauses, + pull_up_IN_clauses(parse, + oldclause)); + } + return (Node *) make_andclause(newclauses); + } + /* Stop if not an AND */ + return node; +} + +/* + * pull_up_subqueries + * Look for subqueries in the rangetable that can be pulled up into + * the parent query. If the subquery has no special features like + * grouping/aggregation then we can merge it into the parent's jointree. + * + * below_outer_join is true if this jointree node is within the nullable + * side of an outer join. This restricts what we can do. + * + * A tricky aspect of this code is that if we pull up a subquery we have + * to replace Vars that reference the subquery's outputs throughout the + * parent query, including quals attached to jointree nodes above the one + * we are currently processing! 
We handle this by being careful not to + * change the jointree structure while recursing: no nodes other than + * subquery RangeTblRef entries will be replaced. Also, we can't turn + * ResolveNew loose on the whole jointree, because it'll return a mutated + * copy of the tree; we have to invoke it just on the quals, instead. + */ +Node * +pull_up_subqueries(Query *parse, Node *jtnode, bool below_outer_join) +{ + if (jtnode == NULL) + return NULL; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = rt_fetch(varno, parse->rtable); + Query *subquery = rte->subquery; + + /* + * Is this a subquery RTE, and if so, is the subquery simple + * enough to pull up? (If not, do nothing at this node.) + * + * If we are inside an outer join, only pull up subqueries whose + * targetlists are nullable --- otherwise substituting their tlist + * entries for upper Var references would do the wrong thing (the + * results wouldn't become NULL when they're supposed to). XXX + * This could be improved by generating pseudo-variables for such + * expressions; we'd have to figure out how to get the pseudo- + * variables evaluated at the right place in the modified plan + * tree. Fix it someday. + * + * Note: even if the subquery itself is simple enough, we can't pull + * it up if there is a reference to its whole tuple result. + * Perhaps a pseudo-variable is the answer here too. + */ + if (rte->rtekind == RTE_SUBQUERY && is_simple_subquery(subquery) && + (!below_outer_join || has_nullable_targetlist(subquery)) && + !contain_whole_tuple_var((Node *) parse, varno, 0)) + { + int rtoffset; + List *subtlist; + List *rt; + + /* + * First, pull up any IN clauses within the subquery's WHERE, + * so that we don't leave unoptimized INs behind. 
+ */ + if (subquery->hasSubLinks) + subquery->jointree->quals = pull_up_IN_clauses(subquery, + subquery->jointree->quals); + + /* + * Now, recursively pull up the subquery's subqueries, so + * that this routine's processing is complete for its jointree + * and rangetable. NB: if the same subquery is referenced + * from multiple jointree items (which can't happen normally, + * but might after rule rewriting), then we will invoke this + * processing multiple times on that subquery. OK because + * nothing will happen after the first time. We do have to be + * careful to copy everything we pull up, however, or risk + * having chunks of structure multiply linked. + * + * Note: 'false' is correct here even if we are within an outer + * join in the upper query; the lower query starts with a clean + * slate for outer-join semantics. + */ + subquery->jointree = (FromExpr *) + pull_up_subqueries(subquery, (Node *) subquery->jointree, + false); + + /* + * Now make a modifiable copy of the subquery that we can run + * OffsetVarNodes and IncrementVarSublevelsUp on. + */ + subquery = copyObject(subquery); + + /* + * Adjust level-0 varnos in subquery so that we can append its + * rangetable to upper query's. + */ + rtoffset = length(parse->rtable); + OffsetVarNodes((Node *) subquery, rtoffset, 0); + + /* + * Upper-level vars in subquery are now one level closer to their + * parent than before. + */ + IncrementVarSublevelsUp((Node *) subquery, -1, 1); + + /* + * Replace all of the top query's references to the subquery's + * outputs with copies of the adjusted subtlist items, being + * careful not to replace any of the jointree structure. + * (This'd be a lot cleaner if we could use + * query_tree_mutator.) 
+ */ + subtlist = subquery->targetList; + parse->targetList = (List *) + ResolveNew((Node *) parse->targetList, + varno, 0, subtlist, CMD_SELECT, 0); + resolvenew_in_jointree((Node *) parse->jointree, varno, subtlist); + Assert(parse->setOperations == NULL); + parse->havingQual = + ResolveNew(parse->havingQual, + varno, 0, subtlist, CMD_SELECT, 0); + parse->in_info_list = (List *) + ResolveNew((Node *) parse->in_info_list, + varno, 0, subtlist, CMD_SELECT, 0); + + foreach(rt, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); + + if (rte->rtekind == RTE_JOIN) + rte->joinaliasvars = (List *) + ResolveNew((Node *) rte->joinaliasvars, + varno, 0, subtlist, CMD_SELECT, 0); + } + + /* + * Now append the adjusted rtable entries to upper query. (We + * hold off until after fixing the upper rtable entries; no + * point in running that code on the subquery ones too.) + */ + parse->rtable = nconc(parse->rtable, subquery->rtable); + + /* + * Pull up any FOR UPDATE markers, too. (OffsetVarNodes + * already adjusted the marker values, so just nconc the + * list.) + */ + parse->rowMarks = nconc(parse->rowMarks, subquery->rowMarks); + + /* + * We also have to fix the relid lists of any parent InClauseInfo + * nodes. (This could perhaps be done by ResolveNew, but it + * would clutter that routine's API unreasonably.) + */ + if (parse->in_info_list) + { + Relids subrelids; + + subrelids = get_relids_in_jointree((Node *) subquery->jointree); + fix_in_clause_relids(parse->in_info_list, varno, subrelids); + } + + /* + * And now append any subquery InClauseInfos to our list. + */ + parse->in_info_list = nconc(parse->in_info_list, + subquery->in_info_list); + + /* + * Miscellaneous housekeeping. + */ + parse->hasSubLinks |= subquery->hasSubLinks; + /* subquery won't be pulled up if it hasAggs, so no work there */ + + /* + * Return the adjusted subquery jointree to replace the + * RangeTblRef entry in my jointree. 
+ */ + return (Node *) subquery->jointree; + } + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *l; + + foreach(l, f->fromlist) + lfirst(l) = pull_up_subqueries(parse, lfirst(l), + below_outer_join); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Recurse, being careful to tell myself when inside outer join */ + switch (j->jointype) + { + case JOIN_INNER: + j->larg = pull_up_subqueries(parse, j->larg, + below_outer_join); + j->rarg = pull_up_subqueries(parse, j->rarg, + below_outer_join); + break; + case JOIN_LEFT: + j->larg = pull_up_subqueries(parse, j->larg, + below_outer_join); + j->rarg = pull_up_subqueries(parse, j->rarg, + true); + break; + case JOIN_FULL: + j->larg = pull_up_subqueries(parse, j->larg, + true); + j->rarg = pull_up_subqueries(parse, j->rarg, + true); + break; + case JOIN_RIGHT: + j->larg = pull_up_subqueries(parse, j->larg, + true); + j->rarg = pull_up_subqueries(parse, j->rarg, + below_outer_join); + break; + case JOIN_UNION: + + /* + * This is where we fail if upper levels of planner + * haven't rewritten UNION JOIN as an Append ... + */ + elog(ERROR, "UNION JOIN is not implemented yet"); + break; + default: + elog(ERROR, "pull_up_subqueries: unexpected join type %d", + j->jointype); + break; + } + } + else + elog(ERROR, "pull_up_subqueries: unexpected node type %d", + nodeTag(jtnode)); + return jtnode; +} + +/* + * is_simple_subquery + * Check a subquery in the range table to see if it's simple enough + * to pull up into the parent query. + */ +static bool +is_simple_subquery(Query *subquery) +{ + /* + * Let's just make sure it's a valid subselect ... + */ + if (!IsA(subquery, Query) || + subquery->commandType != CMD_SELECT || + subquery->resultRelation != 0 || + subquery->into != NULL || + subquery->isPortal) + elog(ERROR, "is_simple_subquery: subquery is bogus"); + + /* + * Can't currently pull up a query with setops. Maybe after querytree + * redesign... 
+ */ + if (subquery->setOperations) + return false; + + /* + * Can't pull up a subquery involving grouping, aggregation, sorting, + * or limiting. + */ + if (subquery->hasAggs || + subquery->groupClause || + subquery->havingQual || + subquery->sortClause || + subquery->distinctClause || + subquery->limitOffset || + subquery->limitCount) + return false; + + /* + * Don't pull up a subquery that has any set-returning functions in + * its targetlist. Otherwise we might well wind up inserting + * set-returning functions into places where they mustn't go, such as + * quals of higher queries. + */ + if (expression_returns_set((Node *) subquery->targetList)) + return false; + + /* + * Hack: don't try to pull up a subquery with an empty jointree. + * query_planner() will correctly generate a Result plan for a + * jointree that's totally empty, but I don't think the right things + * happen if an empty FromExpr appears lower down in a jointree. Not + * worth working hard on this, just to collapse SubqueryScan/Result + * into Result... + */ + if (subquery->jointree->fromlist == NIL) + return false; + + return true; +} + +/* + * has_nullable_targetlist + * Check a subquery in the range table to see if all the non-junk + * targetlist items are simple variables (and, hence, will correctly + * go to NULL when examined above the point of an outer join). + * + * A possible future extension is to accept strict functions of simple + * variables, eg, "x + 1". + */ +static bool +has_nullable_targetlist(Query *subquery) +{ + List *l; + + foreach(l, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + /* ignore resjunk columns */ + if (tle->resdom->resjunk) + continue; + + /* Okay if tlist item is a simple Var */ + if (tle->expr && IsA(tle->expr, Var)) + continue; + + return false; + } + return true; +} + +/* + * Helper routine for pull_up_subqueries: do ResolveNew on every expression + * in the jointree, without changing the jointree structure itself. 
Ugly, + * but there's no other way... + */ +static void +resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do here */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *l; + + foreach(l, f->fromlist) + resolvenew_in_jointree(lfirst(l), varno, subtlist); + f->quals = ResolveNew(f->quals, + varno, 0, subtlist, CMD_SELECT, 0); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + resolvenew_in_jointree(j->larg, varno, subtlist); + resolvenew_in_jointree(j->rarg, varno, subtlist); + j->quals = ResolveNew(j->quals, + varno, 0, subtlist, CMD_SELECT, 0); + + /* + * We don't bother to update the colvars list, since it won't be + * used again ... + */ + } + else + elog(ERROR, "resolvenew_in_jointree: unexpected node type %d", + nodeTag(jtnode)); +} + +/* + * preprocess_jointree + * Attempt to simplify a query's jointree. + * + * If we succeed in pulling up a subquery then we might form a jointree + * in which a FromExpr is a direct child of another FromExpr. In that + * case we can consider collapsing the two FromExprs into one. This is + * an optional conversion, since the planner will work correctly either + * way. But we may find a better plan (at the cost of more planning time) + * if we merge the two nodes. + * + * NOTE: don't try to do this in the same jointree scan that does subquery + * pullup! Since we're changing the jointree structure here, that wouldn't + * work reliably --- see comments for pull_up_subqueries(). + */ +Node * +preprocess_jointree(Query *parse, Node *jtnode) +{ + if (jtnode == NULL) + return NULL; + if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do here... */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *newlist = NIL; + List *l; + + foreach(l, f->fromlist) + { + Node *child = (Node *) lfirst(l); + + /* Recursively simplify the child... 
*/ + child = preprocess_jointree(parse, child); + /* Now, is it a FromExpr? */ + if (child && IsA(child, FromExpr)) + { + /* + * Yes, so do we want to merge it into parent? Always do + * so if child has just one element (since that doesn't + * make the parent's list any longer). Otherwise we have + * to be careful about the increase in planning time + * caused by combining the two join search spaces into + * one. Our heuristic is to merge if the merge will + * produce a join list no longer than GEQO_RELS/2. + * (Perhaps need an additional user parameter?) + */ + FromExpr *subf = (FromExpr *) child; + int childlen = length(subf->fromlist); + int myothers = length(newlist) + length(lnext(l)); + + if (childlen <= 1 || (childlen + myothers) <= geqo_rels / 2) + { + newlist = nconc(newlist, subf->fromlist); + f->quals = make_and_qual(subf->quals, f->quals); + } + else + newlist = lappend(newlist, child); + } + else + newlist = lappend(newlist, child); + } + f->fromlist = newlist; + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Can't usefully change the JoinExpr, but recurse on children */ + j->larg = preprocess_jointree(parse, j->larg); + j->rarg = preprocess_jointree(parse, j->rarg); + } + else + elog(ERROR, "preprocess_jointree: unexpected node type %d", + nodeTag(jtnode)); + return jtnode; +} + +/* + * fix_in_clause_relids: update RT-index lists of InClauseInfo nodes + * + * When we pull up a subquery, any InClauseInfo references to the subquery's + * RT index have to be replaced by the list of substituted relids. + * + * We assume we may modify the InClauseInfo nodes in-place. 
+ */ +static void +fix_in_clause_relids(List *in_info_list, int varno, Relids subrelids) +{ + List *l; + + foreach(l, in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + if (intMember(varno, ininfo->lefthand)) + { + ininfo->lefthand = lremovei(varno, ininfo->lefthand); + ininfo->lefthand = nconc(ininfo->lefthand, listCopy(subrelids)); + } + if (intMember(varno, ininfo->righthand)) + { + ininfo->righthand = lremovei(varno, ininfo->righthand); + ininfo->righthand = nconc(ininfo->righthand, listCopy(subrelids)); + } + } +} + +/* + * get_relids_in_jointree: get list of base RT indexes present in a jointree + */ +List * +get_relids_in_jointree(Node *jtnode) +{ + Relids result = NIL; + + if (jtnode == NULL) + return result; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + result = makeListi1(varno); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *l; + + /* + * Note: we assume it's impossible to see same RT index from more + * than one subtree, so nconc() is OK rather than set_unioni(). 
+ */ + foreach(l, f->fromlist) + { + result = nconc(result, + get_relids_in_jointree(lfirst(l))); + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* join's own RT index is not wanted in result */ + result = get_relids_in_jointree(j->larg); + result = nconc(result, get_relids_in_jointree(j->rarg)); + } + else + elog(ERROR, "get_relids_in_jointree: unexpected node type %d", + nodeTag(jtnode)); + return result; +} + +/* + * get_relids_for_join: get list of base RT indexes making up a join + */ +List * +get_relids_for_join(Query *parse, int joinrelid) +{ + Node *jtnode; + + jtnode = find_jointree_node_for_rel((Node *) parse->jointree, joinrelid); + if (!jtnode) + elog(ERROR, "get_relids_for_join: join node %d not found", joinrelid); + return get_relids_in_jointree(jtnode); +} + +/* + * find_jointree_node_for_rel: locate jointree node for a base or join RT index + * + * Returns NULL if not found + */ +static Node * +find_jointree_node_for_rel(Node *jtnode, int relid) +{ + if (jtnode == NULL) + return NULL; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + if (relid == varno) + return jtnode; + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *l; + + /* + * Note: we assume it's impossible to see same RT index from more + * than one subtree, so we can stop at the first match we find.
+ */ + foreach(l, f->fromlist) + { + jtnode = find_jointree_node_for_rel(lfirst(l), relid); + if (jtnode) + return jtnode; + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + if (relid == j->rtindex) + return jtnode; + jtnode = find_jointree_node_for_rel(j->larg, relid); + if (jtnode) + return jtnode; + jtnode = find_jointree_node_for_rel(j->rarg, relid); + if (jtnode) + return jtnode; + } + else + elog(ERROR, "find_jointree_node_for_rel: unexpected node type %d", + nodeTag(jtnode)); + return NULL; +} diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 807364fac8d..97e4d56a9f4 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.87 2003/01/17 02:01:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.88 2003/01/20 18:54:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,6 +62,7 @@ static List *generate_append_tlist(List *colTypes, bool flag, List *refnames_tlist); static Node *adjust_inherited_attrs_mutator(Node *node, adjust_inherited_attrs_context *context); +static List *adjust_rtindex_list(List *relids, Index oldrelid, Index newrelid); static List *adjust_inherited_tlist(List *tlist, Oid new_relid); @@ -239,8 +240,9 @@ generate_union_plan(SetOperationStmt *op, Query *parse, tlist = new_unsorted_tlist(tlist); sortList = addAllTargetsToSortList(NIL, tlist); - plan = make_sortplan(parse, tlist, plan, sortList); - plan = (Plan *) make_unique(tlist, plan, copyObject(sortList)); + plan = (Plan *) make_sort_from_sortclauses(parse, tlist, + plan, sortList); + plan = (Plan *) make_unique(tlist, plan, sortList); } return plan; } @@ -292,7 +294,7 @@ generate_nonunion_plan(SetOperationStmt *op, Query *parse, */ tlist = new_unsorted_tlist(tlist); sortList = 
addAllTargetsToSortList(NIL, tlist); - plan = make_sortplan(parse, tlist, plan, sortList); + plan = (Plan *) make_sort_from_sortclauses(parse, tlist, plan, sortList); switch (op->op) { case SETOP_INTERSECT: @@ -830,6 +832,23 @@ adjust_inherited_attrs_mutator(Node *node, j->rtindex = context->new_rt_index; return (Node *) j; } + if (IsA(node, InClauseInfo)) + { + /* Copy the InClauseInfo node with correct mutation of subnodes */ + InClauseInfo *ininfo; + + ininfo = (InClauseInfo *) expression_tree_mutator(node, + adjust_inherited_attrs_mutator, + (void *) context); + /* now fix InClauseInfo's rtindex lists */ + ininfo->lefthand = adjust_rtindex_list(ininfo->lefthand, + context->old_rt_index, + context->new_rt_index); + ininfo->righthand = adjust_rtindex_list(ininfo->righthand, + context->old_rt_index, + context->new_rt_index); + return (Node *) ininfo; + } /* * We have to process RestrictInfo nodes specially. @@ -856,26 +875,12 @@ adjust_inherited_attrs_mutator(Node *node, /* * Adjust left/right relids lists too. 
*/ - if (intMember(context->old_rt_index, oldinfo->left_relids)) - { - newinfo->left_relids = listCopy(oldinfo->left_relids); - newinfo->left_relids = lremovei(context->old_rt_index, - newinfo->left_relids); - newinfo->left_relids = lconsi(context->new_rt_index, - newinfo->left_relids); - } - else - newinfo->left_relids = oldinfo->left_relids; - if (intMember(context->old_rt_index, oldinfo->right_relids)) - { - newinfo->right_relids = listCopy(oldinfo->right_relids); - newinfo->right_relids = lremovei(context->old_rt_index, - newinfo->right_relids); - newinfo->right_relids = lconsi(context->new_rt_index, - newinfo->right_relids); - } - else - newinfo->right_relids = oldinfo->right_relids; + newinfo->left_relids = adjust_rtindex_list(oldinfo->left_relids, + context->old_rt_index, + context->new_rt_index); + newinfo->right_relids = adjust_rtindex_list(oldinfo->right_relids, + context->old_rt_index, + context->new_rt_index); newinfo->eval_cost.startup = -1; /* reset these too */ newinfo->this_selec = -1; @@ -922,6 +927,23 @@ adjust_inherited_attrs_mutator(Node *node, (void *) context); } +/* + * Substitute newrelid for oldrelid in a list of RT indexes + */ +static List * +adjust_rtindex_list(List *relids, Index oldrelid, Index newrelid) +{ + if (intMember(oldrelid, relids)) + { + /* Ensure we have a modifiable copy */ + relids = listCopy(relids); + /* Remove old, add new */ + relids = lremovei(oldrelid, relids); + relids = lconsi(newrelid, relids); + } + return relids; +} + /* * Adjust the targetlist entries of an inherited UPDATE operation * diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index acd17ba87d2..253c9e88138 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.124 2003/01/17 03:25:03 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.125 
2003/01/20 18:54:54 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -2200,6 +2200,15 @@ expression_tree_walker(Node *node, return true; } break; + case T_InClauseInfo: + { + InClauseInfo *ininfo = (InClauseInfo *) node; + + if (expression_tree_walker((Node *) ininfo->sub_targetlist, + walker, context)) + return true; + } + break; default: elog(ERROR, "expression_tree_walker: Unexpected node type %d", nodeTag(node)); @@ -2241,6 +2250,8 @@ query_tree_walker(Query *query, return true; if (walker(query->havingQual, context)) return true; + if (walker(query->in_info_list, context)) + return true; foreach(rt, query->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); @@ -2610,6 +2621,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_InClauseInfo: + { + InClauseInfo *ininfo = (InClauseInfo *) node; + InClauseInfo *newnode; + + FLATCOPY(newnode, ininfo, InClauseInfo); + MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *); + return (Node *) newnode; + } + break; default: elog(ERROR, "expression_tree_mutator: Unexpected node type %d", nodeTag(node)); @@ -2662,6 +2683,7 @@ query_tree_mutator(Query *query, MUTATE(query->jointree, query->jointree, FromExpr *); MUTATE(query->setOperations, query->setOperations, Node *); MUTATE(query->havingQual, query->havingQual, Node *); + MUTATE(query->in_info_list, query->in_info_list, List *); foreach(rt, query->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index 0f3cf201908..c202615b1f5 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -8,37 +8,29 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/joininfo.c,v 1.31 2002/06/20 20:29:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/joininfo.c,v 1.32 2003/01/20 18:54:56 tgl Exp $ * 
*------------------------------------------------------------------------- */ #include "postgres.h" - #include "optimizer/joininfo.h" -static JoinInfo *joininfo_member(List *join_relids, List *joininfo_list); /* - * joininfo_member - * Determines whether a node has already been created for a join - * between a set of join relations and the relation described by - * 'joininfo_list'. - * - * 'join_relids' is a list of relids corresponding to the join relation - * 'joininfo_list' is the list of joininfo nodes against which this is - * checked - * - * Returns the corresponding node in 'joininfo_list' if such a node - * exists. + * find_joininfo_node + * Find the joininfo node within a relation entry corresponding + * to a join between 'this_rel' and the relations in 'join_relids'. + * If there is no such node, return NULL. * + * Returns a joininfo node, or NULL. */ -static JoinInfo * -joininfo_member(List *join_relids, List *joininfo_list) +JoinInfo * +find_joininfo_node(RelOptInfo *this_rel, Relids join_relids) { List *i; - foreach(i, joininfo_list) + foreach(i, this_rel->joininfo) { JoinInfo *joininfo = (JoinInfo *) lfirst(i); @@ -48,22 +40,19 @@ joininfo_member(List *join_relids, List *joininfo_list) return NULL; } - /* - * find_joininfo_node + * make_joininfo_node * Find the joininfo node within a relation entry corresponding * to a join between 'this_rel' and the relations in 'join_relids'. * A new node is created and added to the relation entry's joininfo * field if the desired one can't be found. * * Returns a joininfo node. 
- * */ JoinInfo * -find_joininfo_node(RelOptInfo *this_rel, Relids join_relids) +make_joininfo_node(RelOptInfo *this_rel, Relids join_relids) { - JoinInfo *joininfo = joininfo_member(join_relids, - this_rel->joininfo); + JoinInfo *joininfo = find_joininfo_node(this_rel, join_relids); if (joininfo == NULL) { diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 615d9966973..a5cc94e831b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.83 2002/12/05 15:50:35 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,8 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" +#include "utils/memutils.h" +#include "utils/selfuncs.h" /***************************************************************************** @@ -149,6 +151,7 @@ set_cheapest(RelOptInfo *parent_rel) parent_rel->cheapest_startup_path = cheapest_startup_path; parent_rel->cheapest_total_path = cheapest_total_path; + parent_rel->cheapest_unique_path = NULL; /* computed only if needed */ } /* @@ -489,6 +492,111 @@ create_material_path(RelOptInfo *rel, Path *subpath) return pathnode; } +/* + * create_unique_path + * Creates a path representing elimination of duplicate rows from the + * input data. + * + * If used at all, this is likely to be called repeatedly on the same rel; + * and the input subpath should always be the same (the cheapest_total path + * for the rel). So we cache the result.
+ */ +UniquePath * +create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) +{ + UniquePath *pathnode; + Path sort_path; /* dummy for result of cost_sort */ + MemoryContext oldcontext; + List *sub_targetlist; + List *l; + int numCols; + + /* Caller made a mistake if subpath isn't cheapest_total */ + Assert(subpath == rel->cheapest_total_path); + + /* If result already cached, return it */ + if (rel->cheapest_unique_path) + return (UniquePath *) rel->cheapest_unique_path; + + /* + * We must ensure path struct is allocated in same context as parent + * rel; otherwise GEQO memory management causes trouble. (Compare + * best_inner_indexscan().) + */ + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + + pathnode = makeNode(UniquePath); + + /* There is no substructure to allocate, so can switch back right away */ + MemoryContextSwitchTo(oldcontext); + + pathnode->path.pathtype = T_Unique; + pathnode->path.parent = rel; + + /* + * Treat the output as always unsorted, since we don't necessarily have + * pathkeys to represent it. + */ + pathnode->path.pathkeys = NIL; + + pathnode->subpath = subpath; + + /* + * Try to identify the targetlist that will actually be unique-ified. + * In current usage, this routine is only used for sub-selects of IN + * clauses, so we should be able to find the tlist in in_info_list. + */ + sub_targetlist = NIL; + foreach(l, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + if (sameseti(ininfo->righthand, rel->relids)) + { + sub_targetlist = ininfo->sub_targetlist; + break; + } + } + + /* + * If we know the targetlist, try to estimate number of result rows; + * otherwise punt. 
+ */ + if (sub_targetlist) + { + pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows); + numCols = length(sub_targetlist); + } + else + { + pathnode->rows = rel->rows; + numCols = length(rel->targetlist); /* second-best estimate */ + } + + /* + * Estimate cost for sort+unique implementation + */ + cost_sort(&sort_path, root, NIL, + subpath->total_cost, + rel->rows, + rel->width); + /* + * Charge one cpu_operator_cost per comparison per input tuple. We + * assume all columns get compared at most of the tuples. (XXX probably + * this is an overestimate.) This should agree with make_unique. + */ + sort_path.total_cost += cpu_operator_cost * rel->rows * numCols; + + pathnode->use_hash = false; /* for now */ + + pathnode->path.startup_cost = sort_path.startup_cost; + pathnode->path.total_cost = sort_path.total_cost; + + rel->cheapest_unique_path = (Path *) pathnode; + + return pathnode; +} + /* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 87207f617cc..144fac75501 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.43 2003/01/15 19:35:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.44 2003/01/20 18:54:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -140,6 +140,7 @@ make_base_rel(Query *root, int relid) rel->pathlist = NIL; rel->cheapest_startup_path = NULL; rel->cheapest_total_path = NULL; + rel->cheapest_unique_path = NULL; rel->pruneable = true; rel->rtekind = rte->rtekind; rel->indexlist = NIL; @@ -244,6 +245,7 @@ find_join_rel(Query *root, Relids relids) * Returns relation entry corresponding to the union of two given rels, * creating a new relation entry if none already exists. 
* + * 'joinrelids' is the Relids list that uniquely identifies the join * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be * joined * 'jointype': type of join (inner/outer) @@ -256,27 +258,20 @@ find_join_rel(Query *root, Relids relids) */ RelOptInfo * build_join_rel(Query *root, + List *joinrelids, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, List **restrictlist_ptr) { - List *joinrelids; RelOptInfo *joinrel; List *restrictlist; List *new_outer_tlist; List *new_inner_tlist; - /* We should never try to join two overlapping sets of rels. */ - Assert(nonoverlap_setsi(outer_rel->relids, inner_rel->relids)); - /* * See if we already have a joinrel for this set of base rels. - * - * nconc(listCopy(x), y) is an idiom for making a new list without - * changing either input list. */ - joinrelids = nconc(listCopy(outer_rel->relids), inner_rel->relids); joinrel = find_join_rel(root, joinrelids); if (joinrel) @@ -299,13 +294,14 @@ build_join_rel(Query *root, */ joinrel = makeNode(RelOptInfo); joinrel->reloptkind = RELOPT_JOINREL; - joinrel->relids = joinrelids; + joinrel->relids = listCopy(joinrelids); joinrel->rows = 0; joinrel->width = 0; joinrel->targetlist = NIL; joinrel->pathlist = NIL; joinrel->cheapest_startup_path = NULL; joinrel->cheapest_total_path = NULL; + joinrel->cheapest_unique_path = NULL; joinrel->pruneable = true; joinrel->rtekind = RTE_JOIN; joinrel->indexlist = NIL; @@ -557,7 +553,7 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, */ JoinInfo *new_joininfo; - new_joininfo = find_joininfo_node(joinrel, new_unjoined_relids); + new_joininfo = make_joininfo_node(joinrel, new_unjoined_relids); new_joininfo->jinfo_restrictinfo = set_union(new_joininfo->jinfo_restrictinfo, joininfo->jinfo_restrictinfo); diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 0d268b8e40c..6f90ea87568 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ 
* * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.53 2002/12/12 15:49:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.54 2003/01/20 18:54:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -255,3 +255,25 @@ get_sortgroupclause_expr(SortClause *sortClause, List *targetList) return (Node *) tle->expr; } + +/* + * get_sortgrouplist_exprs + * Given a list of SortClauses (or GroupClauses), build a list + * of the referenced targetlist expressions. + */ +List * +get_sortgrouplist_exprs(List *sortClauses, List *targetList) +{ + List *result = NIL; + List *l; + + foreach(l, sortClauses) + { + SortClause *sortcl = (SortClause *) lfirst(l); + Node *sortexpr; + + sortexpr = get_sortgroupclause_expr(sortcl, targetList); + result = lappend(result, sortexpr); + } + return result; +} diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 1eb9d9774ed..729ded51323 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.46 2003/01/17 02:01:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.47 2003/01/20 18:54:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "nodes/plannodes.h" #include "optimizer/clauses.h" +#include "optimizer/prep.h" #include "optimizer/var.h" #include "parser/parsetree.h" @@ -41,7 +42,7 @@ typedef struct typedef struct { - List *rtable; + Query *root; int sublevels_up; } flatten_join_alias_vars_context; @@ -50,10 +51,13 @@ static bool pull_varnos_walker(Node *node, static bool contain_var_reference_walker(Node *node, contain_var_reference_context *context); static bool contain_var_clause_walker(Node *node, void *context); +static bool contain_vars_of_level_walker(Node *node, int *sublevels_up); 
+static bool contain_vars_above_level_walker(Node *node, int *sublevels_up); static bool pull_var_clause_walker(Node *node, pull_var_clause_context *context); static Node *flatten_join_alias_vars_mutator(Node *node, flatten_join_alias_vars_context *context); +static List *alias_rtindex_list(Query *root, List *rtlist); /* @@ -224,6 +228,103 @@ contain_var_clause_walker(Node *node, void *context) return expression_tree_walker(node, contain_var_clause_walker, context); } +/* + * contain_vars_of_level + * Recursively scan a clause to discover whether it contains any Var nodes + * of the specified query level. + * + * Returns true if any such Var found. + * + * Will recurse into sublinks. Also, may be invoked directly on a Query. + */ +bool +contain_vars_of_level(Node *node, int levelsup) +{ + int sublevels_up = levelsup; + + return query_or_expression_tree_walker(node, + contain_vars_of_level_walker, + (void *) &sublevels_up, + 0); +} + +static bool +contain_vars_of_level_walker(Node *node, int *sublevels_up) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup == *sublevels_up) + return true; /* abort tree traversal and return true */ + } + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + (*sublevels_up)++; + result = query_tree_walker((Query *) node, + contain_vars_of_level_walker, + (void *) sublevels_up, + 0); + (*sublevels_up)--; + return result; + } + return expression_tree_walker(node, + contain_vars_of_level_walker, + (void *) sublevels_up); +} + +/* + * contain_vars_above_level + * Recursively scan a clause to discover whether it contains any Var nodes + * above the specified query level. (For example, pass zero to detect + * all nonlocal Vars.) + * + * Returns true if any such Var found. + * + * Will recurse into sublinks. Also, may be invoked directly on a Query. 
+ */ +bool +contain_vars_above_level(Node *node, int levelsup) +{ + int sublevels_up = levelsup; + + return query_or_expression_tree_walker(node, + contain_vars_above_level_walker, + (void *) &sublevels_up, + 0); +} + +static bool +contain_vars_above_level_walker(Node *node, int *sublevels_up) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup > *sublevels_up) + return true; /* abort tree traversal and return true */ + } + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + (*sublevels_up)++; + result = query_tree_walker((Query *) node, + contain_vars_above_level_walker, + (void *) sublevels_up, + 0); + (*sublevels_up)--; + return result; + } + return expression_tree_walker(node, + contain_vars_above_level_walker, + (void *) sublevels_up); +} + /* * pull_var_clause @@ -277,11 +378,11 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context) * to be applied directly to a Query node. */ Node * -flatten_join_alias_vars(Node *node, List *rtable) +flatten_join_alias_vars(Query *root, Node *node) { flatten_join_alias_vars_context context; - context.rtable = rtable; + context.root = root; context.sublevels_up = 0; return flatten_join_alias_vars_mutator(node, &context); @@ -301,7 +402,7 @@ flatten_join_alias_vars_mutator(Node *node, if (var->varlevelsup != context->sublevels_up) return node; /* no need to copy, really */ - rte = rt_fetch(var->varno, context->rtable); + rte = rt_fetch(var->varno, context->root->rtable); if (rte->rtekind != RTE_JOIN) return node; Assert(var->varattno > 0); @@ -309,6 +410,24 @@ flatten_join_alias_vars_mutator(Node *node, /* expand it; recurse in case join input is itself a join */ return flatten_join_alias_vars_mutator(newvar, context); } + if (IsA(node, InClauseInfo)) + { + /* Copy the InClauseInfo node with correct mutation of subnodes */ + InClauseInfo *ininfo; + + ininfo = (InClauseInfo *) expression_tree_mutator(node, + flatten_join_alias_vars_mutator, 
+ (void *) context); + /* now fix InClauseInfo's rtindex lists */ + if (context->sublevels_up == 0) + { + ininfo->lefthand = alias_rtindex_list(context->root, + ininfo->lefthand); + ininfo->righthand = alias_rtindex_list(context->root, + ininfo->righthand); + } + return (Node *) ininfo; + } if (IsA(node, Query)) { @@ -329,3 +448,27 @@ flatten_join_alias_vars_mutator(Node *node, return expression_tree_mutator(node, flatten_join_alias_vars_mutator, (void *) context); } + +/* + * alias_rtindex_list: in a list of RT indexes, replace joins by their + * underlying base relids + */ +static List * +alias_rtindex_list(Query *root, List *rtlist) +{ + List *result = NIL; + List *l; + + foreach(l, rtlist) + { + int rtindex = lfirsti(l); + RangeTblEntry *rte; + + rte = rt_fetch(rtindex, root->rtable); + if (rte->rtekind == RTE_JOIN) + result = nconc(result, get_relids_for_join(root, rtindex)); + else + result = lappendi(result, rtindex); + } + return result; +} diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 4a4f6824b78..44604289663 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.69 2003/01/17 02:01:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.70 2003/01/20 18:54:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,8 +90,8 @@ checkExprHasSubLink_walker(Node *node, void *context) * * Find all Var nodes in the given tree with varlevelsup == sublevels_up, * and increment their varno fields (rangetable indexes) by 'offset'. - * The varnoold fields are adjusted similarly. Also, RangeTblRef and - * JoinExpr nodes in join trees and setOp trees are adjusted. + * The varnoold fields are adjusted similarly. Also, adjust other nodes + * that contain rangetable indexes, such as RangeTblRef and JoinExpr. 
* * NOTE: although this has the form of a walker, we cheat and modify the * nodes in-place. The given expression tree should have been copied @@ -137,6 +137,25 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context) j->rtindex += context->offset; /* fall through to examine children */ } + if (IsA(node, InClauseInfo)) + { + InClauseInfo *ininfo = (InClauseInfo *) node; + + if (context->sublevels_up == 0) + { + List *rt; + + foreach(rt, ininfo->lefthand) + { + lfirsti(rt) += context->offset; + } + foreach(rt, ininfo->righthand) + { + lfirsti(rt) += context->offset; + } + } + /* fall through to examine children */ + } if (IsA(node, Query)) { /* Recurse into subselects */ @@ -196,8 +215,8 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up) * * Find all Var nodes in the given tree belonging to a specific relation * (identified by sublevels_up and rt_index), and change their varno fields - * to 'new_index'. The varnoold fields are changed too. Also, RangeTblRef - * and JoinExpr nodes in join trees and setOp trees are adjusted. + * to 'new_index'. The varnoold fields are changed too. Also, adjust other + * nodes that contain rangetable indexes, such as RangeTblRef and JoinExpr. * * NOTE: although this has the form of a walker, we cheat and modify the * nodes in-place. 
The given expression tree should have been copied @@ -247,6 +266,27 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) j->rtindex = context->new_index; /* fall through to examine children */ } + if (IsA(node, InClauseInfo)) + { + InClauseInfo *ininfo = (InClauseInfo *) node; + + if (context->sublevels_up == 0) + { + List *rt; + + foreach(rt, ininfo->lefthand) + { + if (lfirsti(rt) == context->rt_index) + lfirsti(rt) = context->new_index; + } + foreach(rt, ininfo->righthand) + { + if (lfirsti(rt) == context->rt_index) + lfirsti(rt) = context->new_index; + } + } + /* fall through to examine children */ + } if (IsA(node, Query)) { /* Recurse into subselects */ @@ -423,6 +463,16 @@ rangeTableEntry_used_walker(Node *node, return true; /* fall through to examine children */ } + if (IsA(node, InClauseInfo)) + { + InClauseInfo *ininfo = (InClauseInfo *) node; + + if (context->sublevels_up == 0 && + (intMember(context->rt_index, ininfo->lefthand) || + intMember(context->rt_index, ininfo->righthand))) + return true; + /* fall through to examine children */ + } if (IsA(node, Query)) { /* Recurse into subselects */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fe6f38eee85..42ad9f5f94b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.126 2003/01/15 19:35:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.127 2003/01/20 18:54:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1825,8 +1825,7 @@ mergejoinscansel(Query *root, Node *clause, * * Inputs: * root - the query - * groupClauses - list of GroupClauses (or SortClauses for the DISTINCT - * case, but those are equivalent structs) + * groupExprs - list of expressions being grouped by * input_rows - number of rows estimated to arrive at the group/unique * 
filter step * @@ -1867,7 +1866,7 @@ mergejoinscansel(Query *root, Node *clause, * do better). */ double -estimate_num_groups(Query *root, List *groupClauses, double input_rows) +estimate_num_groups(Query *root, List *groupExprs, double input_rows) { List *allvars = NIL; List *varinfos = NIL; @@ -1879,14 +1878,12 @@ estimate_num_groups(Query *root, List *groupClauses, double input_rows) } MyVarInfo; /* We should not be called unless query has GROUP BY (or DISTINCT) */ - Assert(groupClauses != NIL); + Assert(groupExprs != NIL); /* Step 1: get the unique Vars used */ - foreach(l, groupClauses) + foreach(l, groupExprs) { - GroupClause *grpcl = (GroupClause *) lfirst(l); - Node *groupexpr = get_sortgroupclause_expr(grpcl, - root->targetList); + Node *groupexpr = (Node *) lfirst(l); List *varshere; varshere = pull_var_clause(groupexpr, false); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index f119b5111db..bf8bb1719ed 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodes.h,v 1.134 2002/12/16 16:22:46 tgl Exp $ + * $Id: nodes.h,v 1.135 2003/01/20 18:55:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -152,10 +152,12 @@ typedef enum NodeTag T_AppendPath, T_ResultPath, T_MaterialPath, + T_UniquePath, T_PathKeyItem, T_RestrictInfo, T_JoinInfo, T_InnerIndexscanInfo, + T_InClauseInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) @@ -408,11 +410,20 @@ typedef enum JoinType * join in the executor. (The planner must convert it to an Append * plan.) */ - JOIN_UNION + JOIN_UNION, /* - * Eventually we will have some additional join types for efficient - * support of queries like WHERE foo IN (SELECT bar FROM ...). + * These are used for queries like WHERE foo IN (SELECT bar FROM ...). 
+ * Only JOIN_IN is actually implemented in the executor; the others + * are defined for internal use in the planner. + */ + JOIN_IN, /* at most one result per outer row */ + JOIN_REVERSE_IN, /* at most one result per inner row */ + JOIN_UNIQUE_OUTER, /* outer path must be made unique */ + JOIN_UNIQUE_INNER /* inner path must be made unique */ + + /* + * We might need additional join types someday. */ } JoinType; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 6ca3894b0da..9bbee593bf4 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parsenodes.h,v 1.225 2003/01/06 00:31:45 tgl Exp $ + * $Id: parsenodes.h,v 1.226 2003/01/20 18:55:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,6 +101,7 @@ typedef struct Query List *join_rel_list; /* list of join-relation RelOptInfos */ List *equi_key_list; /* list of lists of equijoined * PathKeyItems */ + List *in_info_list; /* list of InClauseInfos */ List *query_pathkeys; /* desired pathkeys for query_planner() */ bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */ } Query; diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 9ef4fab957e..d3b01b7fed0 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_list.h,v 1.30 2002/11/24 21:52:15 tgl Exp $ + * $Id: pg_list.h,v 1.31 2003/01/20 18:55:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -145,7 +145,8 @@ extern List *set_intersecti(List *list1, List *list2); extern bool equali(List *list1, List *list2); extern bool sameseti(List 
*list1, List *list2); -extern bool nonoverlap_setsi(List *list1, List *list2); +extern bool overlap_setsi(List *list1, List *list2); +#define nonoverlap_setsi(list1, list2) (!overlap_setsi(list1, list2)) extern bool is_subseti(List *list1, List *list2); extern void freeList(List *list); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index a21debe02f9..c2d8970234f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relation.h,v 1.76 2003/01/15 19:35:44 tgl Exp $ + * $Id: relation.h,v 1.77 2003/01/20 18:55:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -97,6 +97,8 @@ typedef struct QualCost * (regardless of its ordering) * cheapest_total_path - the pathlist member with lowest total cost * (regardless of its ordering) + * cheapest_unique_path - for caching cheapest path to produce unique + * (no duplicates) output from relation * pruneable - flag to let the planner know whether it can prune the * pathlist of this RelOptInfo or not. * @@ -183,6 +185,7 @@ typedef struct RelOptInfo List *pathlist; /* Path structures */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; + struct Path *cheapest_unique_path; bool pruneable; /* information about a base rel (not set for join rels!) */ @@ -403,6 +406,23 @@ typedef struct MaterialPath Path *subpath; } MaterialPath; +/* + * UniquePath represents elimination of duplicate rows from the output of + * its subpath. + * + * This is unlike the other Path nodes in that it can actually generate + * two different plans: either hash-based or sort-based implementation. + * The decision is sufficiently localized that it's not worth having two + * separate Path node types.
+ */ +typedef struct UniquePath +{ + Path path; + Path *subpath; + bool use_hash; + double rows; /* estimated number of result tuples */ +} UniquePath; + /* * All join-type paths share these fields. */ @@ -649,4 +669,25 @@ typedef struct InnerIndexscanInfo Path *best_innerpath; /* best inner indexscan, or NULL if none */ } InnerIndexscanInfo; +/* + * IN clause info. + * + * When we convert top-level IN quals into join operations, we must restrict + * the order of joining and use special join methods at some join points. + * We record information about each such IN clause in an InClauseInfo struct. + * These structs are kept in the Query node's in_info_list. + */ + +typedef struct InClauseInfo +{ + NodeTag type; + List *lefthand; /* base relids in lefthand expressions */ + List *righthand; /* base relids coming from the subselect */ + List *sub_targetlist; /* targetlist of original RHS subquery */ + /* + * Note: sub_targetlist is just a list of Vars or expressions; + * it does not contain TargetEntry nodes. 
+ */ +} InClauseInfo; + #endif /* RELATION_H */ diff --git a/src/include/optimizer/joininfo.h b/src/include/optimizer/joininfo.h index f17e278238c..37131b722d2 100644 --- a/src/include/optimizer/joininfo.h +++ b/src/include/optimizer/joininfo.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: joininfo.h,v 1.21 2002/06/20 20:29:51 momjian Exp $ + * $Id: joininfo.h,v 1.22 2003/01/20 18:55:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,5 +17,6 @@ #include "nodes/relation.h" extern JoinInfo *find_joininfo_node(RelOptInfo *this_rel, List *join_relids); +extern JoinInfo *make_joininfo_node(RelOptInfo *this_rel, List *join_relids); #endif /* JOININFO_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 77ed27e7e55..759b18c2499 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pathnode.h,v 1.47 2003/01/15 19:35:47 tgl Exp $ + * $Id: pathnode.h,v 1.48 2003/01/20 18:55:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,8 @@ extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths); extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual); extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); +extern UniquePath *create_unique_path(Query *root, RelOptInfo *rel, + Path *subpath); extern Path *create_subqueryscan_path(RelOptInfo *rel); extern Path *create_functionscan_path(Query *root, RelOptInfo *rel); @@ -75,6 +77,7 @@ extern void build_base_rel(Query *root, int relid); extern RelOptInfo *build_other_rel(Query *root, int relid); extern RelOptInfo 
*find_base_rel(Query *root, int relid); extern RelOptInfo *build_join_rel(Query *root, + List *joinrelids, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 66925931609..cf9c2ddeb64 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planmain.h,v 1.66 2003/01/15 23:10:32 tgl Exp $ + * $Id: planmain.h,v 1.67 2003/01/20 18:55:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,6 +32,8 @@ extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, extern Append *make_append(List *appendplans, bool isTarget, List *tlist); extern Sort *make_sort(Query *root, List *tlist, Plan *lefttree, int keycount); +extern Sort *make_sort_from_sortclauses(Query *root, List *tlist, + Plan *lefttree, List *sortcls); extern Agg *make_agg(Query *root, List *tlist, List *qual, AggStrategy aggstrategy, int numGroupCols, AttrNumber *grpColIdx, diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index f49583a7ef3..16885b2f138 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planner.h,v 1.24 2002/06/20 20:29:51 momjian Exp $ + * $Id: planner.h,v 1.25 2003/01/20 18:55:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,4 @@ extern Plan *planner(Query *parse); extern Plan *subquery_planner(Query *parse, double tuple_fraction); -extern Plan *make_sortplan(Query *parse, List *tlist, - Plan *plannode, List *sortcls); - #endif /* PLANNER_H */ diff --git 
a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 1bb64af3ae5..17ecb4d593f 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: prep.h,v 1.33 2002/08/29 16:03:49 tgl Exp $ + * $Id: prep.h,v 1.34 2003/01/20 18:55:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,16 @@ #include "nodes/parsenodes.h" #include "nodes/plannodes.h" +/* + * prototypes for prepjointree.c + */ +extern Node *pull_up_IN_clauses(Query *parse, Node *node); +extern Node *pull_up_subqueries(Query *parse, Node *jtnode, + bool below_outer_join); +extern Node *preprocess_jointree(Query *parse, Node *jtnode); +extern List *get_relids_in_jointree(Node *jtnode); +extern List *get_relids_for_join(Query *parse, int joinrelid); + /* * prototypes for prepqual.c */ diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index 8fead9929f6..2e6a4640684 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -2,6 +2,11 @@ * * subselect.h * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: subselect.h,v 1.17 2003/01/20 18:55:05 tgl Exp $ + * *------------------------------------------------------------------------- */ #ifndef SUBSELECT_H @@ -14,8 +19,9 @@ extern List *PlannerInitPlan; /* init subplans for current query */ extern List *PlannerParamVar; /* to get Var from Param->paramid */ extern int PlannerPlanId; /* to assign unique ID to subquery plans */ -extern List *SS_finalize_plan(Plan *plan, List *rtable); +extern Node *convert_IN_to_join(Query *parse, SubLink *sublink); extern Node *SS_replace_correlation_vars(Node *expr); extern Node *SS_process_sublinks(Node *expr, bool 
isQual); +extern List *SS_finalize_plan(Plan *plan, List *rtable); #endif /* SUBSELECT_H */ diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 7b82b5ae291..b38f4016f98 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: tlist.h,v 1.32 2002/06/20 20:29:51 momjian Exp $ + * $Id: tlist.h,v 1.33 2003/01/20 18:55:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,5 +32,7 @@ extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause, List *targetList); extern Node *get_sortgroupclause_expr(SortClause *sortClause, List *targetList); +extern List *get_sortgrouplist_exprs(List *sortClauses, + List *targetList); #endif /* TLIST_H */ diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h index 07b8b311d07..b207acac593 100644 --- a/src/include/optimizer/var.h +++ b/src/include/optimizer/var.h @@ -7,14 +7,14 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: var.h,v 1.24 2003/01/15 19:35:47 tgl Exp $ + * $Id: var.h,v 1.25 2003/01/20 18:55:06 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef VAR_H #define VAR_H -#include "nodes/primnodes.h" +#include "nodes/parsenodes.h" extern List *pull_varnos(Node *node); @@ -22,7 +22,9 @@ extern bool contain_var_reference(Node *node, int varno, int varattno, int levelsup); extern bool contain_whole_tuple_var(Node *node, int varno, int levelsup); extern bool contain_var_clause(Node *node); +extern bool contain_vars_of_level(Node *node, int levelsup); +extern bool contain_vars_above_level(Node *node, int levelsup); extern List *pull_var_clause(Node *node, bool includeUpperVars); -extern Node 
*flatten_join_alias_vars(Node *node, List *rtable); +extern Node *flatten_join_alias_vars(Query *root, Node *node); #endif /* VAR_H */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 49f3bc7e005..037c2b2f5e3 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: selfuncs.h,v 1.10 2002/11/19 23:22:00 tgl Exp $ + * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,7 +75,7 @@ extern void mergejoinscansel(Query *root, Node *clause, Selectivity *leftscan, Selectivity *rightscan); -extern double estimate_num_groups(Query *root, List *groupClauses, +extern double estimate_num_groups(Query *root, List *groupExprs, double input_rows); extern Datum btcostestimate(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 5a2ef11c21b..8debffe00ba 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -58,10 +58,10 @@ SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL six | Uncorrelated Field -----+-------------------- | 1 - | 2 - | 3 | 1 | 2 + | 2 + | 3 | 3 (6 rows) @@ -71,10 +71,10 @@ SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL six | Uncorrelated Field -----+-------------------- | 1 - | 2 - | 3 | 1 | 2 + | 2 + | 3 | 3 (6 rows) @@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field" WHERE f3 IS NOT NULL); five | Correlated Field ------+------------------ - | 2 - | 3 | 1 | 2 + | 2 + | 3 | 3 (5 rows) -- GitLab