diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 878e4b3b94c550c8b00d75769401d084c0df1bbc..37bafae46eae85d2a19255dd08759a22e81f4618 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.383 2010/02/16 22:34:43 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.384 2010/03/28 22:59:32 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1478,16 +1478,6 @@ _outUniquePath(StringInfo str, UniquePath *node) WRITE_FLOAT_FIELD(rows, "%.0f"); } -static void -_outNoOpPath(StringInfo str, NoOpPath *node) -{ - WRITE_NODE_TYPE("NOOPPATH"); - - _outPathInfo(str, (Path *) node); - - WRITE_NODE_FIELD(subpath); -} - static void _outNestPath(StringInfo str, NestPath *node) { @@ -2740,9 +2730,6 @@ _outNode(StringInfo str, void *obj) case T_UniquePath: _outUniquePath(str, obj); break; - case T_NoOpPath: - _outNoOpPath(str, obj); - break; case T_NestPath: _outNestPath(str, obj); break; diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index dcdfd1cacfda09fc584b7f5ccc555cb945fbd2c8..7f84bf15de125ed34a9c90494838e3b710361913 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.52 2009/09/29 01:20:34 tgl Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.53 2010/03/28 22:59:32 tgl Exp $ Optimizer ========= @@ -354,7 +354,6 @@ RelOptInfo - a relation or joined relations NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins - NoOpPath - same as its input path (used when a join is removed) EquivalenceClass - a data structure representing a set of values known equal diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 52f26d255d951f9fb2643e52061d4040610b5ae7..b2412b7c3b9341678b90816dd08bb9cb1a163e4a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.193 2010/02/26 02:00:44 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.194 2010/03/28 22:59:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1396,10 +1396,6 @@ print_path(PlannerInfo *root, Path *path, int indent) ptype = "Unique"; subpath = ((UniquePath *) path)->subpath; break; - case T_NoOpPath: - ptype = "NoOp"; - subpath = ((NoOpPath *) path)->subpath; - break; case T_NestPath: ptype = "NestLoop"; join = true; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index f069f13fefb5115ed3577ec5a25c68d4ad780b24..3247c73c01747f7ff46da2a80782b73f9a44755e 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.131 2010/03/22 13:57:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.132 2010/03/28 22:59:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,11 +22,6 @@ #include "optimizer/paths.h" -static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, JoinType jointype); -static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outerrel); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, @@ -83,27 +78,11 @@ add_paths_to_joinrel(PlannerInfo *root, { List *mergeclause_list = NIL; - /* - * 0. Consider join removal. This is always the most efficient strategy, - * so if it works, there's no need to consider anything further. - */ - if (join_is_removable(root, joinrel, outerrel, innerrel, - restrictlist, jointype)) - { - generate_outer_only(root, joinrel, outerrel); - return; - } - /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin is currently our * only way of implementing full outer joins, so override mergejoin * disable if it's a full join. - * - * Note: do this after join_is_removable(), because this sets the - * outer_is_left flags in the mergejoin clauses, while join_is_removable - * uses those flags for its own purposes. Currently, they set the flags - * the same way anyway, but let's avoid unnecessary entanglement. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, @@ -185,188 +164,6 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, return false; /* no good for these input relations */ } -/* - * join_is_removable - * Determine whether we need not perform the join at all, because - * it will just duplicate its left input. - * - * This is true for a left join for which the join condition cannot match - * more than one inner-side row. (There are other possibly interesting - * cases, but we don't have the infrastructure to prove them.) We also - * have to check that the inner side doesn't generate any variables needed - * above the join. - * - * Note: there is no need to consider the symmetrical case of duplicating the - * right input, because add_paths_to_joinrel() will be called with each rel - * on the outer side. - */ -static bool -join_is_removable(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype) -{ - List *clause_list = NIL; - ListCell *l; - int attroff; - - /* - * Currently, we only know how to remove left joins to a baserel with - * unique indexes. We can check most of these criteria pretty trivially - * to avoid doing useless extra work. But checking whether any of the - * indexes are unique would require iterating over the indexlist, so for - * now we just make sure there are indexes of some sort or other. If none - * of them are unique, join removal will still fail, just slightly later. - */ - if (jointype != JOIN_LEFT || - innerrel->reloptkind == RELOPT_JOINREL || - innerrel->rtekind != RTE_RELATION || - innerrel->indexlist == NIL) - return false; - - /* - * We can't remove the join if any inner-rel attributes are used above the - * join. - * - * Note that this test only detects use of inner-rel attributes in higher - * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. - * - * As a micro-optimization, it seems better to start with max_attr and - * count down rather than starting with min_attr and counting up, on the - * theory that the system attributes are somewhat less likely to be wanted - * and should be tested last. - */ - for (attroff = innerrel->max_attr - innerrel->min_attr; - attroff >= 0; - attroff--) - { - if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids)) - return false; - } - - /* - * Similarly check that the inner rel doesn't produce any PlaceHolderVars - * that will be used above the join. - */ - foreach(l, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - - if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) && - !bms_is_subset(phinfo->ph_needed, joinrel->relids)) - return false; - } - - /* - * Search for mergejoinable clauses that constrain the inner rel against - * either the outer rel or a pseudoconstant. If an operator is - * mergejoinable then it behaves like equality for some btree opclass, so - * it's what we want. The mergejoinability test also eliminates clauses - * containing volatile functions, which we couldn't depend on. - */ - foreach(l, restrictlist) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * If we find a pushed-down clause, it must have come from above the - * outer join and it must contain references to the inner rel. (If it - * had only outer-rel variables, it'd have been pushed down into the - * outer rel.) Therefore, we can conclude that join removal is unsafe - * without any examination of the clause contents. - */ - if (restrictinfo->is_pushed_down) - return false; - - /* Ignore if it's not a mergejoinable clause */ - if (!restrictinfo->can_join || - restrictinfo->mergeopfamilies == NIL) - continue; /* not mergejoinable */ - - /* - * Check if clause has the form "outer op inner" or "inner op outer". - */ - if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) - continue; /* no good for these input relations */ - - /* OK, add to list */ - clause_list = lappend(clause_list, restrictinfo); - } - - /* Now examine the rel's restriction clauses for var = const clauses */ - foreach(l, innerrel->baserestrictinfo) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * Note: can_join won't be set for a restriction clause, but - * mergeopfamilies will be if it has a mergejoinable operator and - * doesn't contain volatile functions. - */ - if (restrictinfo->mergeopfamilies == NIL) - continue; /* not mergejoinable */ - - /* - * The clause certainly doesn't refer to anything but the given rel. - * If either side is pseudoconstant then we can use it. - */ - if (bms_is_empty(restrictinfo->left_relids)) - { - /* righthand side is inner */ - restrictinfo->outer_is_left = true; - } - else if (bms_is_empty(restrictinfo->right_relids)) - { - /* lefthand side is inner */ - restrictinfo->outer_is_left = false; - } - else - continue; - - /* OK, add to list */ - clause_list = lappend(clause_list, restrictinfo); - } - - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list)) - return true; - - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. - */ - return false; -} - -/* - * generate_outer_only - * Generate "join" paths when we have found the join is removable. - */ -static void -generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outerrel) -{ - ListCell *lc; - - /* - * For the moment, replicate all of the outerrel's paths as join paths. - * Some of them might not really be interesting above the join, if they - * have sort orderings that have no real use except to do a mergejoin for - * the join we've just found we don't need. But distinguishing that case - * probably isn't worth the extra code it would take. - */ - foreach(lc, outerrel->pathlist) - { - Path *outerpath = (Path *) lfirst(lc); - - add_path(joinrel, (Path *) - create_noop_path(root, joinrel, outerpath)); - } -} - /* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile index 2aa976d5fb1142332a17a216cae004a048310b1d..3c11972155f91035849e07f883a29b05c9563e96 100644 --- a/src/backend/optimizer/plan/Makefile +++ b/src/backend/optimizer/plan/Makefile @@ -4,7 +4,7 @@ # Makefile for optimizer/plan # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.15 2008/02/19 10:30:07 petere Exp $ +# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.16 2010/03/28 22:59:32 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/plan top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \ +OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \ setrefs.o subselect.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c new file mode 100644 index 0000000000000000000000000000000000000000..da6482b4c3c172ad79738789821d1a956748de7b --- /dev/null +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -0,0 +1,413 @@ +/*------------------------------------------------------------------------- + * + * analyzejoins.c + * Routines for simplifying joins after initial query analysis + * + * While we do a great deal of join simplification in prep/prepjointree.c, + * certain optimizations cannot be performed at that stage for lack of + * detailed information about the query. The routines here are invoked + * after initsplan.c has done its work, and can do additional join removal + * and simplification steps based on the information extracted. The penalty + * is that we have to work harder to clean up after ourselves when we modify + * the query, since the derived data structures have to be updated too. + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.1 2010/03/28 22:59:32 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" + +/* local functions */ +static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static void remove_rel_from_query(PlannerInfo *root, int relid); +static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); + + +/* + * remove_useless_joins + * Check for relations that don't actually need to be joined at all, + * and remove them from the query. + * + * We are passed the current joinlist and return the updated list. Other + * data structures that have to be updated are accessible via "root". + */ +List * +remove_useless_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + /* + * We are only interested in relations that are left-joined to, so we + * can scan the join_info_list to find them easily. + */ +restart: + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + int nremoved; + + /* Skip if not removable */ + if (!join_is_removable(root, sjinfo)) + continue; + + /* + * Currently, join_is_removable can only succeed when the sjinfo's + * righthand is a single baserel. Remove that rel from the query and + * joinlist. + */ + innerrelid = bms_singleton_member(sjinfo->min_righthand); + + remove_rel_from_query(root, innerrelid); + + /* We verify that exactly one reference gets removed from joinlist */ + nremoved = 0; + joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", innerrelid); + + /* + * We can delete this SpecialJoinInfo from the list too, since it's no + * longer of interest. + */ + root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); + + /* + * Restart the scan. This is necessary to ensure we find all + * removable joins independently of ordering of the join_info_list + * (note that removal of attr_needed bits may make a join appear + * removable that did not before). Also, since we just deleted the + * current list cell, we'd have to have some kluge to continue the + * list scan anyway. + */ + goto restart; + } + + return joinlist; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * join_is_removable + * Check whether we need not perform this special join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) We also + * have to check that the inner side doesn't generate any variables needed + * above the join. + */ +static bool +join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *clause_list = NIL; + ListCell *l; + int attroff; + + /* + * Currently, we only know how to remove left joins to a baserel with + * unique indexes. We can check most of these criteria pretty trivially + * to avoid doing useless extra work. But checking whether any of the + * indexes are unique would require iterating over the indexlist, so for + * now we just make sure there are indexes of some sort or other. If none + * of them are unique, join removal will still fail, just slightly later. + */ + if (sjinfo->jointype != JOIN_LEFT || + sjinfo->delay_upper_joins || + bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) + return false; + + innerrelid = bms_singleton_member(sjinfo->min_righthand); + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * We can't remove the join if any inner-rel attributes are used above the + * join. + * + * Note that this test only detects use of inner-rel attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. We check that case below. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) + return false; + } + + /* + * Similarly check that the inner rel doesn't produce any PlaceHolderVars + * that will be used above the join. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) && + !bms_is_subset(phinfo->ph_needed, joinrelids)) + return false; + } + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * either the outer rel or a pseudoconstant. If an operator is + * mergejoinable then it behaves like equality for some btree opclass, so + * it's what we want. The mergejoinability test also eliminates clauses + * containing volatile functions, which we couldn't depend on. + */ + foreach(l, innerrel->joininfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* Ignore clauses not pertinent to this join */ + if (!bms_is_subset(restrictinfo->required_relids, joinrelids)) + continue; + + /* + * If we find a pushed-down clause, it must have come from above the + * outer join and it must contain references to the inner rel. (If it + * had only outer-rel variables, it'd have been pushed down into the + * outer rel.) Therefore, we can conclude that join removal is unsafe + * without any examination of the clause contents. + */ + if (restrictinfo->is_pushed_down) + return false; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer". + */ + if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the rel's restriction clauses for var = const clauses */ + foreach(l, innerrel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, innerrel, clause_list)) + return true; + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} + + +/* + * Remove the target relid from the planner's data structures, having + * determined that there is no need to include it in the query. + * + * We are not terribly thorough here. We must make sure that the rel is + * no longer treated as a baserel, and that attributes of other baserels + * are no longer marked as being needed at joins involving this rel. + * In particular, we don't bother removing join quals involving the rel from + * the joininfo lists; they'll just get ignored, since we will never form a + * join relation at which they could be evaluated. + */ +static void +remove_rel_from_query(PlannerInfo *root, int relid) +{ + RelOptInfo *rel = find_base_rel(root, relid); + Index rti; + ListCell *l; + + /* + * Mark the rel as "dead" to show it is no longer part of the join tree. + * (Removing it from the baserel array altogether seems too risky.) + */ + rel->reloptkind = RELOPT_DEADREL; + + /* + * Remove references to the rel from other baserels' attr_needed arrays. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *otherrel = root->simple_rel_array[rti]; + int attroff; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == rti); /* sanity check on array */ + + /* no point in processing target rel itself */ + if (otherrel == rel) + continue; + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + otherrel->attr_needed[attroff] = + bms_del_member(otherrel->attr_needed[attroff], relid); + } + } + + /* + * Likewise remove references from PlaceHolderVar data structures. + * + * Here we have a special case: if a PHV's eval_at set is just the target + * relid, we want to leave it that way instead of reducing it to the empty + * set. An empty eval_at set would confuse later processing since it + * would match every possible eval placement. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); + if (bms_is_empty(phinfo->ph_eval_at)) /* oops, belay that */ + phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid); + + phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); + } +} + +/* + * Remove any occurrences of the target relid from a joinlist structure. + * + * It's easiest to build a whole new list structure, so we handle it that + * way. Efficiency is not a big deal here. + * + * *nremoved is incremented by the number of occurrences removed (there + * should be exactly one, but the caller checks that). + */ +static List * +remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) +{ + List *result = NIL; + ListCell *jl; + + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + if (varno == relid) + (*nremoved)++; + else + result = lappend(result, jlnode); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + List *sublist; + + sublist = remove_rel_from_joinlist((List *) jlnode, + relid, nremoved); + /* Avoid including empty sub-lists in the result */ + if (sublist) + result = lappend(result, sublist); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + } + + return result; +} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index db47054ecdcd4b2bba978bbfec6ad91af6afe6c0..a87c2b80fb2aa6c52384beab172969eb62a4018f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.273 2010/02/26 02:00:45 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.274 2010/03/28 22:59:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -164,11 +164,6 @@ create_plan(PlannerInfo *root, Path *best_path) case T_WorkTableScan: plan = create_scan_plan(root, best_path); break; - case T_Join: - /* this is only used for no-op joins */ - Assert(IsA(best_path, NoOpPath)); - plan = create_plan(root, ((NoOpPath *) best_path)->subpath); - break; case T_HashJoin: case T_MergeJoin: case T_NestLoop: diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index a53b4e1c5157cfd7939872c43a9f4dea35aa7740..f93205bbaa72158f6e57f269d04fe05bbde71576 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.117 2010/01/02 16:57:47 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.118 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -179,31 +179,6 @@ query_planner(PlannerInfo *root, List *tlist, */ add_base_rels_to_query(root, (Node *) parse->jointree); - /* - * We should now have size estimates for every actual table involved in - * the query, so we can compute total_table_pages. Note that appendrels - * are not double-counted here, even though we don't bother to distinguish - * RelOptInfos for appendrel parents, because the parents will still have - * size zero. - * - * XXX if a table is self-joined, we will count it once per appearance, - * which perhaps is the wrong thing ... but that's not completely clear, - * and detecting self-joins here is difficult, so ignore it for now. - */ - total_pages = 0; - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - if (brel == NULL) - continue; - - Assert(brel->relid == rti); /* sanity check on array */ - - total_pages += (double) brel->pages; - } - root->total_table_pages = total_pages; - /* * Examine the targetlist and qualifications, adding entries to baserel * targetlists for all referenced Vars. Restrict and join clauses are @@ -248,6 +223,49 @@ query_planner(PlannerInfo *root, List *tlist, */ fix_placeholder_eval_levels(root); + /* + * Remove any useless outer joins. Ideally this would be done during + * jointree preprocessing, but the necessary information isn't available + * until we've built baserel data structures and classified qual clauses. + */ + joinlist = remove_useless_joins(root, joinlist); + + /* + * Now distribute "placeholders" to base rels as needed. This has to be + * done after join removal because removal could change whether a + * placeholder is evaluatable at a base rel. + */ + add_placeholders_to_base_rels(root); + + /* + * We should now have size estimates for every actual table involved in + * the query, and we also know which if any have been deleted from the + * query by join removal; so we can compute total_table_pages. + * + * Note that appendrels are not double-counted here, even though we don't + * bother to distinguish RelOptInfos for appendrel parents, because the + * parents will still have size zero. + * + * XXX if a table is self-joined, we will count it once per appearance, + * which perhaps is the wrong thing ... but that's not completely clear, + * and detecting self-joins here is difficult, so ignore it for now. + */ + total_pages = 0; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + if (brel->reloptkind == RELOPT_BASEREL || + brel->reloptkind == RELOPT_OTHER_MEMBER_REL) + total_pages += (double) brel->pages; + } + root->total_table_pages = total_pages; + /* * Ready to do the primary planning. */ diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a2ebe0d8ed352e1564d7614f1487294edfa632da..61716e464dc4c506542755f3c62f5cadf96ba8c8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.157 2010/02/26 02:00:47 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.158 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1214,26 +1214,6 @@ distinct_col_search(int colno, List *colnos, List *opids) return InvalidOid; } -/* - * create_noop_path - * Creates a path equivalent to the input subpath, but having a different - * parent rel. This is used when a join is found to be removable. - */ -NoOpPath * -create_noop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) -{ - NoOpPath *pathnode = makeNode(NoOpPath); - - pathnode->path.pathtype = T_Join; /* by convention */ - pathnode->path.parent = rel; - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost = subpath->total_cost; - pathnode->path.pathkeys = subpath->pathkeys; - pathnode->subpath = subpath; - - return pathnode; -} - /* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery, diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index e15b36df3a0c560e2f3395a9475f7a123147f361..7bf34129f3b6648d73905af8adc8b23cef6f0f11 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.6 2010/01/02 16:57:48 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.7 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -168,18 +168,27 @@ fix_placeholder_eval_levels(PlannerInfo *root) list_free(vars); } } +} - /* - * Now, if any placeholder can be computed at a base rel and is needed - * above it, add it to that rel's targetlist. (This is essentially the - * same logic as in add_placeholders_to_joinrel, but we can't do that part - * until joinrels are formed.) We have to do this as a separate step - * because the ph_needed values aren't stable until the previous loop - * finishes. - */ - foreach(lc1, root->placeholder_list) +/* + * add_placeholders_to_base_rels + * Add any required PlaceHolderVars to base rels' targetlists. + * + * If any placeholder can be computed at a base rel and is needed above it, + * add it to that rel's targetlist. We have to do this separately from + * fix_placeholder_eval_levels() because join removal happens in between, + * and can change the ph_eval_at sets. There is essentially the same logic + * in add_placeholders_to_joinrel, but we can't do that part until joinrels + * are formed. + */ +void +add_placeholders_to_base_rels(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc1); + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); Relids eval_at = phinfo->ph_eval_at; if (bms_membership(eval_at) == BMS_SINGLETON) diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index e1da4954390b01f771c02bf2e7afe53303fca7c6..401b69e08839be9ebb9e52f11901ae20a03024c1 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.233 2010/01/05 21:53:59 rhaas Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.234 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -217,7 +217,6 @@ typedef enum NodeTag T_ResultPath, T_MaterialPath, T_UniquePath, - T_NoOpPath, T_EquivalenceClass, T_EquivalenceMember, T_PathKey, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 888005282aae1cff1086fa915fec57f634caa2a6..45cb17c52b8a48125e1607a411c3d236d521a88c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.184 2010/02/26 02:01:25 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.185 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -240,7 +240,9 @@ typedef struct PlannerInfo * * We also have "other rels", which are like base rels in that they refer to * single RT indexes; but they are not part of the join tree, and are given - * a different RelOptKind to identify them. + * a different RelOptKind to identify them. Lastly, there is a RelOptKind + * for "dead" relations, which are base rels that we have proven we don't + * need to join after all. * * Currently the only kind of otherrels are those made for member relations * of an "append relation", that is an inheritance set or UNION ALL subquery. @@ -346,7 +348,8 @@ typedef enum RelOptKind { RELOPT_BASEREL, RELOPT_JOINREL, - RELOPT_OTHER_MEMBER_REL + RELOPT_OTHER_MEMBER_REL, + RELOPT_DEADREL } RelOptKind; typedef struct RelOptInfo @@ -801,22 +804,6 @@ typedef struct UniquePath double rows; /* estimated number of result tuples */ } UniquePath; -/* - * NoOpPath represents exactly the same plan as its subpath. This is used - * when we have determined that a join can be eliminated. The difference - * between the NoOpPath and its subpath is just that the NoOpPath's parent - * is the whole join relation while the subpath is for one of the joined - * relations (and the other one isn't needed). - * - * Note: path.pathtype is always T_Join, but this won't actually give rise - * to a Join plan node. - */ -typedef struct NoOpPath -{ - Path path; - Path *subpath; -} NoOpPath; - /* * All join-type paths share these fields. */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 2255f147bd7d0af02bae1c76f8b1962bb5253a6d..9ff5d6328dea71ef4e2a71bd039524036e4d8684 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.83 2010/02/26 02:01:26 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.84 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -51,8 +51,6 @@ extern ResultPath *create_result_path(List *quals); extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); -extern NoOpPath *create_noop_path(PlannerInfo *root, RelOptInfo *rel, - Path *subpath); extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys); extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index c23d1b5cd15aa8da0b8ad2592a2cd51b3810b5ae..e9dde9315e32d5acd0a5df19c6c254f4bde2e67a 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.4 2010/01/02 16:58:07 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.5 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ extern PlaceHolderVar *make_placeholder_expr(PlannerInfo *root, Expr *expr, extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv); extern void fix_placeholder_eval_levels(PlannerInfo *root); +extern void add_placeholders_to_base_rels(PlannerInfo *root); extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 62742f5779225ff3db04d14357e5a6011f183e74..416cec0d4e7a47fc94b3afb98cc9cb7ad8b5dccb 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.126 2010/02/26 02:01:26 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.127 2010/03/28 22:59:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, Expr *item2, Relids qualscope); +/* + * prototypes for plan/analyzejoins.c + */ +extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); + /* * prototypes for plan/setrefs.c */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index ad164e1c02bf0cdc98d7be1aa31e82885de3c893..5fd7d79b45d6496e7455b9cc15029e49ccebee76 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2494,6 +2494,38 @@ select * from int4_tbl a full join int4_tbl b on false; -- -- test join removal -- +begin; +CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a" +CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b" +CREATE TEMP TABLE c (id int PRIMARY KEY); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "c_pkey" for table "c" +INSERT INTO a VALUES (0, 0), (1, NULL); +INSERT INTO b VALUES (0, 0), (1, NULL); +INSERT INTO c VALUES (0), (1); +-- all three cases should be optimizable into a simple seqscan +explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id; + QUERY PLAN +--------------- + Seq Scan on b +(1 row) + +explain (costs off) + SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) + ON (a.b_id = b.id); + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +rollback; create temp table parent (k int primary key, pd int); NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "parent_pkey" for table "parent" create temp table child (k int unique, cd int); @@ -2540,3 +2572,24 @@ explain (costs off) -> Seq Scan on child c (5 rows) +-- bug 5255: this is not optimizable by join removal +begin; +CREATE TEMP TABLE a (id int PRIMARY KEY); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a" +CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b" +INSERT INTO a VALUES (0), (1); +INSERT INTO b VALUES (0, 0), (1, NULL); +SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + id | a_id | id +----+------+---- + 1 | | +(1 row) + +SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + id | a_id +----+------ + 1 | +(1 row) + +rollback; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b0a4ceccf0bd578406323370536a0127174a3b14..d627973a091fbe7625eaf0555a6f4e144864b341 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -572,6 +572,24 @@ select * from int4_tbl a full join int4_tbl b on false; -- test join removal -- +begin; + +CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); +CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); +CREATE TEMP TABLE c (id int PRIMARY KEY); +INSERT INTO a VALUES (0, 0), (1, NULL); +INSERT INTO b VALUES (0, 0), (1, NULL); +INSERT INTO c VALUES (0), (1); + +-- all three cases should be optimizable into a simple seqscan +explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; +explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id; +explain (costs off) + SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) + ON (a.b_id = b.id); + +rollback; + create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30); @@ -590,3 +608,16 @@ explain (costs off) select p.*, linked from parent p left join (select c.*, true as linked from child c) as ss on (p.k = ss.k); + +-- bug 5255: this is not optimizable by join removal +begin; + +CREATE TEMP TABLE a (id int PRIMARY KEY); +CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int); +INSERT INTO a VALUES (0), (1); +INSERT INTO b VALUES (0, 0), (1, NULL); + +SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); +SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + +rollback;