diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ebb6f3a49b74d68b82f0bf592f1af2cb69ef7ac7..291e6a7c39113fbad8ab4a6cce862d6eb8fa300f 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1942,7 +1942,10 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
 	COPY_SCALAR_FIELD(jointype);
 	COPY_SCALAR_FIELD(lhs_strict);
 	COPY_SCALAR_FIELD(delay_upper_joins);
-	COPY_NODE_FIELD(join_quals);
+	COPY_SCALAR_FIELD(semi_can_btree);
+	COPY_SCALAR_FIELD(semi_can_hash);
+	COPY_NODE_FIELD(semi_operators);
+	COPY_NODE_FIELD(semi_rhs_exprs);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 8186e84d3395ee6e3f1cd0d40f092402bef99e31..fcd58ada3d0066071f2b55a84580ebe00125c327 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -798,7 +798,10 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
 	COMPARE_SCALAR_FIELD(jointype);
 	COMPARE_SCALAR_FIELD(lhs_strict);
 	COMPARE_SCALAR_FIELD(delay_upper_joins);
-	COMPARE_NODE_FIELD(join_quals);
+	COMPARE_SCALAR_FIELD(semi_can_btree);
+	COMPARE_SCALAR_FIELD(semi_can_hash);
+	COMPARE_NODE_FIELD(semi_operators);
+	COMPARE_NODE_FIELD(semi_rhs_exprs);
 
 	return true;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 03f8adaae3ee010a508beb6a5c91cb981ba8cb67..ca596efb5b9f0ce2bb93ffe0f0d723abe67c3647 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1945,7 +1945,10 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
 	WRITE_ENUM_FIELD(jointype, JoinType);
 	WRITE_BOOL_FIELD(lhs_strict);
 	WRITE_BOOL_FIELD(delay_upper_joins);
-	WRITE_NODE_FIELD(join_quals);
+	WRITE_BOOL_FIELD(semi_can_btree);
+	WRITE_BOOL_FIELD(semi_can_hash);
+	WRITE_NODE_FIELD(semi_operators);
+	WRITE_NODE_FIELD(semi_rhs_exprs);
 }
 
 static void
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 5a9daf0f207225fafd2fd05cfd1fb5270e6116db..1a0d358c5f70ee7d51fb8fd31db9c08cbbcbbe76 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3294,7 +3294,10 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 	/* we don't bother trying to make the remaining fields valid */
 	norm_sjinfo.lhs_strict = false;
 	norm_sjinfo.delay_upper_joins = false;
-	norm_sjinfo.join_quals = NIL;
+	norm_sjinfo.semi_can_btree = false;
+	norm_sjinfo.semi_can_hash = false;
+	norm_sjinfo.semi_operators = NIL;
+	norm_sjinfo.semi_rhs_exprs = NIL;
 
 	nselec = clauselist_selectivity(root,
 									joinquals,
@@ -3456,7 +3459,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
 	/* we don't bother trying to make the remaining fields valid */
 	sjinfo.lhs_strict = false;
 	sjinfo.delay_upper_joins = false;
-	sjinfo.join_quals = NIL;
+	sjinfo.semi_can_btree = false;
+	sjinfo.semi_can_hash = false;
+	sjinfo.semi_operators = NIL;
+	sjinfo.semi_rhs_exprs = NIL;
 
 	/* Get the approximate selectivity */
 	foreach(l, quals)
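Note: the cost-estimation call sites above build a throwaway SpecialJoinInfo and must now clear the new semijoin fields as well as the old ones. The following standalone C sketch (simplified stand-in types, hypothetical names, not PostgreSQL code) models that initialization pattern:

/*
 * Sketch only: models the "fake" SpecialJoinInfo initialization used by the
 * call sites touched above.  The struct and enum are simplified stand-ins.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef enum { SKETCH_JOIN_INNER, SKETCH_JOIN_SEMI } JoinTypeSketch;

typedef struct SpecialJoinInfoSketch
{
    JoinTypeSketch jointype;
    bool        lhs_strict;
    bool        delay_upper_joins;
    /* semijoin fields; only meaningful for SKETCH_JOIN_SEMI */
    bool        semi_can_btree;
    bool        semi_can_hash;
    void       *semi_operators;     /* stand-in for List * */
    void       *semi_rhs_exprs;     /* stand-in for List * */
} SpecialJoinInfoSketch;

/* Fill a throwaway sjinfo the way the cost-estimation call sites do */
static void
init_fake_sjinfo(SpecialJoinInfoSketch *sjinfo, JoinTypeSketch jointype)
{
    sjinfo->jointype = jointype;
    sjinfo->lhs_strict = false;
    sjinfo->delay_upper_joins = false;
    /* the semijoin fields must be cleared too, now that they exist */
    sjinfo->semi_can_btree = false;
    sjinfo->semi_can_hash = false;
    sjinfo->semi_operators = NULL;
    sjinfo->semi_rhs_exprs = NULL;
}

int
main(void)
{
    SpecialJoinInfoSketch sjinfo;

    init_fake_sjinfo(&sjinfo, SKETCH_JOIN_INNER);
    printf("can btree: %d, can hash: %d\n",
           sjinfo.semi_can_btree, sjinfo.semi_can_hash);
    return 0;
}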
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index b86a3cd501ee6d639171b44d5e5d54f1b6c56b6e..da4d7c528455f9732f10ab0fe813631af2f7aef1 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -130,7 +130,12 @@ static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
 static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
-static double get_loop_count(PlannerInfo *root, Relids outer_relids);
+static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
+static double adjust_rowcount_for_semijoins(PlannerInfo *root,
+							  Index cur_relid,
+							  Index outer_relid,
+							  double rowcount);
+static double approximate_joinrel_size(PlannerInfo *root, Relids relids);
 static void match_restriction_clauses_to_index(RelOptInfo *rel,
 					IndexOptInfo *index,
 					IndexClauseSet *clauseset);
@@ -402,7 +407,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
 
 		/* And push that path into the mix */
 		required_outer = get_bitmap_tree_required_outer(bitmapqual);
-		loop_count = get_loop_count(root, required_outer);
+		loop_count = get_loop_count(root, rel->relid, required_outer);
 		bpath = create_bitmap_heap_path(root, rel, bitmapqual,
 										required_outer, loop_count);
 		add_path(rel, (Path *) bpath);
@@ -969,7 +974,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 		outer_relids = NULL;
 
 	/* Compute loop_count for cost estimation purposes */
-	loop_count = get_loop_count(root, outer_relids);
+	loop_count = get_loop_count(root, rel->relid, outer_relids);
 
 	/*
 	 * 2. Compute pathkeys describing index's ordering, if any, then see how
@@ -1553,7 +1558,7 @@ bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
 	cost_bitmap_heap_scan(&bpath.path, root, rel,
 						  bpath.path.param_info,
 						  ipath,
-						  get_loop_count(root, required_outer));
+						  get_loop_count(root, rel->relid, required_outer));
 
 	return bpath.path.total_cost;
 }
@@ -1594,7 +1599,7 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
 	cost_bitmap_heap_scan(&bpath.path, root, rel,
 						  bpath.path.param_info,
 						  (Path *) &apath,
-						  get_loop_count(root, required_outer));
+						  get_loop_count(root, rel->relid, required_outer));
 
 	return bpath.path.total_cost;
 }
@@ -1861,48 +1866,142 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
  * answer for single-other-relation cases, and it seems like a reasonable
  * zero-order approximation for multiway-join cases.
  *
+ * In addition, we check to see if the other side of each join clause is on
+ * the inside of some semijoin that the current relation is on the outside of.
+ * If so, the only way that a parameterized path could be used is if the
+ * semijoin RHS has been unique-ified, so we should use the number of unique
+ * RHS rows rather than using the relation's raw rowcount.
+ *
  * Note: for this to work, allpaths.c must establish all baserel size
  * estimates before it begins to compute paths, or at least before it
  * calls create_index_paths().
  */
 static double
-get_loop_count(PlannerInfo *root, Relids outer_relids)
+get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids)
 {
-	double		result = 1.0;
+	double		result;
+	int			outer_relid;
 
 	/* For a non-parameterized path, just return 1.0 quickly */
-	if (outer_relids != NULL)
+	if (outer_relids == NULL)
+		return 1.0;
+
+	result = 1.0;
+	outer_relid = -1;
+	while ((outer_relid = bms_next_member(outer_relids, outer_relid)) >= 0)
 	{
-		int			relid;
+		RelOptInfo *outer_rel;
+		double		rowcount;
 
-		relid = -1;
-		while ((relid = bms_next_member(outer_relids, relid)) >= 0)
-		{
-			RelOptInfo *outer_rel;
+		/* Paranoia: ignore bogus relid indexes */
+		if (outer_relid >= root->simple_rel_array_size)
+			continue;
+		outer_rel = root->simple_rel_array[outer_relid];
+		if (outer_rel == NULL)
+			continue;
+		Assert(outer_rel->relid == outer_relid);	/* sanity check on array */
 
-			/* Paranoia: ignore bogus relid indexes */
-			if (relid >= root->simple_rel_array_size)
-				continue;
-			outer_rel = root->simple_rel_array[relid];
-			if (outer_rel == NULL)
-				continue;
-			Assert(outer_rel->relid == relid);	/* sanity check on array */
+		/* Other relation could be proven empty, if so ignore */
+		if (IS_DUMMY_REL(outer_rel))
+			continue;
 
-			/* Other relation could be proven empty, if so ignore */
-			if (IS_DUMMY_REL(outer_rel))
-				continue;
+		/* Otherwise, rel's rows estimate should be valid by now */
+		Assert(outer_rel->rows > 0);
 
-			/* Otherwise, rel's rows estimate should be valid by now */
-			Assert(outer_rel->rows > 0);
+		/* Check to see if rel is on the inside of any semijoins */
+		rowcount = adjust_rowcount_for_semijoins(root,
+												 cur_relid,
+												 outer_relid,
+												 outer_rel->rows);
 
-			/* Remember smallest row count estimate among the outer rels */
-			if (result == 1.0 || result > outer_rel->rows)
-				result = outer_rel->rows;
-		}
+		/* Remember smallest row count estimate among the outer rels */
+		if (result == 1.0 || result > rowcount)
+			result = rowcount;
 	}
 	return result;
 }
 
+/*
+ * Check to see if outer_relid is on the inside of any semijoin that cur_relid
+ * is on the outside of.  If so, replace rowcount with the estimated number of
+ * unique rows from the semijoin RHS (assuming that's smaller, which it might
+ * not be).  The estimate is crude but it's the best we can do at this stage
+ * of the proceedings.
+ */
+static double
+adjust_rowcount_for_semijoins(PlannerInfo *root,
+							  Index cur_relid,
+							  Index outer_relid,
+							  double rowcount)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+		if (sjinfo->jointype == JOIN_SEMI &&
+			bms_is_member(cur_relid, sjinfo->syn_lefthand) &&
+			bms_is_member(outer_relid, sjinfo->syn_righthand))
+		{
+			/* Estimate number of unique-ified rows */
+			double		nraw;
+			double		nunique;
+
+			nraw = approximate_joinrel_size(root, sjinfo->syn_righthand);
+			nunique = estimate_num_groups(root,
+										  sjinfo->semi_rhs_exprs,
+										  nraw);
+			if (rowcount > nunique)
+				rowcount = nunique;
+		}
+	}
+	return rowcount;
+}
+
+/*
+ * Make an approximate estimate of the size of a joinrel.
+ *
+ * We don't have enough info at this point to get a good estimate, so we
+ * just multiply the base relation sizes together.  Fortunately, this is
+ * the right answer anyway for the most common case with a single relation
+ * on the RHS of a semijoin.  Also, estimate_num_groups() has only a weak
+ * dependency on its input_rows argument (it basically uses it as a clamp).
+ * So we might be able to get a fairly decent end result even with a severe
+ * overestimate of the RHS's raw size.
+ */
+static double
+approximate_joinrel_size(PlannerInfo *root, Relids relids)
+{
+	double		rowcount = 1.0;
+	int			relid;
+
+	relid = -1;
+	while ((relid = bms_next_member(relids, relid)) >= 0)
+	{
+		RelOptInfo *rel;
+
+		/* Paranoia: ignore bogus relid indexes */
+		if (relid >= root->simple_rel_array_size)
+			continue;
+		rel = root->simple_rel_array[relid];
+		if (rel == NULL)
+			continue;
+		Assert(rel->relid == relid);	/* sanity check on array */
+
+		/* Relation could be proven empty, if so ignore */
+		if (IS_DUMMY_REL(rel))
+			continue;
+
+		/* Otherwise, rel's rows estimate should be valid by now */
+		Assert(rel->rows > 0);
+
+		/* Accumulate product */
+		rowcount *= rel->rows;
+	}
+	return rowcount;
+}
+
 /****************************************************************************
  *				----  ROUTINES TO CHECK QUERY CLAUSES  ----
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index e7e9a1ab49b9d66d3ca1780490da223b71921dca..fe9fd57317429c08d99c9c98f4c92af2728dff45 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -624,7 +624,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		/* we don't bother trying to make the remaining fields valid */
 		sjinfo->lhs_strict = false;
 		sjinfo->delay_upper_joins = false;
-		sjinfo->join_quals = NIL;
+		sjinfo->semi_can_btree = false;
+		sjinfo->semi_can_hash = false;
+		sjinfo->semi_operators = NIL;
+		sjinfo->semi_rhs_exprs = NIL;
 	}
 
 	/*
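Note: the new indxpath.c logic boils down to "take the smallest outer-rel row count, but clamp a semijoin RHS rel to its estimated number of distinct rows, where the raw RHS joinrel size is approximated by multiplying base-relation sizes". A standalone C sketch of that shape follows (the relation array, the semijoin flags, and the unique-rows figures are all hypothetical stand-ins, not the planner's data structures):

/*
 * Sketch only: simplified model of get_loop_count() plus the semijoin clamp.
 */
#include <stdbool.h>
#include <stdio.h>

#define NREL 4

typedef struct
{
    double rows;          /* estimated rows in this base relation */
    bool   in_semi_rhs;   /* inside the RHS of a semijoin vs. the scanned rel? */
    double semi_unique;   /* estimated distinct RHS rows for that semijoin */
} RelSketch;

/* Clamp an outer rel's rowcount if it sits inside a semijoin RHS */
static double
adjust_rowcount(const RelSketch *rel)
{
    if (rel->in_semi_rhs && rel->semi_unique < rel->rows)
        return rel->semi_unique;
    return rel->rows;
}

/* Smallest (possibly clamped) row count among the parameterizing rels */
static double
loop_count(const RelSketch rels[], const bool outer[], int n)
{
    double result = 1.0;

    for (int i = 0; i < n; i++)
    {
        double rowcount;

        if (!outer[i])
            continue;           /* not part of the parameterization */
        rowcount = adjust_rowcount(&rels[i]);
        if (result == 1.0 || result > rowcount)
            result = rowcount;
    }
    return result;
}

int
main(void)
{
    RelSketch rels[NREL] = {
        {10000.0, false, 0.0},
        {50000.0, true, 200.0},   /* semijoin RHS: clamp to ~200 distinct rows */
        {300.0, false, 0.0},
        {7.0, false, 0.0},
    };
    bool outer[NREL] = {false, true, true, false};

    /* expect 200: the clamped semijoin RHS beats the 300-row rel */
    printf("expected loop count: %.0f\n", loop_count(rels, outer, NREL));
    return 0;
}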
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 49d776d92952e720696e97208c03c11b37289e2e..a7655e4a710747166bc0dc035fd8d0cb31ce4a88 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -17,6 +17,7 @@
 #include "catalog/pg_type.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -55,6 +56,7 @@ static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
 				   Relids left_rels, Relids right_rels,
 				   Relids inner_join_rels,
 				   JoinType jointype, List *clause);
+static void compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause);
 static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 						bool is_deduced,
 						bool below_outer_join,
@@ -1085,7 +1087,8 @@ make_outerjoininfo(PlannerInfo *root,
 	sjinfo->jointype = jointype;
 	/* this always starts out false */
 	sjinfo->delay_upper_joins = false;
-	sjinfo->join_quals = clause;
+
+	compute_semijoin_info(sjinfo, clause);
 
 	/* If it's a full join, no need to be very smart */
 	if (jointype == JOIN_FULL)
@@ -1237,6 +1240,182 @@ make_outerjoininfo(PlannerInfo *root,
 
 	return sjinfo;
 }
 
+/*
+ * compute_semijoin_info
+ *	  Fill semijoin-related fields of a new SpecialJoinInfo
+ *
+ * Note: this relies on only the jointype and syn_righthand fields of the
+ * SpecialJoinInfo; the rest may not be set yet.
+ */
+static void
+compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause)
+{
+	List	   *semi_operators;
+	List	   *semi_rhs_exprs;
+	bool		all_btree;
+	bool		all_hash;
+	ListCell   *lc;
+
+	/* Initialize semijoin-related fields in case we can't unique-ify */
+	sjinfo->semi_can_btree = false;
+	sjinfo->semi_can_hash = false;
+	sjinfo->semi_operators = NIL;
+	sjinfo->semi_rhs_exprs = NIL;
+
+	/* Nothing more to do if it's not a semijoin */
+	if (sjinfo->jointype != JOIN_SEMI)
+		return;
+
+	/*
+	 * Look to see whether the semijoin's join quals consist of AND'ed
+	 * equality operators, with (only) RHS variables on only one side of each
+	 * one.  If so, we can figure out how to enforce uniqueness for the RHS.
+	 *
+	 * Note that the input clause list is the list of quals that are
+	 * *syntactically* associated with the semijoin, which in practice means
+	 * the synthesized comparison list for an IN or the WHERE of an EXISTS.
+	 * Particularly in the latter case, it might contain clauses that aren't
+	 * *semantically* associated with the join, but refer to just one side or
+	 * the other.  We can ignore such clauses here, as they will just drop
+	 * down to be processed within one side or the other.  (It is okay to
+	 * consider only the syntactically-associated clauses here because for a
+	 * semijoin, no higher-level quals could refer to the RHS, and so there
+	 * can be no other quals that are semantically associated with this join.
+	 * We do things this way because it is useful to have the set of potential
+	 * unique-ification expressions before we can extract the list of quals
+	 * that are actually semantically associated with the particular join.)
+	 *
+	 * Note that the semi_operators list consists of the joinqual operators
+	 * themselves (but commuted if needed to put the RHS value on the right).
+	 * These could be cross-type operators, in which case the operator
+	 * actually needed for uniqueness is a related single-type operator.  We
+	 * assume here that that operator will be available from the btree or
+	 * hash opclass when the time comes ... if not, create_unique_plan() will
+	 * fail.
+	 */
+	semi_operators = NIL;
+	semi_rhs_exprs = NIL;
+	all_btree = true;
+	all_hash = enable_hashagg;	/* don't consider hash if not enabled */
+	foreach(lc, clause)
+	{
+		OpExpr	   *op = (OpExpr *) lfirst(lc);
+		Oid			opno;
+		Node	   *left_expr;
+		Node	   *right_expr;
+		Relids		left_varnos;
+		Relids		right_varnos;
+		Relids		all_varnos;
+		Oid			opinputtype;
+
+		/* Is it a binary opclause? */
+		if (!IsA(op, OpExpr) ||
+			list_length(op->args) != 2)
+		{
+			/* No, but does it reference both sides? */
+			all_varnos = pull_varnos((Node *) op);
+			if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
+				bms_is_subset(all_varnos, sjinfo->syn_righthand))
+			{
+				/*
+				 * Clause refers to only one rel, so ignore it --- unless it
+				 * contains volatile functions, in which case we'd better
+				 * punt.
+				 */
+				if (contain_volatile_functions((Node *) op))
+					return;
+				continue;
+			}
+			/* Non-operator clause referencing both sides, must punt */
+			return;
+		}
+
+		/* Extract data from binary opclause */
+		opno = op->opno;
+		left_expr = linitial(op->args);
+		right_expr = lsecond(op->args);
+		left_varnos = pull_varnos(left_expr);
+		right_varnos = pull_varnos(right_expr);
+		all_varnos = bms_union(left_varnos, right_varnos);
+		opinputtype = exprType(left_expr);
+
+		/* Does it reference both sides? */
+		if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
+			bms_is_subset(all_varnos, sjinfo->syn_righthand))
+		{
+			/*
+			 * Clause refers to only one rel, so ignore it --- unless it
+			 * contains volatile functions, in which case we'd better punt.
+			 */
+			if (contain_volatile_functions((Node *) op))
+				return;
+			continue;
+		}
+
+		/* check rel membership of arguments */
+		if (!bms_is_empty(right_varnos) &&
+			bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
+			!bms_overlap(left_varnos, sjinfo->syn_righthand))
+		{
+			/* typical case, right_expr is RHS variable */
+		}
+		else if (!bms_is_empty(left_varnos) &&
+				 bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
+				 !bms_overlap(right_varnos, sjinfo->syn_righthand))
+		{
+			/* flipped case, left_expr is RHS variable */
+			opno = get_commutator(opno);
+			if (!OidIsValid(opno))
+				return;
+			right_expr = left_expr;
+		}
+		else
+		{
+			/* mixed membership of args, punt */
+			return;
+		}
+
+		/* all operators must be btree equality or hash equality */
+		if (all_btree)
+		{
+			/* oprcanmerge is considered a hint... */
+			if (!op_mergejoinable(opno, opinputtype) ||
+				get_mergejoin_opfamilies(opno) == NIL)
+				all_btree = false;
+		}
+		if (all_hash)
+		{
+			/* ... but oprcanhash had better be correct */
+			if (!op_hashjoinable(opno, opinputtype))
+				all_hash = false;
+		}
+		if (!(all_btree || all_hash))
+			return;
+
+		/* so far so good, keep building lists */
+		semi_operators = lappend_oid(semi_operators, opno);
+		semi_rhs_exprs = lappend(semi_rhs_exprs, copyObject(right_expr));
+	}
+
+	/* Punt if we didn't find at least one column to unique-ify */
+	if (semi_rhs_exprs == NIL)
+		return;
+
+	/*
+	 * The expressions we'd need to unique-ify mustn't be volatile.
+	 */
+	if (contain_volatile_functions((Node *) semi_rhs_exprs))
+		return;
+
+	/*
+	 * If we get here, we can unique-ify the semijoin's RHS using at least
+	 * one of sorting and hashing.  Save the information about how to do that.
+	 */
+	sjinfo->semi_can_btree = all_btree;
+	sjinfo->semi_can_hash = all_hash;
+	sjinfo->semi_operators = semi_operators;
+	sjinfo->semi_rhs_exprs = semi_rhs_exprs;
+}
+
 /*****************************************************************************
  *
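Note: the heart of compute_semijoin_info() is the per-clause classification: keep a binary equality clause whose RHS-side variables fall entirely on one argument, commute the operator if that argument is the left one, and punt on anything mixed. A standalone C sketch of that decision (hypothetical, using plain bitmasks for varnos and a boolean instead of the commutator catalog lookup) follows:

/*
 * Sketch only: simplified model of the clause classification performed by
 * compute_semijoin_info().  Not PostgreSQL code.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int RelidsMask;

typedef struct
{
    RelidsMask left_varnos;    /* rels referenced by the left argument */
    RelidsMask right_varnos;   /* rels referenced by the right argument */
    bool       has_commutator; /* can the operator be commuted? */
} ClauseSketch;

typedef enum
{
    CLAUSE_IGNORE,         /* references only one side; handled elsewhere */
    CLAUSE_KEEP,           /* RHS expression already on the right */
    CLAUSE_KEEP_COMMUTED,  /* RHS expression on the left; commute operator */
    CLAUSE_PUNT            /* mixed membership or no commutator: give up */
} ClauseVerdict;

static ClauseVerdict
classify_clause(const ClauseSketch *c, RelidsMask syn_righthand)
{
    RelidsMask all = c->left_varnos | c->right_varnos;

    /* clause touching only one side of the semijoin is not interesting */
    if ((all & syn_righthand) == 0 || (all & ~syn_righthand) == 0)
        return CLAUSE_IGNORE;

    /* typical case: right argument is the RHS expression */
    if (c->right_varnos != 0 &&
        (c->right_varnos & ~syn_righthand) == 0 &&
        (c->left_varnos & syn_righthand) == 0)
        return CLAUSE_KEEP;

    /* flipped case: left argument is the RHS expression */
    if (c->left_varnos != 0 &&
        (c->left_varnos & ~syn_righthand) == 0 &&
        (c->right_varnos & syn_righthand) == 0)
        return c->has_commutator ? CLAUSE_KEEP_COMMUTED : CLAUSE_PUNT;

    return CLAUSE_PUNT;        /* arguments mix LHS and RHS rels */
}

int
main(void)
{
    /* RHS = rel with bit 0x4; clause is lhs.x = rhs.y */
    ClauseSketch c = {0x2, 0x4, true};

    printf("verdict: %d\n", classify_clause(&c, 0x4)); /* expect CLAUSE_KEEP */
    return 0;
}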
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
index d1c4e99a9b70578b64254027732988404f3865b1..f0acc143f0d2f1fc4f78dfe5770c07d055bcce7d 100644
--- a/src/backend/optimizer/util/orclauses.c
+++ b/src/backend/optimizer/util/orclauses.c
@@ -335,7 +335,10 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
 	/* we don't bother trying to make the remaining fields valid */
 	sjinfo.lhs_strict = false;
 	sjinfo.delay_upper_joins = false;
-	sjinfo.join_quals = NIL;
+	sjinfo.semi_can_btree = false;
+	sjinfo.semi_can_hash = false;
+	sjinfo.semi_operators = NIL;
+	sjinfo.semi_rhs_exprs = NIL;
 
 	/* Compute inner-join size */
 	orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1395a2113982b2b908b38ef57573322dbc3c90ee..faca30b322d530b512c4404bfdff0a9fd5cdca16 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1088,12 +1088,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	Path		sort_path;		/* dummy for result of cost_sort */
 	Path		agg_path;		/* dummy for result of cost_agg */
 	MemoryContext oldcontext;
-	List	   *in_operators;
-	List	   *uniq_exprs;
-	bool		all_btree;
-	bool		all_hash;
 	int			numCols;
-	ListCell   *lc;
 
 	/* Caller made a mistake if subpath isn't cheapest_total ... */
 	Assert(subpath == rel->cheapest_total_path);
@@ -1106,8 +1101,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	if (rel->cheapest_unique_path)
 		return (UniquePath *) rel->cheapest_unique_path;
 
-	/* If we previously failed, return NULL quickly */
-	if (sjinfo->join_quals == NIL)
+	/* If it's not possible to unique-ify, return NULL */
+	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
 		return NULL;
 
 	/*
@@ -1116,150 +1111,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	/*----------
-	 * Look to see whether the semijoin's join quals consist of AND'ed
-	 * equality operators, with (only) RHS variables on only one side of
-	 * each one.  If so, we can figure out how to enforce uniqueness for
-	 * the RHS.
-	 *
-	 * Note that the input join_quals list is the list of quals that are
-	 * *syntactically* associated with the semijoin, which in practice means
-	 * the synthesized comparison list for an IN or the WHERE of an EXISTS.
-	 * Particularly in the latter case, it might contain clauses that aren't
-	 * *semantically* associated with the join, but refer to just one side or
-	 * the other.  We can ignore such clauses here, as they will just drop
-	 * down to be processed within one side or the other.  (It is okay to
-	 * consider only the syntactically-associated clauses here because for a
-	 * semijoin, no higher-level quals could refer to the RHS, and so there
-	 * can be no other quals that are semantically associated with this join.
-	 * We do things this way because it is useful to be able to run this test
-	 * before we have extracted the list of quals that are actually
-	 * semantically associated with the particular join.)
-	 *
-	 * Note that the in_operators list consists of the joinqual operators
-	 * themselves (but commuted if needed to put the RHS value on the right).
-	 * These could be cross-type operators, in which case the operator
-	 * actually needed for uniqueness is a related single-type operator.
-	 * We assume here that that operator will be available from the btree
-	 * or hash opclass when the time comes ... if not, create_unique_plan()
-	 * will fail.
-	 *----------
-	 */
-	in_operators = NIL;
-	uniq_exprs = NIL;
-	all_btree = true;
-	all_hash = enable_hashagg;	/* don't consider hash if not enabled */
-	foreach(lc, sjinfo->join_quals)
-	{
-		OpExpr	   *op = (OpExpr *) lfirst(lc);
-		Oid			opno;
-		Node	   *left_expr;
-		Node	   *right_expr;
-		Relids		left_varnos;
-		Relids		right_varnos;
-		Relids		all_varnos;
-		Oid			opinputtype;
-
-		/* Is it a binary opclause? */
-		if (!IsA(op, OpExpr) ||
-			list_length(op->args) != 2)
-		{
-			/* No, but does it reference both sides? */
-			all_varnos = pull_varnos((Node *) op);
-			if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
-				bms_is_subset(all_varnos, sjinfo->syn_righthand))
-			{
-				/*
-				 * Clause refers to only one rel, so ignore it --- unless it
-				 * contains volatile functions, in which case we'd better
-				 * punt.
-				 */
-				if (contain_volatile_functions((Node *) op))
-					goto no_unique_path;
-				continue;
-			}
-			/* Non-operator clause referencing both sides, must punt */
-			goto no_unique_path;
-		}
-
-		/* Extract data from binary opclause */
-		opno = op->opno;
-		left_expr = linitial(op->args);
-		right_expr = lsecond(op->args);
-		left_varnos = pull_varnos(left_expr);
-		right_varnos = pull_varnos(right_expr);
-		all_varnos = bms_union(left_varnos, right_varnos);
-		opinputtype = exprType(left_expr);
-
-		/* Does it reference both sides? */
-		if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
-			bms_is_subset(all_varnos, sjinfo->syn_righthand))
-		{
-			/*
-			 * Clause refers to only one rel, so ignore it --- unless it
-			 * contains volatile functions, in which case we'd better punt.
-			 */
-			if (contain_volatile_functions((Node *) op))
-				goto no_unique_path;
-			continue;
-		}
-
-		/* check rel membership of arguments */
-		if (!bms_is_empty(right_varnos) &&
-			bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
-			!bms_overlap(left_varnos, sjinfo->syn_righthand))
-		{
-			/* typical case, right_expr is RHS variable */
-		}
-		else if (!bms_is_empty(left_varnos) &&
-				 bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
-				 !bms_overlap(right_varnos, sjinfo->syn_righthand))
-		{
-			/* flipped case, left_expr is RHS variable */
-			opno = get_commutator(opno);
-			if (!OidIsValid(opno))
-				goto no_unique_path;
-			right_expr = left_expr;
-		}
-		else
-			goto no_unique_path;
-
-		/* all operators must be btree equality or hash equality */
-		if (all_btree)
-		{
-			/* oprcanmerge is considered a hint... */
-			if (!op_mergejoinable(opno, opinputtype) ||
-				get_mergejoin_opfamilies(opno) == NIL)
-				all_btree = false;
-		}
-		if (all_hash)
-		{
-			/* ... but oprcanhash had better be correct */
-			if (!op_hashjoinable(opno, opinputtype))
-				all_hash = false;
-		}
-		if (!(all_btree || all_hash))
-			goto no_unique_path;
-
-		/* so far so good, keep building lists */
-		in_operators = lappend_oid(in_operators, opno);
-		uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
-	}
-
-	/* Punt if we didn't find at least one column to unique-ify */
-	if (uniq_exprs == NIL)
-		goto no_unique_path;
-
-	/*
-	 * The expressions we'd need to unique-ify mustn't be volatile.
-	 */
-	if (contain_volatile_functions((Node *) uniq_exprs))
-		goto no_unique_path;
-
-	/*
-	 * If we get here, we can unique-ify using at least one of sorting and
-	 * hashing.  Start building the result Path object.
-	 */
 
 	pathnode = makeNode(UniquePath);
 
 	pathnode->path.pathtype = T_Unique;
@@ -1273,18 +1124,19 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.pathkeys = NIL;
 
 	pathnode->subpath = subpath;
-	pathnode->in_operators = in_operators;
-	pathnode->uniq_exprs = uniq_exprs;
+	pathnode->in_operators = sjinfo->semi_operators;
+	pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;
 
 	/*
 	 * If the input is a relation and it has a unique index that proves the
-	 * uniq_exprs are unique, then we don't need to do anything.  Note that
-	 * relation_has_unique_index_for automatically considers restriction
+	 * semi_rhs_exprs are unique, then we don't need to do anything.  Note
+	 * that relation_has_unique_index_for automatically considers restriction
 	 * clauses for the rel, as well.
 	 */
-	if (rel->rtekind == RTE_RELATION && all_btree &&
+	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
 		relation_has_unique_index_for(root, rel, NIL,
-									  uniq_exprs, in_operators))
+									  sjinfo->semi_rhs_exprs,
+									  sjinfo->semi_operators))
 	{
 		pathnode->umethod = UNIQUE_PATH_NOOP;
 		pathnode->path.rows = rel->rows;
@@ -1304,7 +1156,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 * don't need to do anything.  The test for uniqueness has to consider
 	 * exactly which columns we are extracting; for example "SELECT DISTINCT
 	 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-	 * this optimization unless uniq_exprs consists only of simple Vars
+	 * this optimization unless semi_rhs_exprs consists only of simple Vars
 	 * referencing subquery outputs.  (Possibly we could do something with
 	 * expressions in the subquery outputs, too, but for now keep it simple.)
 	 */
@@ -1316,11 +1168,13 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	{
 		List	   *sub_tlist_colnos;
 
-		sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
+		sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
+											   rel->relid);
 
 		if (sub_tlist_colnos &&
 			query_is_distinct_for(rte->subquery,
-								  sub_tlist_colnos, in_operators))
+								  sub_tlist_colnos,
+								  sjinfo->semi_operators))
 		{
 			pathnode->umethod = UNIQUE_PATH_NOOP;
 			pathnode->path.rows = rel->rows;
@@ -1338,10 +1192,12 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	}
 
 	/* Estimate number of output rows */
-	pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
-	numCols = list_length(uniq_exprs);
+	pathnode->path.rows = estimate_num_groups(root,
+											  sjinfo->semi_rhs_exprs,
+											  rel->rows);
+	numCols = list_length(sjinfo->semi_rhs_exprs);
 
-	if (all_btree)
+	if (sjinfo->semi_can_btree)
 	{
 		/*
 		 * Estimate cost for sort+unique implementation
@@ -1363,7 +1219,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
 	}
 
-	if (all_hash)
+	if (sjinfo->semi_can_hash)
 	{
 		/*
 		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -1372,7 +1228,13 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		int			hashentrysize = rel->width + 64;
 
 		if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
-			all_hash = false;	/* don't try to hash */
+		{
+			/*
+			 * We should not try to hash.  Hack the SpecialJoinInfo to
+			 * remember this, in case we come through here again.
+			 */
+			sjinfo->semi_can_hash = false;
+		}
 		else
 			cost_agg(&agg_path, root,
 					 AGG_HASHED, NULL,
@@ -1382,19 +1244,23 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 					 rel->rows);
 	}
 
-	if (all_btree && all_hash)
+	if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
 	{
 		if (agg_path.total_cost < sort_path.total_cost)
 			pathnode->umethod = UNIQUE_PATH_HASH;
 		else
 			pathnode->umethod = UNIQUE_PATH_SORT;
 	}
-	else if (all_btree)
+	else if (sjinfo->semi_can_btree)
 		pathnode->umethod = UNIQUE_PATH_SORT;
-	else if (all_hash)
+	else if (sjinfo->semi_can_hash)
 		pathnode->umethod = UNIQUE_PATH_HASH;
 	else
-		goto no_unique_path;
+	{
+		/* we can get here only if we abandoned hashing above */
+		MemoryContextSwitchTo(oldcontext);
+		return NULL;
+	}
 
 	if (pathnode->umethod == UNIQUE_PATH_HASH)
 	{
@@ -1412,15 +1278,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	MemoryContextSwitchTo(oldcontext);
 
 	return pathnode;
-
-no_unique_path:		/* failure exit */
-
-	/* Mark the SpecialJoinInfo as not unique-able */
-	sjinfo->join_quals = NIL;
-
-	MemoryContextSwitchTo(oldcontext);
-
-	return NULL;
 }
 
 /*
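Note: with the analysis moved into initsplan.c, create_unique_path() is reduced to choosing a unique-ification method from the precomputed flags plus the sort/hash-agg cost comparison and the work_mem check. A standalone C sketch of that decision (illustrative numbers and names only, not the real planner routine) follows:

/*
 * Sketch only: method selection driven by semi_can_btree / semi_can_hash.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum { UNIQUE_NONE, UNIQUE_SORT, UNIQUE_HASH } UniqueMethodSketch;

static UniqueMethodSketch
choose_unique_method(bool can_btree, bool can_hash,
                     double sort_cost, double hash_cost,
                     double hashtable_bytes, double work_mem_bytes)
{
    /* a hash table that would exceed work_mem disqualifies hashing */
    if (can_hash && hashtable_bytes > work_mem_bytes)
        can_hash = false;

    if (can_btree && can_hash)
        return (hash_cost < sort_cost) ? UNIQUE_HASH : UNIQUE_SORT;
    if (can_btree)
        return UNIQUE_SORT;
    if (can_hash)
        return UNIQUE_HASH;
    return UNIQUE_NONE;         /* caller falls back to a plain semijoin */
}

int
main(void)
{
    UniqueMethodSketch m;

    m = choose_unique_method(true, true, 1500.0, 900.0,
                             64.0 * 10000, 4096.0 * 1024);
    printf("method: %d\n", m);  /* expect UNIQUE_HASH */
    return 0;
}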
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 6845a403e1d28a38c074d332ea682ca3f95ad5d7..e6dd4e8515da5318a27f42852fd559f7ba77a141 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -101,8 +101,7 @@ typedef struct PlannerGlobal
 
 	bool		transientPlan;	/* redo plan when TransactionXmin changes? */
 
-	bool		hasRowSecurity; /* row security applied? */
-
+	bool		hasRowSecurity; /* row security applied? */
 } PlannerGlobal;
 
 /* macro for fetching the Plan associated with a SubPlan node */
@@ -1374,11 +1373,13 @@ typedef struct PlaceHolderVar
  * commute with this join, because that would leave noplace to check the
  * pushed-down clause.  (We don't track this for FULL JOINs, either.)
  *
- * join_quals is an implicit-AND list of the quals syntactically associated
- * with the join (they may or may not end up being applied at the join level).
- * This is just a side list and does not drive actual application of quals.
- * For JOIN_SEMI joins, this is cleared to NIL in create_unique_path() if
- * the join is found not to be suitable for a uniqueify-the-RHS plan.
+ * For a semijoin, we also extract the join operators and their RHS arguments
+ * and set semi_operators, semi_rhs_exprs, semi_can_btree, and semi_can_hash.
+ * This is done in support of possibly unique-ifying the RHS, so we don't
+ * bother unless at least one of semi_can_btree and semi_can_hash can be set
+ * true.  (You might expect that this information would be computed during
+ * join planning; but it's helpful to have it available during planning of
+ * parameterized table scans, so we store it in the SpecialJoinInfo structs.)
 *
 * jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching
 * the inputs to make it a LEFT JOIN.  So the allowed values of jointype
@@ -1391,7 +1392,7 @@ typedef struct PlaceHolderVar
 * SpecialJoinInfos with jointype == JOIN_INNER for outer joins, since for
 * cost estimation purposes it is sometimes useful to know the join size under
 * plain innerjoin semantics.  Note that lhs_strict, delay_upper_joins, and
- * join_quals are not set meaningfully within such structs.
+ * of course the semi_xxx fields are not set meaningfully within such structs.
 */
 
 typedef struct SpecialJoinInfo
@@ -1404,7 +1405,11 @@ typedef struct SpecialJoinInfo
 	JoinType	jointype;		/* always INNER, LEFT, FULL, SEMI, or ANTI */
 	bool		lhs_strict;		/* joinclause is strict for some LHS rel */
 	bool		delay_upper_joins;	/* can't commute with upper RHS */
-	List	   *join_quals;		/* join quals, in implicit-AND list format */
+	/* Remaining fields are set only for JOIN_SEMI jointype: */
+	bool		semi_can_btree; /* true if semi_operators are all btree */
+	bool		semi_can_hash;	/* true if semi_operators are all hash */
+	List	   *semi_operators; /* OIDs of equality join operators */
+	List	   *semi_rhs_exprs; /* righthand-side expressions of these ops */
 } SpecialJoinInfo;
 
 /*
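Note on the new struct fields: semi_operators and semi_rhs_exprs are parallel lists, one equality-operator OID per righthand-side expression, and consumers walk them in lockstep. A minimal standalone sketch of that invariant (arrays and illustrative OID values stand in for the planner's List cells; not PostgreSQL code):

/*
 * Sketch only: the parallel-list relationship between semi_operators and
 * semi_rhs_exprs.
 */
#include <stdio.h>

typedef unsigned int OidSketch;

int
main(void)
{
    OidSketch   semi_operators[] = {96, 410};          /* illustrative OIDs */
    const char *semi_rhs_exprs[] = {"rhs.x", "rhs.y"};
    int         ncols = 2;

    for (int i = 0; i < ncols; i++)
        printf("unique-ify on %s using operator %u\n",
               semi_rhs_exprs[i], semi_operators[i]);
    return 0;
}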