diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 817b14947775b808336123aa3e5292b73ff4ff22..b39927e02550fa3c9144e143ee39f8a785031bd2 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1698,6 +1698,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_UINT_FIELD(query_level); WRITE_NODE_FIELD(plan_params); WRITE_BITMAPSET_FIELD(all_baserels); + WRITE_BITMAPSET_FIELD(nullable_baserels); WRITE_NODE_FIELD(join_rel_list); WRITE_INT_FIELD(join_cur_level); WRITE_NODE_FIELD(init_plans); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 711b161c0d1bb2366224063ed56feab8d92203dc..baddd34a741b32e21c36f7e94b2f2e25b107e0b4 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -510,6 +510,13 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * equivalence class it is a member of; if none, optionally build a new * single-member EquivalenceClass for it. * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. We actually only care about + * the set of such relids that are used in the expression; but for caller + * convenience, we perform that intersection step here. The caller need + * only be sure that nullable_relids doesn't omit any nullable rels that + * might appear in the expr. + * * sortref is the SortGroupRef of the originating SortGroupClause, if any, * or zero if not. (It should never be zero if the expression is volatile!) * @@ -538,6 +545,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, + Relids nullable_relids, List *opfamilies, Oid opcintype, Oid collation, @@ -545,6 +553,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it) { + Relids expr_relids; EquivalenceClass *newec; EquivalenceMember *newem; ListCell *lc1; @@ -555,6 +564,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, */ expr = canonicalize_ec_expression(expr, opcintype, collation); + /* + * Get the precise set of nullable relids appearing in the expression. + */ + expr_relids = pull_varnos((Node *) expr); + nullable_relids = bms_intersect(nullable_relids, expr_relids); + /* * Scan through the existing EquivalenceClasses for a match */ @@ -629,8 +644,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (newec->ec_has_volatile && sortref == 0) /* should not happen */ elog(ERROR, "volatile EquivalenceClass has no sortref"); - newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr), - NULL, false, opcintype); + newem = add_eq_member(newec, copyObject(expr), expr_relids, + nullable_relids, false, opcintype); /* * add_eq_member doesn't check for volatile functions, set-returning diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 672499691951f36c9a215ea0c5e0f7ecd039db97..032b2cdc1330e5b65508e84d6b48026e7e0d0897 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -154,6 +154,9 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) * Given an expression and sort-order information, create a PathKey. * The result is always a "canonical" PathKey, but it might be redundant. * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. + * * If the PathKey is being generated from a SortGroupClause, sortref should be * the SortGroupClause's SortGroupRef; otherwise zero. * @@ -169,6 +172,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, + Relids nullable_relids, Oid opfamily, Oid opcintype, Oid collation, @@ -204,8 +208,8 @@ make_pathkey_from_sortinfo(PlannerInfo *root, equality_op); /* Now find or (optionally) create a matching EquivalenceClass */ - eclass = get_eclass_for_sort_expr(root, expr, opfamilies, - opcintype, collation, + eclass = get_eclass_for_sort_expr(root, expr, nullable_relids, + opfamilies, opcintype, collation, sortref, rel, create_it); /* Fail if no EC and !create_it */ @@ -227,6 +231,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root, static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, + Relids nullable_relids, Oid ordering_op, bool nulls_first, Index sortref, @@ -248,6 +253,7 @@ make_pathkey_from_sortop(PlannerInfo *root, return make_pathkey_from_sortinfo(root, expr, + nullable_relids, opfamily, opcintype, collation, @@ -461,9 +467,13 @@ build_index_pathkeys(PlannerInfo *root, nulls_first = index->nulls_first[i]; } - /* OK, try to make a canonical pathkey for this sort key */ + /* + * OK, try to make a canonical pathkey for this sort key. Note we're + * underneath any outer joins, so nullable_relids should be NULL. + */ cpathkey = make_pathkey_from_sortinfo(root, indexkey, + NULL, index->sortopfamily[i], index->opcintype[i], index->indexcollations[i], @@ -551,11 +561,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * expression is *not* volatile in the outer query: it's just * a Var referencing whatever the subquery emitted. (IOW, the * outer query isn't going to re-execute the volatile - * expression itself.) So this is okay. + * expression itself.) So this is okay. Likewise, it's + * correct to pass nullable_relids = NULL, because we're + * underneath any outer joins appearing in the outer query. */ outer_ec = get_eclass_for_sort_expr(root, outer_expr, + NULL, sub_eclass->ec_opfamilies, sub_member->em_datatype, sub_eclass->ec_collation, @@ -643,6 +656,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, /* See if we have a matching EC for that */ outer_ec = get_eclass_for_sort_expr(root, outer_expr, + NULL, sub_eclass->ec_opfamilies, sub_expr_type, sub_expr_coll, @@ -748,6 +762,13 @@ build_join_pathkeys(PlannerInfo *root, * The resulting PathKeys are always in canonical form. (Actually, there * is no longer any code anywhere that creates non-canonical PathKeys.) * + * We assume that root->nullable_baserels is the set of base relids that could + * have gone to NULL below the SortGroupClause expressions. This is okay if + * the expressions came from the query's top level (ORDER BY, DISTINCT, etc) + * and if this function is only invoked after deconstruct_jointree. In the + * future we might have to make callers pass in the appropriate + * nullable-relids set, but for now it seems unnecessary. + * * 'sortclauses' is a list of SortGroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ @@ -769,6 +790,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, Assert(OidIsValid(sortcl->sortop)); pathkey = make_pathkey_from_sortop(root, sortkey, + root->nullable_baserels, sortcl->sortop, sortcl->nulls_first, sortcl->tleSortGroupRef, @@ -824,6 +846,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) restrictinfo->left_ec = get_eclass_for_sort_expr(root, (Expr *) get_leftop(clause), + restrictinfo->nullable_relids, restrictinfo->mergeopfamilies, lefttype, ((OpExpr *) clause)->inputcollid, @@ -833,6 +856,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) restrictinfo->right_ec = get_eclass_for_sort_expr(root, (Expr *) get_rightop(clause), + restrictinfo->nullable_relids, restrictinfo->mergeopfamilies, righttype, ((OpExpr *) clause)->inputcollid, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index c5998b9e2de21dfa5bff46554beddad4da8a93e9..04a399ee13cd549a1b3dec3a44f0f09bbefdc99c 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -649,6 +649,9 @@ deconstruct_jointree(PlannerInfo *root) Assert(root->parse->jointree != NULL && IsA(root->parse->jointree, FromExpr)); + /* this is filled as we scan the jointree */ + root->nullable_baserels = NULL; + result = deconstruct_recurse(root, (Node *) root->parse->jointree, false, &qualscope, &inner_join_rels, &postponed_qual_list); @@ -790,6 +793,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, left_inners, right_inners, nonnullable_rels, + nullable_rels, ojscope; List *leftjoinlist, *rightjoinlist; @@ -823,6 +827,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = *qualscope; /* Inner join adds no restrictions for quals */ nonnullable_rels = NULL; + /* and it doesn't force anything to null, either */ + nullable_rels = NULL; break; case JOIN_LEFT: case JOIN_ANTI: @@ -837,6 +843,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *qualscope = bms_union(leftids, rightids); *inner_join_rels = bms_union(left_inners, right_inners); nonnullable_rels = leftids; + nullable_rels = rightids; break; case JOIN_SEMI: leftjoinlist = deconstruct_recurse(root, j->larg, @@ -851,6 +858,13 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = bms_union(left_inners, right_inners); /* Semi join adds no restrictions for quals */ nonnullable_rels = NULL; + + /* + * Theoretically, a semijoin would null the RHS; but since the + * RHS can't be accessed above the join, this is immaterial + * and we needn't account for it. + */ + nullable_rels = NULL; break; case JOIN_FULL: leftjoinlist = deconstruct_recurse(root, j->larg, @@ -865,16 +879,22 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = bms_union(left_inners, right_inners); /* each side is both outer and inner */ nonnullable_rels = *qualscope; + nullable_rels = *qualscope; break; default: /* JOIN_RIGHT was eliminated during reduce_outer_joins() */ elog(ERROR, "unrecognized join type: %d", (int) j->jointype); nonnullable_rels = NULL; /* keep compiler quiet */ + nullable_rels = NULL; leftjoinlist = rightjoinlist = NIL; break; } + /* Report all rels that will be nulled anywhere in the jointree */ + root->nullable_baserels = bms_add_members(root->nullable_baserels, + nullable_rels); + /* * For an OJ, form the SpecialJoinInfo now, because we need the OJ's * semantic scope (ojscope) to pass to distribute_qual_to_rels. But diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index a2853fbf044b73f5e1cf2c7bbce4668842630df4..6d7b5948cd9fb638b366587cfc596bf9adf93ec6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -150,10 +150,19 @@ typedef struct PlannerInfo /* * all_baserels is a Relids set of all base relids (but not "other" * relids) in the query; that is, the Relids identifier of the final join - * we need to form. + * we need to form. This is computed in make_one_rel, just before we + * start making Paths. */ Relids all_baserels; + /* + * nullable_baserels is a Relids set of base relids that are nullable by + * some outer join in the jointree; these are rels that are potentially + * nullable below the WHERE clause, SELECT targetlist, etc. This is + * computed in deconstruct_jointree. + */ + Relids nullable_baserels; + /* * join_rel_list is a list of all join-relation RelOptInfos we have * considered in this planning run. For small problems we just scan the diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ef93c70c6412c5fd1a6171519e0715c0bab1002..96ffdb195fe62e03e0f5a33ce854855392d5886b 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -109,6 +109,7 @@ extern Expr *canonicalize_ec_expression(Expr *expr, extern void reconsider_outer_join_clauses(PlannerInfo *root); extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, + Relids nullable_relids, List *opfamilies, Oid opcintype, Oid collation, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index b59004ddf3cc33748e78ba7d579f2c641ab39a08..b9559ea607599572393c1f6cba7986fcaa298443 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2900,6 +2900,35 @@ select f1, unique2, case when unique2 is null then f1 else 0 end 0 | 0 | 0 (1 row) +-- +-- another case with equivalence clauses above outer joins (bug #8591) +-- +explain (costs off) +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Nested Loop Left Join + -> Nested Loop Left Join + Filter: (COALESCE(b.twothousand, a.twothousand) = 44) + -> Index Scan using tenk1_unique2 on tenk1 a + Index Cond: (unique2 = 5530) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = a.unique1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = a.unique1) + -> Index Scan using tenk1_unique2 on tenk1 c + Index Cond: ((unique2 = COALESCE(b.twothousand, a.twothousand)) AND (unique2 = 44)) +(11 rows) + +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + unique1 | unique1 | unique1 | coalesce +---------+---------+---------+---------- +(0 rows) + -- -- test ability to push constants through outer join clauses -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index e43b0fc0ffcebb93c613dd38a5a67026b46bb36d..1edf9b3897b5a5637de0757bbd7d8f95900ce980 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -790,6 +790,19 @@ select f1, unique2, case when unique2 is null then f1 else 0 end from int4_tbl a left join tenk1 b on f1 = unique2 where (case when unique2 is null then f1 else 0 end) = 0; +-- +-- another case with equivalence clauses above outer joins (bug #8591) +-- + +explain (costs off) +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + -- -- test ability to push constants through outer join clauses --