diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index be2ef3becfe0a1129b4fa7f56b0ab37b789e382a..68a93a1a5bdf93ed5d8777bc7130310bf9c64bdd 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -982,9 +982,18 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + /* + * In principle, join_clause_is_movable_into() should accept anything + * returned by generate_join_implied_equalities(); but because its + * analysis is only approximate, sometimes it doesn't. So we + * currently cannot use this Assert; instead just assume it's okay to + * apply the joinclause at this level. + */ +#ifdef NOT_USED Assert(join_clause_is_movable_into(rinfo, joinrel->relids, join_and_req)); +#endif if (!join_clause_is_movable_into(rinfo, outer_path->parent->relids, outer_and_req) && diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index e5f783651758451d31543f47eed765d4c5ad46f0..65499902f6c1d7103b7260cbfaf7b1f096a8b446 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -464,10 +464,9 @@ extract_actual_join_clauses(List *restrictinfo_list, * outer join, as that would change the results (rows would be suppressed * rather than being null-extended). * - * Also the target relation must not be in the clause's nullable_relids, i.e., - * there must not be an outer join below the clause that would null the Vars - * coming from the target relation. Otherwise the clause might give results - * different from what it would give at its normal semantic level. + * Also there must not be an outer join below the clause that would null the + * Vars coming from the target relation. Otherwise the clause might give + * results different from what it would give at its normal semantic level. * * Also, the join clause must not use any relations that have LATERAL * references to the target relation, since we could not put such rels on @@ -516,10 +515,31 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) * not pushing the clause into its outer-join outer side, nor down into * a lower outer join's inner side. * + * The check about pushing a clause down into a lower outer join's inner side + * is only approximate; it sometimes returns "false" when actually it would + * be safe to use the clause here because we're still above the outer join + * in question. This is okay as long as the answers at different join levels + * are consistent: it just means we might sometimes fail to push a clause as + * far down as it could safely be pushed. It's unclear whether it would be + * worthwhile to do this more precisely. (But if it's ever fixed to be + * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that + * should be re-enabled.) + * * There's no check here equivalent to join_clause_is_movable_to's test on * lateral_referencers. We assume the caller wouldn't be inquiring unless * it'd verified that the proposed outer rels don't have lateral references - * to the current rel(s). + * to the current rel(s). (If we are considering join paths with the outer + * rels on the outside and the current rels on the inside, then this should + * have been checked at the outset of such consideration; see join_is_legal + * and the path parameterization checks in joinpath.c.) On the other hand, + * in join_clause_is_movable_to we are asking whether the clause could be + * moved for some valid set of outer rels, so we don't have the benefit of + * relying on prior checks for lateral-reference validity. + * + * Note: if this returns true, it means that the clause could be moved to + * this join relation, but that doesn't mean that this is the lowest join + * it could be moved to. Caller may need to make additional calls to verify + * that this doesn't succeed on either of the inputs of a proposed join. * * Note: get_joinrel_parampathinfo depends on the fact that if * current_and_outer is NULL, this function will always return false @@ -534,7 +554,7 @@ join_clause_is_movable_into(RestrictInfo *rinfo, if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) return false; - /* Clause must physically reference target rel(s) */ + /* Clause must physically reference at least one target rel */ if (!bms_overlap(currentrelids, rinfo->clause_relids)) return false; @@ -542,7 +562,12 @@ join_clause_is_movable_into(RestrictInfo *rinfo, if (bms_overlap(currentrelids, rinfo->outer_relids)) return false; - /* Target rel(s) must not be nullable below the clause */ + /* + * Target rel(s) must not be nullable below the clause. This is + * approximate, in the safe direction, because the current join might be + * above the join where the nulling would happen, in which case the clause + * would work correctly here. But we don't have enough info to be sure. + */ if (bms_overlap(currentrelids, rinfo->nullable_relids)) return false; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4ce01cbcd5b6b7417150480facb0294ff3e2d52f..1afd0c328b5b8c5c56861bb7e28216d6695ee1c7 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2218,6 +2218,54 @@ order by 1, 2; 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 123 (5 rows) +-- +-- regression test: check a case where join_clause_is_movable_into() gives +-- an imprecise result +-- +analyze pg_enum; +explain (costs off) +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + Join Filter: (pg_enum.enumtypid = t2.oid) + -> Nested Loop Left Join + -> Hash Join + Hash Cond: ((t.typanalyze)::oid = pa.oid) + -> Seq Scan on pg_type t + -> Hash + -> Hash Join + Hash Cond: (pa.proname = pg_enum.enumlabel) + -> Seq Scan on pg_proc pa + -> Hash + -> Seq Scan on pg_enum + -> Index Scan using pg_proc_oid_index on pg_proc po + Index Cond: (oid = (t.typoutput)::oid) + -> Index Scan using pg_type_typname_nsp_index on pg_type t2 + Index Cond: (typname = COALESCE(po.proname, t.typname)) +(16 rows) + +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + anname | outname | enumtypid +--------+---------+----------- +(0 rows) + -- -- Clean up -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 3a71dbf4dffe38078ab5f204014cb65abd57a81b..d34cefac5a18fb0e63562ea137abee70fc43d3be 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -377,6 +377,32 @@ select * from int8_tbl i1 left join (int8_tbl i2 join (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2 order by 1, 2; +-- +-- regression test: check a case where join_clause_is_movable_into() gives +-- an imprecise result +-- +analyze pg_enum; +explain (costs off) +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + +select anname, outname, enumtypid +from + (select pa.proname as anname, coalesce(po.proname, typname) as outname + from pg_type t + left join pg_proc po on po.oid = t.typoutput + join pg_proc pa on pa.oid = t.typanalyze) ss, + pg_enum, + pg_type t2 +where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid; + -- -- Clean up