From d6a8a29ac2bca06d4466e449f13d0cc52220714b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 28 Jul 2015 13:20:40 -0400
Subject: [PATCH] Remove an unsafe Assert, and explain
 join_clause_is_movable_into() better.

join_clause_is_movable_into() is approximate, in the sense that it might
sometimes return "false" when actually it would be valid to push the given
join clause down to the specified level.  This is okay ... but there was
an Assert in get_joinrel_parampathinfo() that's only safe if the answers
are always exact.  Comment out the Assert, and add a bunch of commentary
to clarify what's going on.

Per fuzz testing by Andreas Seltenreich.  The added regression test is
a pretty silly query, but it's based on his crasher example.

Back-patch to 9.2 where the faulty logic was introduced.
---
 src/backend/optimizer/util/relnode.c      |  9 +++++
 src/backend/optimizer/util/restrictinfo.c | 31 ++++++++++++---
 src/test/regress/expected/join.out        | 48 +++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 27 +++++++++++++
 4 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index edbe6252408..7e4cd64d9b9 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -931,9 +931,18 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
 
+		/*
+		 * In principle, join_clause_is_movable_into() should accept anything
+		 * returned by generate_join_implied_equalities(); but because its
+		 * analysis is only approximate, sometimes it doesn't.  So we
+		 * currently cannot use this Assert; instead just assume it's okay to
+		 * apply the joinclause at this level.
+		 */
+#ifdef NOT_USED
 		Assert(join_clause_is_movable_into(rinfo,
 										   joinrel->relids,
 										   join_and_req));
+#endif
 		if (!join_clause_is_movable_into(rinfo,
 										 outer_path->parent->relids,
 										 outer_and_req) &&
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 9ab719c5d48..26189b403a6 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -651,10 +651,9 @@ extract_actual_join_clauses(List *restrictinfo_list,
  * outer join, as that would change the results (rows would be suppressed
  * rather than being null-extended).
  *
- * And the target relation must not be in the clause's nullable_relids, i.e.,
- * there must not be an outer join below the clause that would null the Vars
- * coming from the target relation.  Otherwise the clause might give results
- * different from what it would give at its normal semantic level.
+ * Also there must not be an outer join below the clause that would null the
+ * Vars coming from the target relation.  Otherwise the clause might give
+ * results different from what it would give at its normal semantic level.
  */
 bool
 join_clause_is_movable_to(RestrictInfo *rinfo, Index baserelid)
@@ -695,6 +694,21 @@ join_clause_is_movable_to(RestrictInfo *rinfo, Index baserelid)
  * not pushing the clause into its outer-join outer side, nor down into
  * a lower outer join's inner side.
  *
+ * The check about pushing a clause down into a lower outer join's inner side
+ * is only approximate; it sometimes returns "false" when actually it would
+ * be safe to use the clause here because we're still above the outer join
+ * in question.  This is okay as long as the answers at different join levels
+ * are consistent: it just means we might sometimes fail to push a clause as
+ * far down as it could safely be pushed.  It's unclear whether it would be
+ * worthwhile to do this more precisely.  (But if it's ever fixed to be
+ * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that
+ * should be re-enabled.)
+ *
+ * Note: if this returns true, it means that the clause could be moved to
+ * this join relation, but that doesn't mean that this is the lowest join
+ * it could be moved to.  Caller may need to make additional calls to verify
+ * that this doesn't succeed on either of the inputs of a proposed join.
+ *
  * Note: get_joinrel_parampathinfo depends on the fact that if
  * current_and_outer is NULL, this function will always return false
  * (since one or the other of the first two tests must fail).
@@ -708,7 +722,7 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
 	if (!bms_is_subset(rinfo->clause_relids, current_and_outer))
 		return false;
 
-	/* Clause must physically reference target rel(s) */
+	/* Clause must physically reference at least one target rel */
 	if (!bms_overlap(currentrelids, rinfo->clause_relids))
 		return false;
 
@@ -716,7 +730,12 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
 	if (bms_overlap(currentrelids, rinfo->outer_relids))
 		return false;
 
-	/* Target rel(s) must not be nullable below the clause */
+	/*
+	 * Target rel(s) must not be nullable below the clause.  This is
+	 * approximate, in the safe direction, because the current join might be
+	 * above the join where the nulling would happen, in which case the clause
+	 * would work correctly here.  But we don't have enough info to be sure.
+	 */
 	if (bms_overlap(currentrelids, rinfo->nullable_relids))
 		return false;
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 84fffe30657..d909f7e07a5 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2184,6 +2184,54 @@ select aa, bb, unique1, unique1
 ----+----+---------+---------
 (0 rows)
 
+--
+-- regression test: check a case where join_clause_is_movable_into() gives
+-- an imprecise result
+--
+analyze pg_enum;
+explain (costs off)
+select anname, outname, enumtypid
+from
+  (select pa.proname as anname, coalesce(po.proname, typname) as outname
+   from pg_type t
+     left join pg_proc po on po.oid = t.typoutput
+     join pg_proc pa on pa.oid = t.typanalyze) ss,
+  pg_enum,
+  pg_type t2
+where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (pg_enum.enumtypid = t2.oid)
+   ->  Nested Loop Left Join
+         ->  Hash Join
+               Hash Cond: ((t.typanalyze)::oid = pa.oid)
+               ->  Seq Scan on pg_type t
+               ->  Hash
+                     ->  Hash Join
+                           Hash Cond: (pa.proname = pg_enum.enumlabel)
+                           ->  Seq Scan on pg_proc pa
+                           ->  Hash
+                                 ->  Seq Scan on pg_enum
+         ->  Index Scan using pg_proc_oid_index on pg_proc po
+               Index Cond: (oid = (t.typoutput)::oid)
+   ->  Index Scan using pg_type_typname_nsp_index on pg_type t2
+         Index Cond: (typname = COALESCE(po.proname, t.typname))
+(16 rows)
+
+select anname, outname, enumtypid
+from
+  (select pa.proname as anname, coalesce(po.proname, typname) as outname
+   from pg_type t
+     left join pg_proc po on po.oid = t.typoutput
+     join pg_proc pa on pa.oid = t.typanalyze) ss,
+  pg_enum,
+  pg_type t2
+where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
+ anname | outname | enumtypid 
+--------+---------+-----------
+(0 rows)
+
 --
 -- Clean up
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index cfccb13ba85..16e5b7d0c54 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -365,6 +365,33 @@ select aa, bb, unique1, unique1
   from tenk1 right join b on aa = unique1
   where bb < bb and bb is null;
 
+--
+-- regression test: check a case where join_clause_is_movable_into() gives
+-- an imprecise result
+--
+analyze pg_enum;
+explain (costs off)
+select anname, outname, enumtypid
+from
+  (select pa.proname as anname, coalesce(po.proname, typname) as outname
+   from pg_type t
+     left join pg_proc po on po.oid = t.typoutput
+     join pg_proc pa on pa.oid = t.typanalyze) ss,
+  pg_enum,
+  pg_type t2
+where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
+
+select anname, outname, enumtypid
+from
+  (select pa.proname as anname, coalesce(po.proname, typname) as outname
+   from pg_type t
+     left join pg_proc po on po.oid = t.typoutput
+     join pg_proc pa on pa.oid = t.typanalyze) ss,
+  pg_enum,
+  pg_type t2
+where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
+
+
 --
 -- Clean up
 --
-- 
GitLab