From 44618f92bb3f287124c616cc9d25cce5592c3d2b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 1 Aug 2015 20:57:41 -0400
Subject: [PATCH] Fix some planner issues with degenerate outer join clauses.

An outer join clause that didn't actually reference the RHS (perhaps only
after constant-folding) could confuse the join order enforcement logic,
leading to wrong query results.  Also, nested occurrences of such things
could trigger an Assertion that on reflection seems incorrect.

Per fuzz testing by Andreas Seltenreich.  The practical use of such cases
seems thin enough that it's not too surprising we've not heard field
reports about it.

This has been broken for a long time, so back-patch to all active branches.
---
 src/backend/optimizer/path/joinrels.c  |  26 +++--
 src/backend/optimizer/plan/initsplan.c |  18 +++-
 src/test/regress/expected/join.out     | 129 +++++++++++++++++++++++++
 src/test/regress/sql/join.sql          |  50 ++++++++++
 4 files changed, 211 insertions(+), 12 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 7c317da941a..086f2b0523c 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -462,20 +462,26 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 		}
 		else
 		{
+			/*
+			 * Otherwise, the proposed join overlaps the RHS but isn't a valid
+			 * implementation of this SJ.  It might still be a legal join,
+			 * however, if it does not overlap the LHS.
+			 */
+			if (bms_overlap(joinrelids, sjinfo->min_lefthand))
+				return false;
+
 			/*----------
-			 * Otherwise, the proposed join overlaps the RHS but isn't
-			 * a valid implementation of this SJ.  It might still be
-			 * a legal join, however.  If both inputs overlap the RHS,
-			 * assume that it's OK.  Since the inputs presumably got past
-			 * this function's checks previously, they can't overlap the
-			 * LHS and their violations of the RHS boundary must represent
-			 * SJs that have been determined to commute with this one.
+			 * If both inputs overlap the RHS, assume that it's OK.  Since the
+			 * inputs presumably got past this function's checks previously,
+			 * their violations of the RHS boundary must represent SJs that
+			 * have been determined to commute with this one.
 			 * We have to allow this to work correctly in cases like
 			 *		(a LEFT JOIN (b JOIN (c LEFT JOIN d)))
 			 * when the c/d join has been determined to commute with the join
 			 * to a, and hence d is not part of min_righthand for the upper
 			 * join.  It should be legal to join b to c/d but this will appear
 			 * as a violation of the upper join's RHS.
+			 *
 			 * Furthermore, if one input overlaps the RHS and the other does
 			 * not, we should still allow the join if it is a valid
 			 * implementation of some other SJ.  We have to allow this to
@@ -491,11 +497,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 				bms_overlap(rel1->relids, sjinfo->min_righthand) &&
 				bms_overlap(rel2->relids, sjinfo->min_righthand))
 			{
-				/* seems OK */
-				Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
+				/* both overlap; assume OK */
 			}
 			else
+			{
+				/* one overlaps, the other doesn't (or it's a semijoin) */
 				is_valid_inner = false;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 4a41e5746bb..e49f48226e7 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -655,6 +655,20 @@ make_outerjoininfo(PlannerInfo *root,
 	min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
 									right_rels);
 
+	/*
+	 * If we have a degenerate join clause that doesn't mention any RHS rels,
+	 * force the min RHS to be the syntactic RHS; otherwise we can end up
+	 * making serious errors, like putting the LHS on the wrong side of an
+	 * outer join.  It seems to be safe to not do this when we have a
+	 * contribution from inner_join_rels, though; that's enough to pin the SJ
+	 * to occur at a reasonable place in the tree.
+	 */
+	if (bms_is_empty(min_righthand))
+		min_righthand = bms_copy(right_rels);
+
+	/*
+	 * Now check previous outer joins for ordering restrictions.
+	 */
 	foreach(l, root->join_info_list)
 	{
 		SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
@@ -756,12 +770,10 @@ make_outerjoininfo(PlannerInfo *root,
 	 * If we found nothing to put in min_lefthand, punt and make it the full
 	 * LHS, to avoid having an empty min_lefthand which will confuse later
 	 * processing. (We don't try to be smart about such cases, just correct.)
-	 * Likewise for min_righthand.
+	 * We already forced min_righthand nonempty, so nothing to do for that.
 	 */
 	if (bms_is_empty(min_lefthand))
 		min_lefthand = bms_copy(left_rels);
-	if (bms_is_empty(min_righthand))
-		min_righthand = bms_copy(right_rels);
 
 	/* Now they'd better be nonempty */
 	Assert(!bms_is_empty(min_lefthand));
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d909f7e07a5..a3214c3c415 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3188,6 +3188,135 @@ using (join_key);
        1 |         |          
 (2 rows)
 
+--
+-- test successful handling of nested outer joins with degenerate join quals
+--
+explain (verbose, costs off)
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Hash Left Join
+   Output: t1.f1
+   Hash Cond: (i8.q2 = i4.f1)
+   ->  Nested Loop Left Join
+         Output: t1.f1, i8.q2
+         Join Filter: (t1.f1 = '***'::text)
+         ->  Seq Scan on public.text_tbl t1
+               Output: t1.f1
+         ->  Materialize
+               Output: i8.q2
+               ->  Hash Right Join
+                     Output: i8.q2
+                     Hash Cond: ((NULL::integer) = i8b1.q2)
+                     ->  Hash Left Join
+                           Output: i8.q2, (NULL::integer)
+                           Hash Cond: (i8.q1 = i8b2.q1)
+                           ->  Seq Scan on public.int8_tbl i8
+                                 Output: i8.q1, i8.q2
+                           ->  Hash
+                                 Output: i8b2.q1, (NULL::integer)
+                                 ->  Seq Scan on public.int8_tbl i8b2
+                                       Output: i8b2.q1, NULL::integer
+                     ->  Hash
+                           Output: i8b1.q2
+                           ->  Seq Scan on public.int8_tbl i8b1
+                                 Output: i8b1.q2
+   ->  Hash
+         Output: i4.f1
+         ->  Seq Scan on public.int4_tbl i4
+               Output: i4.f1
+(30 rows)
+
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+        f1         
+-------------------
+ doh!
+ hi de ho neighbor
+(2 rows)
+
+explain (verbose, costs off)
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Hash Left Join
+   Output: t1.f1
+   Hash Cond: (i8.q2 = i4.f1)
+   ->  Nested Loop Left Join
+         Output: i8.q2, t1.f1
+         Join Filter: (t1.f1 = '***'::text)
+         ->  Seq Scan on public.text_tbl t1
+               Output: t1.f1
+         ->  Materialize
+               Output: i8.q2
+               ->  Hash Right Join
+                     Output: i8.q2
+                     Hash Cond: ((NULL::integer) = i8b1.q2)
+                     ->  Hash Right Join
+                           Output: i8.q2, (NULL::integer)
+                           Hash Cond: (i8b2.q1 = i8.q1)
+                           ->  Nested Loop
+                                 Output: i8b2.q1, NULL::integer
+                                 ->  Seq Scan on public.int8_tbl i8b2
+                                       Output: i8b2.q1, i8b2.q2
+                                 ->  Materialize
+                                       ->  Seq Scan on public.int4_tbl i4b2
+                           ->  Hash
+                                 Output: i8.q1, i8.q2
+                                 ->  Seq Scan on public.int8_tbl i8
+                                       Output: i8.q1, i8.q2
+                     ->  Hash
+                           Output: i8b1.q2
+                           ->  Seq Scan on public.int8_tbl i8b1
+                                 Output: i8b1.q2
+   ->  Hash
+         Output: i4.f1
+         ->  Seq Scan on public.int4_tbl i4
+               Output: i4.f1
+(34 rows)
+
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+        f1         
+-------------------
+ doh!
+ hi de ho neighbor
+(2 rows)
+
 --
 -- test ability to push constants through outer join clauses
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 16e5b7d0c54..f294b8309ae 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -952,6 +952,56 @@ left join
   ) foo3
 using (join_key);
 
+--
+-- test successful handling of nested outer joins with degenerate join quals
+--
+
+explain (verbose, costs off)
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+
+explain (verbose, costs off)
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+
+select t1.* from
+  text_tbl t1
+  left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+    left join int8_tbl i8
+      left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
+      on (i8.q1 = b2.q1)
+    on (b2.d2 = b1.q2)
+  on (t1.f1 = b1.d1)
+  left join int4_tbl i4
+  on (i8.q2 = i4.f1);
+
 --
 -- test ability to push constants through outer join clauses
 --
-- 
GitLab