From 6af9ee4c8c8f31cbf2e76166d1d9868876f60aea Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 5 Aug 2015 14:39:07 -0400
Subject: [PATCH] Make real sure we don't reassociate joins into or out of
 SEMI/ANTI joins.

Per the discussion in optimizer/README, it's unsafe to reassociate anything
into or out of the RHS of a SEMI or ANTI join.  An example from Piotr
Stefaniak showed that join_is_legal() wasn't sufficiently enforcing this
rule, so lock it down a little harder.

I couldn't find a reasonably simple example of the optimizer trying to
do this, so no new regression test.  (Piotr's example involved the random
search in GEQO accidentally trying an invalid case and triggering a sanity
check way downstream in clause selectivity estimation, which did not seem
like a sequence of events that would be useful to memorialize in a
regression test as-is.)

Back-patch to all active branches.
---
 src/backend/optimizer/path/joinrels.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 02c7c5ea864..475241856bd 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -470,10 +470,14 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			/*
 			 * Otherwise, the proposed join overlaps the RHS but isn't a valid
 			 * implementation of this SJ.  It might still be a legal join,
-			 * however, if it does not overlap the LHS.
+			 * however, if it does not overlap the LHS.  But we never allow
+			 * violations of the RHS of SEMI or ANTI joins.  (In practice,
+			 * therefore, only LEFT joins ever allow RHS violation.)
 			 */
-			if (bms_overlap(joinrelids, sjinfo->min_lefthand))
-				return false;
+			if (sjinfo->jointype == JOIN_SEMI ||
+				sjinfo->jointype == JOIN_ANTI ||
+				bms_overlap(joinrelids, sjinfo->min_lefthand))
+				return false;	/* invalid join path */
 
 			/*----------
 			 * If both inputs overlap the RHS, assume that it's OK.  Since the
@@ -498,15 +502,14 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			 * Set flag here to check at bottom of loop.
 			 *----------
 			 */
-			if (sjinfo->jointype != JOIN_SEMI &&
-				bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+			if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
 				bms_overlap(rel2->relids, sjinfo->min_righthand))
 			{
 				/* both overlap; assume OK */
 			}
 			else
 			{
-				/* one overlaps, the other doesn't (or it's a semijoin) */
+				/* one overlaps, the other doesn't */
 				is_valid_inner = false;
 			}
 		}
-- 
GitLab