From f5d9fdcc773c34992a9ebe867ed452a670b91d29 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 10 Dec 2013 16:10:24 -0500
Subject: [PATCH] Fix possible crash with nested SubLinks.

An expression such as WHERE (... x IN (SELECT ...) ...) IN (SELECT ...)
could produce an invalid plan that results in a crash at execution time,
if the planner attempts to flatten the outer IN into a semi-join.
This happens because convert_testexpr() was not expecting any nested
SubLinks and would wrongly replace any PARAM_SUBLINK Params belonging
to the inner SubLink.  (I think the comment denying that this case could
happen was wrong when written; it's certainly been wrong for quite a long
time, since very early versions of the semijoin flattening logic.)

Per report from Teodor Sigaev.  Back-patch to all supported branches.
---
 src/backend/optimizer/plan/subselect.c  | 27 ++++++++++++++++++-----
 src/test/regress/expected/subselect.out | 29 +++++++++++++++++++++++++
 src/test/regress/sql/subselect.sql      | 11 ++++++++++
 3 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 22f2a55b4ac..adb50c2edf1 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -855,11 +855,6 @@ generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
  * with Params or Vars representing the results of the sub-select.	The
  * nodes to be substituted are passed in as the List result from
  * generate_subquery_params or generate_subquery_vars.
- *
- * The given testexpr has already been recursively processed by
- * process_sublinks_mutator.  Hence it can no longer contain any
- * PARAM_SUBLINK Params for lower SubLink nodes; we can safely assume that
- * any we find are for our own level of SubLink.
  */
 static Node *
 convert_testexpr(PlannerInfo *root,
@@ -898,6 +893,28 @@ convert_testexpr_mutator(Node *node,
 												param->paramid - 1));
 		}
 	}
+	if (IsA(node, SubLink))
+	{
+		/*
+		 * If we come across a nested SubLink, it is neither necessary nor
+		 * correct to recurse into it: any PARAM_SUBLINKs we might find inside
+		 * belong to the inner SubLink not the outer. So just return it as-is.
+		 *
+		 * This reasoning depends on the assumption that nothing will pull
+		 * subexpressions into or out of the testexpr field of a SubLink, at
+		 * least not without replacing PARAM_SUBLINKs first.  If we did want
+		 * to do that we'd need to rethink the parser-output representation
+		 * altogether, since currently PARAM_SUBLINKs are only unique per
+		 * SubLink not globally across the query.  The whole point of
+		 * replacing them with Vars or PARAM_EXEC nodes is to make them
+		 * globally unique before they escape from the SubLink's testexpr.
+		 *
+		 * Note: this can't happen when called during SS_process_sublinks,
+		 * because that recursively processes inner SubLinks first.  It can
+		 * happen when called from convert_ANY_sublink_to_join, though.
+		 */
+		return node;
+	}
 	return expression_tree_mutator(node,
 								   convert_testexpr_mutator,
 								   (void *) context);
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 440ea0c98ff..cde168e3c80 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -713,3 +713,32 @@ select exists(select * from nocolumns);
  f
 (1 row)
 
+--
+-- Check sane behavior with nested IN SubLinks
+--
+explain (verbose, costs off)
+select * from int4_tbl where
+  (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
+  (select ten from tenk1 b);
+                                                                                      QUERY PLAN                                                                                       
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Nested Loop Semi Join
+   Output: int4_tbl.f1
+   Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten)
+   ->  Seq Scan on public.int4_tbl
+         Output: int4_tbl.f1
+   ->  Seq Scan on public.tenk1 b
+         Output: b.unique1, b.unique2, b.two, b.four, b.ten, b.twenty, b.hundred, b.thousand, b.twothousand, b.fivethous, b.tenthous, b.odd, b.even, b.stringu1, b.stringu2, b.string4
+   SubPlan 1
+     ->  Index Only Scan using tenk1_unique1 on public.tenk1 a
+           Output: a.unique1
+(10 rows)
+
+select * from int4_tbl where
+  (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
+  (select ten from tenk1 b);
+ f1 
+----
+  0
+(1 row)
+
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 278580797ec..326fd70e4a0 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -411,3 +411,14 @@ explain (verbose, costs off)
 --
 create temp table nocolumns();
 select exists(select * from nocolumns);
+
+--
+-- Check sane behavior with nested IN SubLinks
+--
+explain (verbose, costs off)
+select * from int4_tbl where
+  (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
+  (select ten from tenk1 b);
+select * from int4_tbl where
+  (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
+  (select ten from tenk1 b);
-- 
GitLab