From 255f66efa9b648500a4f06b839d0256987406f0f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 9 Sep 2009 03:32:52 +0000
Subject: [PATCH] Fix bug with WITH RECURSIVE immediately inside WITH
 RECURSIVE.  99% of the code was already okay with this, but the hack that
 obtained the output column types of a recursive union in advance of doing
 real parse analysis of the recursive union forgot to handle the case where
 there was an inner WITH clause available to the non-recursive term.  Best fix
 seems to be to refactor so that we don't need the "throwaway" parse analysis
 step at all. Instead, teach the transformSetOperationStmt code to set up the
 CTE's output column information after it's processed the non-recursive term
 normally. Per report from David Fetter.

---
 src/backend/parser/analyze.c       | 90 ++++++++++++++++++++++++++++--
 src/backend/parser/parse_clause.c  |  4 +-
 src/backend/parser/parse_cte.c     | 45 +++++----------
 src/backend/parser/parse_expr.c    |  4 +-
 src/include/parser/analyze.h       |  5 +-
 src/include/parser/parse_cte.h     |  5 +-
 src/include/parser/parse_node.h    |  6 +-
 src/test/regress/expected/with.out | 73 ++++++++++++++++++++++++
 src/test/regress/sql/with.sql      | 15 +++++
 9 files changed, 203 insertions(+), 44 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index c441be220e2..8a19697a71c 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.390 2009/08/27 20:08:02 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.391 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,7 +50,9 @@ static Query *transformSelectStmt(ParseState *pstate, SelectStmt *stmt);
 static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt);
 static Query *transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt);
 static Node *transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
-						  List **colInfo);
+						  bool isTopLevel, List **colInfo);
+static void determineRecursiveColTypes(ParseState *pstate,
+									   Node *larg, List *lcolinfo);
 static void applyColumnNames(List *dst, List *src);
 static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt);
 static List *transformReturningList(ParseState *pstate, List *returningList);
@@ -135,11 +137,14 @@ parse_analyze_varparams(Node *parseTree, const char *sourceText,
  *		Entry point for recursively analyzing a sub-statement.
  */
 Query *
-parse_sub_analyze(Node *parseTree, ParseState *parentParseState)
+parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
+				  CommonTableExpr *parentCTE)
 {
 	ParseState *pstate = make_parsestate(parentParseState);
 	Query	   *query;
 
+	pstate->p_parent_cte = parentCTE;
+
 	query = transformStmt(pstate, parseTree);
 
 	free_parsestate(pstate);
@@ -1199,6 +1204,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 	 * Recursively transform the components of the tree.
 	 */
 	sostmt = (SetOperationStmt *) transformSetOperationTree(pstate, stmt,
+															true,
 															&socolinfo);
 	Assert(sostmt && IsA(sostmt, SetOperationStmt));
 	qry->setOperations = (Node *) sostmt;
@@ -1359,7 +1365,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
  */
 static Node *
 transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
-						  List **colInfo)
+						  bool isTopLevel, List **colInfo)
 {
 	bool		isLeaf;
 
@@ -1418,7 +1424,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 		 * of this sub-query, because they are not in the toplevel pstate's
 		 * namespace list.
 		 */
-		selectQuery = parse_sub_analyze((Node *) stmt, pstate);
+		selectQuery = parse_sub_analyze((Node *) stmt, pstate, NULL);
 
 		/*
 		 * Check for bogus references to Vars on the current query level (but
@@ -1485,11 +1491,28 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 		op->all = stmt->all;
 
 		/*
-		 * Recursively transform the child nodes.
+		 * Recursively transform the left child node.
 		 */
 		op->larg = transformSetOperationTree(pstate, stmt->larg,
+											 false,
 											 &lcolinfo);
+
+		/*
+		 * If we are processing a recursive union query, now is the time
+		 * to examine the non-recursive term's output columns and mark the
+		 * containing CTE as having those result columns.  We should do this
+		 * only at the topmost setop of the CTE, of course.
+		 */
+		if (isTopLevel &&
+			pstate->p_parent_cte &&
+			pstate->p_parent_cte->cterecursive)
+			determineRecursiveColTypes(pstate, op->larg, lcolinfo);
+
+		/*
+		 * Recursively transform the right child node.
+		 */
 		op->rarg = transformSetOperationTree(pstate, stmt->rarg,
+											 false,
 											 &rcolinfo);
 
 		/*
@@ -1584,6 +1607,61 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 	}
 }
 
+/*
+ * Process the outputs of the non-recursive term of a recursive union
+ * to set up the parent CTE's columns
+ */
+static void
+determineRecursiveColTypes(ParseState *pstate, Node *larg, List *lcolinfo)
+{
+	Node	   *node;
+	int			leftmostRTI;
+	Query	   *leftmostQuery;
+	List	   *targetList;
+	ListCell   *left_tlist;
+	ListCell   *lci;
+	int			next_resno;
+
+	/*
+	 * Find leftmost leaf SELECT
+	 */
+	node = larg;
+	while (node && IsA(node, SetOperationStmt))
+		node = ((SetOperationStmt *) node)->larg;
+	Assert(node && IsA(node, RangeTblRef));
+	leftmostRTI = ((RangeTblRef *) node)->rtindex;
+	leftmostQuery = rt_fetch(leftmostRTI, pstate->p_rtable)->subquery;
+	Assert(leftmostQuery != NULL);
+
+	/*
+	 * Generate dummy targetlist using column names of leftmost select
+	 * and dummy result expressions of the non-recursive term.
+	 */
+	targetList = NIL;
+	left_tlist = list_head(leftmostQuery->targetList);
+	next_resno = 1;
+
+	foreach(lci, lcolinfo)
+	{
+		Expr	   *lcolexpr = (Expr *) lfirst(lci);
+		TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist);
+		char	   *colName;
+		TargetEntry *tle;
+
+		Assert(!lefttle->resjunk);
+		colName = pstrdup(lefttle->resname);
+		tle = makeTargetEntry(lcolexpr,
+							  next_resno++,
+							  colName,
+							  false);
+		targetList = lappend(targetList, tle);
+		left_tlist = lnext(left_tlist);
+	}
+
+	/* Now build CTE's output column info using dummy targetlist */
+	analyzeCTETargetList(pstate, pstate->p_parent_cte, targetList);
+}
+
 /*
  * Attach column names from a ColumnDef list to a TargetEntry list
  * (for CREATE TABLE AS)
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index a63a280a8cb..c3bdf33a998 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.191 2009/08/27 20:08:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.192 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -479,7 +479,7 @@ transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 	/*
 	 * Analyze and transform the subquery.
 	 */
-	query = parse_sub_analyze(r->subquery, pstate);
+	query = parse_sub_analyze(r->subquery, pstate, NULL);
 
 	/*
 	 * Check that we got something reasonable.	Many of these conditions are
diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c
index c18b4336adc..d4a190b505e 100644
--- a/src/backend/parser/parse_cte.c
+++ b/src/backend/parser/parse_cte.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.6 2009/06/11 14:49:00 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.7 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,8 +58,6 @@ typedef struct CteItem
 {
 	CommonTableExpr *cte;		/* One CTE to examine */
 	int			id;				/* Its ID number for dependencies */
-	Node	   *non_recursive_term;		/* Its nonrecursive part, if
-										 * identified */
 	Bitmapset  *depends_on;		/* CTEs depended on (not including self) */
 } CteItem;
 
@@ -80,7 +78,6 @@ typedef struct CteState
 
 
 static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte);
-static void analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist);
 
 /* Dependency processing functions */
 static void makeDependencyGraph(CteState *cstate);
@@ -191,25 +188,6 @@ transformWithClause(ParseState *pstate, WithClause *withClause)
 		{
 			CommonTableExpr *cte = cstate.items[i].cte;
 
-			/*
-			 * If it's recursive, we have to do a throwaway parse analysis of
-			 * the non-recursive term in order to determine the set of output
-			 * columns for the recursive CTE.
-			 */
-			if (cte->cterecursive)
-			{
-				Node	   *nrt;
-				Query	   *nrq;
-
-				if (!cstate.items[i].non_recursive_term)
-					elog(ERROR, "could not find non-recursive term for %s",
-						 cte->ctename);
-				/* copy the term to be sure we don't modify original query */
-				nrt = copyObject(cstate.items[i].non_recursive_term);
-				nrq = parse_sub_analyze(nrt, pstate);
-				analyzeCTETargetList(pstate, cte, nrq->targetList);
-			}
-
 			analyzeCTE(pstate, cte);
 		}
 	}
@@ -251,7 +229,7 @@ analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
 	/* Analysis not done already */
 	Assert(IsA(cte->ctequery, SelectStmt));
 
-	query = parse_sub_analyze(cte->ctequery, pstate);
+	query = parse_sub_analyze(cte->ctequery, pstate, cte);
 	cte->ctequery = (Node *) query;
 
 	/*
@@ -325,14 +303,26 @@ analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
 
 /*
  * Compute derived fields of a CTE, given the transformed output targetlist
+ *
+ * For a nonrecursive CTE, this is called after transforming the CTE's query.
+ * For a recursive CTE, we call it after transforming the non-recursive term,
+ * and pass the targetlist emitted by the non-recursive term only.
+ *
+ * Note: in the recursive case, the passed pstate is actually the one being
+ * used to analyze the CTE's query, so it is one level lower down than in
+ * the nonrecursive case.  This doesn't matter since we only use it for
+ * error message context anyway.
  */
-static void
+void
 analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
 {
 	int			numaliases;
 	int			varattno;
 	ListCell   *tlistitem;
 
+	/* Not done already ... */
+	Assert(cte->ctecolnames == NIL);
+
 	/*
 	 * We need to determine column names and types.  The alias column names
 	 * override anything coming from the query itself.	(Note: the SQL spec
@@ -668,11 +658,6 @@ checkWellFormedRecursion(CteState *cstate)
 					 errmsg("FOR UPDATE/SHARE in a recursive query is not implemented"),
 					 parser_errposition(cstate->pstate,
 							   exprLocation((Node *) stmt->lockingClause))));
-
-		/*
-		 * Save non_recursive_term.
-		 */
-		cstate->items[i].non_recursive_term = (Node *) stmt->larg;
 	}
 }
 
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index a8adac1472e..39d66dcc12f 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.242 2009/07/16 06:33:43 petere Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.243 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1251,7 +1251,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		return result;
 
 	pstate->p_hasSubLinks = true;
-	qtree = parse_sub_analyze(sublink->subselect, pstate);
+	qtree = parse_sub_analyze(sublink->subselect, pstate, NULL);
 
 	/*
 	 * Check that we got something reasonable.	Many of these conditions are
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index a662e37b7af..41282675d4c 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/analyze.h,v 1.40 2009/01/01 17:24:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/analyze.h,v 1.41 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,8 @@ extern Query *parse_analyze(Node *parseTree, const char *sourceText,
 extern Query *parse_analyze_varparams(Node *parseTree, const char *sourceText,
 						Oid **paramTypes, int *numParams);
 
-extern Query *parse_sub_analyze(Node *parseTree, ParseState *parentParseState);
+extern Query *parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
+								CommonTableExpr *parentCTE);
 extern Query *transformStmt(ParseState *pstate, Node *parseTree);
 
 extern bool analyze_requires_snapshot(Node *parseTree);
diff --git a/src/include/parser/parse_cte.h b/src/include/parser/parse_cte.h
index b5dd11ec140..7e4255610ec 100644
--- a/src/include/parser/parse_cte.h
+++ b/src/include/parser/parse_cte.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/parse_cte.h,v 1.2 2009/01/01 17:24:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_cte.h,v 1.3 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,4 +18,7 @@
 
 extern List *transformWithClause(ParseState *pstate, WithClause *withClause);
 
+extern void analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte,
+								 List *tlist);
+
 #endif   /* PARSE_CTE_H */
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 78b43d915a8..b653caeeb32 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/parse_node.h,v 1.62 2009/06/11 14:49:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_node.h,v 1.63 2009/09/09 03:32:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,6 +61,9 @@
  * p_future_ctes: list of CommonTableExprs (WITH items) that are not yet
  * visible due to scope rules.	This is used to help improve error messages.
  *
+ * p_parent_cte: CommonTableExpr that immediately contains the current query,
+ * if any.
+ *
  * p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
  * We collect these while transforming expressions and then transform them
  * afterwards (so that any resjunk tlist items needed for the sort/group
@@ -88,6 +91,7 @@ typedef struct ParseState
 	List	   *p_varnamespace; /* current namespace for columns */
 	List	   *p_ctenamespace; /* current namespace for common table exprs */
 	List	   *p_future_ctes;	/* common table exprs not yet in namespace */
+	CommonTableExpr *p_parent_cte;		/* this query's containing CTE */
 	List	   *p_windowdefs;	/* raw representations of window clauses */
 	Oid		   *p_paramtypes;	/* OIDs of types for $n parameter symbols */
 	int			p_numparams;	/* allocated size of p_paramtypes[] */
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 98003ebe6c6..a3e94e93d49 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -953,3 +953,76 @@ from int4_tbl;
  -2147483647
 (5 rows)
 
+--
+-- test for nested-recursive-WITH bug
+--
+WITH RECURSIVE t(j) AS (
+    WITH RECURSIVE s(i) AS (
+        VALUES (1)
+        UNION ALL
+        SELECT i+1 FROM s WHERE i < 10
+    )
+    SELECT i FROM s
+    UNION ALL
+    SELECT j+1 FROM t WHERE j < 10
+)
+SELECT * FROM t;
+ j  
+----
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+  5
+  6
+  7
+  8
+  9
+ 10
+  6
+  7
+  8
+  9
+ 10
+  7
+  8
+  9
+ 10
+  8
+  9
+ 10
+  9
+ 10
+ 10
+(55 rows)
+
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 6eaa168d964..2cbaa42492f 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -485,3 +485,18 @@ from int4_tbl;
 select ( with cte(foo) as ( values(f1) )
           values((select foo from cte)) )
 from int4_tbl;
+
+--
+-- test for nested-recursive-WITH bug
+--
+WITH RECURSIVE t(j) AS (
+    WITH RECURSIVE s(i) AS (
+        VALUES (1)
+        UNION ALL
+        SELECT i+1 FROM s WHERE i < 10
+    )
+    SELECT i FROM s
+    UNION ALL
+    SELECT j+1 FROM t WHERE j < 10
+)
+SELECT * FROM t;
-- 
GitLab