From 0d115dde82bf368ae0f9755113bd8fd53ac0b64b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 7 Oct 2008 19:27:04 +0000
Subject: [PATCH] Extend CTE patch to support recursive UNION (ie, without
 ALL).  The implementation uses an in-memory hash table, so it will poop out
 for very large recursive results ... but the performance characteristics of a
 sort-based implementation would be pretty unpleasant too.

---
 doc/src/sgml/queries.sgml                 |  33 ++--
 doc/src/sgml/ref/select.sgml              |   8 +-
 src/backend/executor/nodeRecursiveunion.c | 174 +++++++++++++++++++---
 src/backend/nodes/copyfuncs.c             |   9 +-
 src/backend/nodes/outfuncs.c              |  15 +-
 src/backend/optimizer/plan/createplan.c   |  38 ++++-
 src/backend/optimizer/prep/prepunion.c    |  45 +++++-
 src/backend/parser/parse_cte.c            |  10 +-
 src/include/nodes/execnodes.h             |   8 +-
 src/include/nodes/plannodes.h             |   8 +-
 src/include/optimizer/planmain.h          |   5 +-
 src/test/regress/expected/with.out        |  48 ++++--
 src/test/regress/sql/with.sql             |  18 ++-
 13 files changed, 345 insertions(+), 74 deletions(-)

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index b3d72ceb7f8..c2e3807920f 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.46 2008/10/04 21:56:52 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.47 2008/10/07 19:27:03 tgl Exp $ -->
 
 <chapter id="queries">
  <title>Queries</title>
@@ -1519,7 +1519,8 @@ SELECT sum(n) FROM t;
 </programlisting>
 
    The general form of a recursive <literal>WITH</> query is always a
-   <firstterm>non-recursive term</>, then <literal>UNION ALL</>, then a
+   <firstterm>non-recursive term</>, then <literal>UNION</> (or
+   <literal>UNION ALL</>), then a
    <firstterm>recursive term</>, where only the recursive term can contain
    a reference to the query's own output.  Such a query is executed as
    follows:
@@ -1530,9 +1531,10 @@ SELECT sum(n) FROM t;
 
    <step performance="required">
     <para>
-     Evaluate the non-recursive term.  Include all its output rows in the
-     result of the recursive query, and also place them in a temporary
-     <firstterm>working table</>.
+     Evaluate the non-recursive term.  For <literal>UNION</> (but not
+     <literal>UNION ALL</>), discard duplicate rows.  Include all remaining
+     rows in the result of the recursive query, and also place them in a
+     temporary <firstterm>working table</>.
     </para>
    </step>
 
@@ -1544,9 +1546,11 @@ SELECT sum(n) FROM t;
      <step performance="required">
       <para>
        Evaluate the recursive term, substituting the current contents of
-       the working table for the recursive self-reference.  Include all its
-       output rows in the result of the recursive query, and also place them
-       in a temporary <firstterm>intermediate table</>.
+       the working table for the recursive self-reference.
+       For <literal>UNION</> (but not <literal>UNION ALL</>), discard
+       duplicate rows and rows that duplicate any previous result row.
+       Include all remaining rows in the result of the recursive query, and
+       also place them in a temporary <firstterm>intermediate table</>.
       </para>
      </step>
 
@@ -1598,10 +1602,13 @@ GROUP BY sub_part
   <para>
    When working with recursive queries it is important to be sure that
    the recursive part of the query will eventually return no tuples,
-   or else the query will loop indefinitely.  A useful trick for
-   development purposes is to place a <literal>LIMIT</> in the parent
-   query.  For example, this query would loop forever without the
-   <literal>LIMIT</>:
+   or else the query will loop indefinitely.  Sometimes, using
+   <literal>UNION</> instead of <literal>UNION ALL</> can accomplish this
+   by discarding rows that duplicate previous output rows; this catches
+   cycles that would otherwise repeat.  A useful trick for testing queries
+   when you are not certain if they might loop is to place a <literal>LIMIT</>
+   in the parent query.  For example, this query would loop forever without
+   the <literal>LIMIT</>:
 
 <programlisting>
 WITH RECURSIVE t(n) AS (
@@ -1614,7 +1621,7 @@ SELECT n FROM t LIMIT 100;
 
    This works because <productname>PostgreSQL</productname>'s implementation
    evaluates only as many rows of a <literal>WITH</> query as are actually
-   demanded by the parent query.  Using this trick in production is not
+   fetched by the parent query.  Using this trick in production is not
    recommended, because other systems might work differently.
   </para>
 
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index e72d9c126f6..f73ca6ed64a 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.105 2008/10/04 21:56:52 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.106 2008/10/07 19:27:04 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -202,10 +202,10 @@ and <replaceable class="parameter">with_query</replaceable> is:
     subquery to reference itself by name.  Such a subquery must have
     the form
 <synopsis>
-<replaceable class="parameter">non_recursive_term</replaceable> UNION ALL <replaceable class="parameter">recursive_term</replaceable>
+<replaceable class="parameter">non_recursive_term</replaceable> UNION [ ALL ] <replaceable class="parameter">recursive_term</replaceable>
 </synopsis>
     where the recursive self-reference must appear on the right-hand
-    side of <literal>UNION ALL</>.  Only one recursive self-reference
+    side of the <literal>UNION</>.  Only one recursive self-reference
     is permitted per query.
    </para>
 
@@ -1234,7 +1234,7 @@ SELECT distance, employee_name FROM employee_recursive;
 </programlisting>
 
    Notice the typical form of recursive queries:
-   an initial condition, followed by <literal>UNION ALL</literal>,
+   an initial condition, followed by <literal>UNION</literal>,
    followed by the recursive part of the query. Be sure that the
    recursive part of the query will eventually return no tuples, or
    else the query will loop indefinitely.  (See <xref linkend="queries-with">
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 7136a623015..81ac703514f 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.1 2008/10/04 21:56:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.2 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,41 @@
 #include "executor/execdebug.h"
 #include "executor/nodeRecursiveunion.h"
 #include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+/*
+ * To implement UNION (without ALL), we need a hashtable that stores tuples
+ * already seen.  The hash key is computed from the grouping columns.
+ */
+typedef struct RUHashEntryData *RUHashEntry;
+
+typedef struct RUHashEntryData
+{
+	TupleHashEntryData shared;	/* common header for hash table entries */
+} RUHashEntryData;
+
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(RecursiveUnionState *rustate)
+{
+	RecursiveUnion	   *node = (RecursiveUnion *) rustate->ps.plan;
+
+	Assert(node->numCols > 0);
+	Assert(node->numGroups > 0);
+
+	rustate->hashtable = BuildTupleHashTable(node->numCols,
+											 node->dupColIdx,
+											 rustate->eqfunctions,
+											 rustate->hashfunctions,
+											 node->numGroups,
+											 sizeof(RUHashEntryData),
+											 rustate->tableContext,
+											 rustate->tempContext);
+}
 
 
 /* ----------------------------------------------------------------
@@ -44,49 +79,85 @@ ExecRecursiveUnion(RecursiveUnionState *node)
 	PlanState  *innerPlan = innerPlanState(node);
 	RecursiveUnion *plan = (RecursiveUnion *) node->ps.plan;
 	TupleTableSlot *slot;
+	RUHashEntry entry;
+	bool		isnew;
 
 	/* 1. Evaluate non-recursive term */
 	if (!node->recursing)
 	{
-		slot = ExecProcNode(outerPlan);
-		if (!TupIsNull(slot))
+		for (;;)
 		{
+			slot = ExecProcNode(outerPlan);
+			if (TupIsNull(slot))
+				break;
+			if (plan->numCols > 0)
+			{
+				/* Find or build hashtable entry for this tuple's group */
+				entry = (RUHashEntry)
+					LookupTupleHashEntry(node->hashtable, slot, &isnew);
+				/* Must reset temp context after each hashtable lookup */
+				MemoryContextReset(node->tempContext);
+				/* Ignore tuple if already seen */
+				if (!isnew)
+					continue;
+			}
+			/* Each non-duplicate tuple goes to the working table ... */
 			tuplestore_puttupleslot(node->working_table, slot);
+			/* ... and to the caller */
 			return slot;
 		}
 		node->recursing = true;
 	}
 
-retry:
 	/* 2. Execute recursive term */
-	slot = ExecProcNode(innerPlan);
-	if (TupIsNull(slot))
+	for (;;)
 	{
-		if (node->intermediate_empty)
-			return NULL;
+		slot = ExecProcNode(innerPlan);
+		if (TupIsNull(slot))
+		{
+			/* Done if there's nothing in the intermediate table */
+			if (node->intermediate_empty)
+				break;
 
-		/* done with old working table ... */
-		tuplestore_end(node->working_table);
+			/* done with old working table ... */
+			tuplestore_end(node->working_table);
 
-		/* intermediate table becomes working table */
-		node->working_table = node->intermediate_table;
+			/* intermediate table becomes working table */
+			node->working_table = node->intermediate_table;
 
-		/* create new empty intermediate table */
-		node->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
-		node->intermediate_empty = true;
+			/* create new empty intermediate table */
+			node->intermediate_table = tuplestore_begin_heap(false, false,
+															 work_mem);
+			node->intermediate_empty = true;
 
-		/* and reset the inner plan */
-		innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
-											 plan->wtParam);
-		goto retry;
-	}
- 	else
- 	{
+			/* reset the recursive term */
+			innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
+												 plan->wtParam);
+
+			/* and continue fetching from recursive term */
+			continue;
+		}
+
+		if (plan->numCols > 0)
+		{
+			/* Find or build hashtable entry for this tuple's group */
+			entry = (RUHashEntry)
+				LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			/* Must reset temp context after each hashtable lookup */
+			MemoryContextReset(node->tempContext);
+			/* Ignore tuple if already seen */
+			if (!isnew)
+				continue;
+		}
+
+		/* Else, tuple is good; stash it in intermediate table ... */
  		node->intermediate_empty = false;
  		tuplestore_puttupleslot(node->intermediate_table, slot);
- 	}
+		/* ... and return it */
+		return slot;
+	}
 
-	return slot;
+	return NULL;
 }
 
 /* ----------------------------------------------------------------
@@ -109,12 +180,40 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
 	rustate->ps.plan = (Plan *) node;
 	rustate->ps.state = estate;
 
+	rustate->eqfunctions = NULL;
+	rustate->hashfunctions = NULL;
+	rustate->hashtable = NULL;
+	rustate->tempContext = NULL;
+	rustate->tableContext = NULL;
+
 	/* initialize processing state */
 	rustate->recursing = false;
 	rustate->intermediate_empty = true;
 	rustate->working_table = tuplestore_begin_heap(false, false, work_mem);
 	rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
 
+	/*
+	 * If hashing, we need a per-tuple memory context for comparisons, and a
+	 * longer-lived context to store the hash table.  The table can't just be
+	 * kept in the per-query context because we want to be able to throw it
+	 * away when rescanning.
+	 */
+	if (node->numCols > 0)
+	{
+		rustate->tempContext =
+			AllocSetContextCreate(CurrentMemoryContext,
+								  "RecursiveUnion",
+								  ALLOCSET_DEFAULT_MINSIZE,
+								  ALLOCSET_DEFAULT_INITSIZE,
+								  ALLOCSET_DEFAULT_MAXSIZE);
+		rustate->tableContext =
+			AllocSetContextCreate(CurrentMemoryContext,
+								  "RecursiveUnion hash table",
+								  ALLOCSET_DEFAULT_MINSIZE,
+								  ALLOCSET_DEFAULT_INITSIZE,
+								  ALLOCSET_DEFAULT_MAXSIZE);
+	}
+
 	/*
 	 * Make the state structure available to descendant WorkTableScan nodes
 	 * via the Param slot reserved for it.
@@ -154,6 +253,19 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
 	outerPlanState(rustate) = ExecInitNode(outerPlan(node), estate, eflags);
 	innerPlanState(rustate) = ExecInitNode(innerPlan(node), estate, eflags);
 
+	/*
+	 * If hashing, precompute fmgr lookup data for inner loop, and create
+	 * the hash table.
+	 */
+	if (node->numCols > 0)
+	{
+		execTuplesHashPrepare(node->numCols,
+							  node->dupOperators,
+							  &rustate->eqfunctions,
+							  &rustate->hashfunctions);
+		build_hash_table(rustate);
+	}
+
 	return rustate;
 }
 
@@ -178,6 +290,12 @@ ExecEndRecursiveUnion(RecursiveUnionState *node)
 	tuplestore_end(node->working_table);
 	tuplestore_end(node->intermediate_table);
 
+	/* free subsidiary stuff including hashtable */
+	if (node->tempContext)
+		MemoryContextDelete(node->tempContext);
+	if (node->tableContext)
+		MemoryContextDelete(node->tableContext);
+
 	/*
 	 * clean out the upper tuple table
 	 */
@@ -217,6 +335,14 @@ ExecRecursiveUnionReScan(RecursiveUnionState *node, ExprContext *exprCtxt)
 	if (outerPlan->chgParam == NULL)
 		ExecReScan(outerPlan, exprCtxt);
 
+	/* Release any hashtable storage */
+	if (node->tableContext)
+		MemoryContextResetAndDeleteChildren(node->tableContext);
+
+	/* And rebuild empty hashtable if needed */
+	if (plan->numCols > 0)
+		build_hash_table(node);
+
 	/* reset processing state */
 	node->recursing = false;
 	node->intermediate_empty = true;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 1e7eb605f5f..f0a49155024 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.407 2008/10/06 17:39:25 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.408 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -193,6 +193,13 @@ _copyRecursiveUnion(RecursiveUnion *from)
 	 * copy remainder of node
 	 */
 	COPY_SCALAR_FIELD(wtParam);
+	COPY_SCALAR_FIELD(numCols);
+	if (from->numCols > 0)
+	{
+		COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
+		COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+	}
+	COPY_SCALAR_FIELD(numGroups);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index b25ef4b577a..75a04ce2fa1 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.341 2008/10/06 17:39:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.342 2008/10/07 19:27:04 tgl Exp $
  *
  * NOTES
  *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -334,11 +334,24 @@ _outAppend(StringInfo str, Append *node)
 static void
 _outRecursiveUnion(StringInfo str, RecursiveUnion *node)
 {
+	int i;
+
 	WRITE_NODE_TYPE("RECURSIVEUNION");
 
 	_outPlanInfo(str, (Plan *) node);
 
 	WRITE_INT_FIELD(wtParam);
+	WRITE_INT_FIELD(numCols);
+
+	appendStringInfo(str, " :dupColIdx");
+	for (i = 0; i < node->numCols; i++)
+		appendStringInfo(str, " %d", node->dupColIdx[i]);
+
+	appendStringInfo(str, " :dupOperators");
+	for (i = 0; i < node->numCols; i++)
+		appendStringInfo(str, " %u", node->dupOperators[i]);
+
+	WRITE_LONG_FIELD(numGroups);
 }
 
 static void
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index aabbf64a755..f812c939794 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.249 2008/10/04 21:56:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.250 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2545,10 +2545,13 @@ RecursiveUnion *
 make_recursive_union(List *tlist,
 					 Plan *lefttree,
 					 Plan *righttree,
-					 int wtParam)
+					 int wtParam,
+					 List *distinctList,
+					 long numGroups)
 {
 	RecursiveUnion *node = makeNode(RecursiveUnion);
 	Plan	   *plan = &node->plan;
+	int			numCols = list_length(distinctList);
 
 	cost_recursive_union(plan, lefttree, righttree);
 
@@ -2558,6 +2561,37 @@ make_recursive_union(List *tlist,
 	plan->righttree = righttree;
 	node->wtParam = wtParam;
 
+	/*
+	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
+	 */
+	node->numCols = numCols;
+	if (numCols > 0)
+	{
+		int			keyno = 0;
+		AttrNumber *dupColIdx;
+		Oid		   *dupOperators;
+		ListCell   *slitem;
+
+		dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+		dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+
+		foreach(slitem, distinctList)
+		{
+			SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
+			TargetEntry *tle = get_sortgroupclause_tle(sortcl,
+													   plan->targetlist);
+
+			dupColIdx[keyno] = tle->resno;
+			dupOperators[keyno] = sortcl->eqop;
+			Assert(OidIsValid(dupOperators[keyno]));
+			keyno++;
+		}
+		node->dupColIdx = dupColIdx;
+		node->dupOperators = dupOperators;
+	}
+	node->numGroups = numGroups;
+
 	return node;
 }
 
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index e8b78509475..2e22d09b4ad 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.157 2008/10/06 17:39:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.158 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -318,10 +318,12 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
 	Plan	   *lplan;
 	Plan	   *rplan;
 	List	   *tlist;
+	List	   *groupList;
+	long		numGroups;
 
 	/* Parser should have rejected other cases */
-	if (setOp->op != SETOP_UNION || !setOp->all)
-		elog(ERROR, "only UNION ALL queries can be recursive");
+	if (setOp->op != SETOP_UNION)
+		elog(ERROR, "only UNION queries can be recursive");
 	/* Worktable ID should be assigned */
 	Assert(root->wt_param_id >= 0);
 
@@ -346,13 +348,46 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
 								  list_make2(lplan, rplan),
 								  refnames_tlist);
 
+	/*
+	 * If UNION, identify the grouping operators
+	 */
+	if (setOp->all)
+	{
+		groupList = NIL;
+		numGroups = 0;
+	}
+	else
+	{
+		double	dNumGroups;
+
+		/* Identify the grouping semantics */
+		groupList = generate_setop_grouplist(setOp, tlist);
+
+		/* We only support hashing here */
+		if (!grouping_is_hashable(groupList))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("could not implement recursive UNION"),
+					 errdetail("All column datatypes must be hashable.")));
+
+		/*
+		 * For the moment, take the number of distinct groups as equal to
+		 * the total input size, ie, the worst case.
+		 */
+		dNumGroups = lplan->plan_rows + rplan->plan_rows * 10;
+
+		/* Also convert to long int --- but 'ware overflow! */
+		numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
+	}
+
 	/*
 	 * And make the plan node.
 	 */
 	plan = (Plan *) make_recursive_union(tlist, lplan, rplan,
-										 root->wt_param_id);
+										 root->wt_param_id,
+										 groupList, numGroups);
 
-	*sortClauses = NIL;			/* result of UNION ALL is always unsorted */
+	*sortClauses = NIL;			/* RecursiveUnion result is always unsorted */
 
 	return plan;
 }
diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c
index 29111acb968..3971dac23f7 100644
--- a/src/backend/parser/parse_cte.c
+++ b/src/backend/parser/parse_cte.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.2 2008/10/05 22:50:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.3 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -601,11 +601,11 @@ checkWellFormedRecursion(CteState *cstate)
 		if (!cte->cterecursive)
 			continue;
 
-		/* Must have top-level UNION ALL */
-		if (stmt->op != SETOP_UNION || !stmt->all)
+		/* Must have top-level UNION */
+		if (stmt->op != SETOP_UNION)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_RECURSION),
-					 errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION ALL recursive-term",
+					 errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term",
 							cte->ctename),
 					 parser_errposition(cstate->pstate, cte->location)));
 
@@ -628,7 +628,7 @@ checkWellFormedRecursion(CteState *cstate)
 			elog(ERROR, "missing recursive reference");
 
 		/*
-		 * Disallow ORDER BY and similar decoration atop the UNION ALL.
+		 * Disallow ORDER BY and similar decoration atop the UNION.
 		 * These don't make sense because it's impossible to figure out what
 		 * they mean when we have only part of the recursive query's results.
 		 * (If we did allow them, we'd have to check for recursive references
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 02c9c8f0566..c10204a9100 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.189 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.190 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -964,6 +964,12 @@ typedef struct RecursiveUnionState
 	bool		intermediate_empty;
 	Tuplestorestate *working_table;
 	Tuplestorestate *intermediate_table;
+	/* Remaining fields are unused in UNION ALL case */
+	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */
+	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
+	MemoryContext tempContext;	/* short-term context for comparisons */
+	TupleHashTable hashtable;	/* hash table for tuples already seen */
+	MemoryContext tableContext;	/* memory context containing hash table */
 } RecursiveUnionState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index cdf06b00207..16c25fd6d0e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.104 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.105 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -194,6 +194,12 @@ typedef struct RecursiveUnion
 {
 	Plan		plan;
 	int			wtParam;		/* ID of Param representing work table */
+	/* Remaining fields are zero/null in UNION ALL case */
+	int			numCols;		/* number of columns to check for
+								 * duplicate-ness */
+	AttrNumber *dupColIdx;		/* their indexes in the target list */
+	Oid		   *dupOperators;	/* equality operators to compare with */
+	long		numGroups;		/* estimated number of groups in input */
 } RecursiveUnion;
 
 /* ----------------
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 1e8017fb6d4..641ebe42fb4 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.113 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.114 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,7 +43,8 @@ extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
 				  Index scanrelid, Plan *subplan, List *subrtable);
 extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
 extern RecursiveUnion *make_recursive_union(List *tlist,
-			   Plan *lefttree, Plan *righttree, int wtParam);
+			   Plan *lefttree, Plan *righttree, int wtParam,
+			   List *distinctList, long numGroups);
 extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree,
 						List *pathkeys, double limit_tuples);
 extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls,
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 5b45ac89dfc..4760aa9d9fd 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -49,6 +49,18 @@ SELECT * FROM t;
  5
 (5 rows)
 
+-- This is an infinite loop with UNION ALL, but not with UNION
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT 10-n FROM t)
+SELECT * FROM t;
+ n 
+---
+ 1
+ 9
+(2 rows)
+
 -- This'd be an infinite loop, but outside query reads only as much as needed
 WITH RECURSIVE t(n) AS (
     VALUES (1)
@@ -69,6 +81,26 @@ SELECT * FROM t LIMIT 10;
  10
 (10 rows)
 
+-- UNION case should have same property
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT n+1 FROM t)
+SELECT * FROM t LIMIT 10;
+ n  
+----
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
 -- Test behavior with an unknown-type literal in the WITH
 WITH q AS (SELECT 'foo' AS x)
 SELECT x, x IS OF (unknown) as is_unknown FROM q;
@@ -510,38 +542,32 @@ WITH RECURSIVE
 --
 -- error cases
 --
--- UNION (should be supported someday)
-WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
-LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-                       ^
 -- INTERSECT
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
 	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x...
                        ^
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FROM x)
 	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FR...
                        ^
 -- EXCEPT
 WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
 	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
                        ^
 WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM x)
 	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM ...
                        ^
 -- no non-recursive term
 WITH RECURSIVE x(n) AS (SELECT n FROM x)
 	SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT n FROM x)
                        ^
 -- recursive term in the left hand side (strictly speaking, should allow this)
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index c3ff5e285a7..60c545d0676 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -31,6 +31,13 @@ UNION ALL
 )
 SELECT * FROM t;
 
+-- This is an infinite loop with UNION ALL, but not with UNION
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT 10-n FROM t)
+SELECT * FROM t;
+
 -- This'd be an infinite loop, but outside query reads only as much as needed
 WITH RECURSIVE t(n) AS (
     VALUES (1)
@@ -38,6 +45,13 @@ UNION ALL
     SELECT n+1 FROM t)
 SELECT * FROM t LIMIT 10;
 
+-- UNION case should have same property
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT n+1 FROM t)
+SELECT * FROM t LIMIT 10;
+
 -- Test behavior with an unknown-type literal in the WITH
 WITH q AS (SELECT 'foo' AS x)
 SELECT x, x IS OF (unknown) as is_unknown FROM q;
@@ -265,10 +279,6 @@ WITH RECURSIVE
 -- error cases
 --
 
--- UNION (should be supported someday)
-WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-	SELECT * FROM x;
-
 -- INTERSECT
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
 	SELECT * FROM x;
-- 
GitLab