From bdfbfde1b168b3332c4cdac34ac86a80aaf4d442 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 20 Jan 2003 18:55:07 +0000
Subject: [PATCH] IN clauses appearing at top level of WHERE can now be handled
 as joins. There are two implementation techniques: the executor understands a
 new JOIN_IN jointype, which emits at most one matching row per left-hand row,
 or the result of the IN's sub-select can be fed through a DISTINCT filter and
 then joined as an ordinary relation. Along the way, some minor code cleanup
 in the optimizer; notably, break out most of the jointree-rearrangement
 preprocessing in planner.c and put it in a new file prep/prepjointree.c.

---
 doc/src/sgml/release.sgml                 |   3 +-
 src/backend/executor/nodeHashjoin.c       |  12 +-
 src/backend/executor/nodeMergejoin.c      |  16 +-
 src/backend/executor/nodeNestloop.c       |  12 +-
 src/backend/nodes/copyfuncs.c             |  26 +-
 src/backend/nodes/equalfuncs.c            |  21 +-
 src/backend/nodes/list.c                  |  10 +-
 src/backend/nodes/outfuncs.c              |  30 +-
 src/backend/optimizer/README              |   1 +
 src/backend/optimizer/geqo/geqo_eval.c    |  15 +-
 src/backend/optimizer/geqo/geqo_main.c    |  23 +-
 src/backend/optimizer/path/allpaths.c     |   6 +-
 src/backend/optimizer/path/costsize.c     |  46 +-
 src/backend/optimizer/path/indxpath.c     |   6 +-
 src/backend/optimizer/path/joinpath.c     | 287 ++++-----
 src/backend/optimizer/path/joinrels.c     | 151 ++++-
 src/backend/optimizer/plan/createplan.c   | 145 ++++-
 src/backend/optimizer/plan/initsplan.c    |  13 +-
 src/backend/optimizer/plan/planmain.c     |   4 +-
 src/backend/optimizer/plan/planner.c      | 529 ++---------------
 src/backend/optimizer/plan/setrefs.c      | 121 +++-
 src/backend/optimizer/plan/subselect.c    | 147 ++++-
 src/backend/optimizer/prep/Makefile       |   4 +-
 src/backend/optimizer/prep/prepjointree.c | 680 ++++++++++++++++++++++
 src/backend/optimizer/prep/prepunion.c    |  70 ++-
 src/backend/optimizer/util/clauses.c      |  24 +-
 src/backend/optimizer/util/joininfo.c     |  35 +-
 src/backend/optimizer/util/pathnode.c     | 110 +++-
 src/backend/optimizer/util/relnode.c      |  18 +-
 src/backend/optimizer/util/tlist.c        |  24 +-
 src/backend/optimizer/util/var.c          | 153 ++++-
 src/backend/rewrite/rewriteManip.c        |  60 +-
 src/backend/utils/adt/selfuncs.c          |  15 +-
 src/include/nodes/nodes.h                 |  19 +-
 src/include/nodes/parsenodes.h            |   3 +-
 src/include/nodes/pg_list.h               |   5 +-
 src/include/nodes/relation.h              |  43 +-
 src/include/optimizer/joininfo.h          |   3 +-
 src/include/optimizer/pathnode.h          |   5 +-
 src/include/optimizer/planmain.h          |   4 +-
 src/include/optimizer/planner.h           |   5 +-
 src/include/optimizer/prep.h              |  12 +-
 src/include/optimizer/subselect.h         |   8 +-
 src/include/optimizer/tlist.h             |   4 +-
 src/include/optimizer/var.h               |   8 +-
 src/include/utils/selfuncs.h              |   4 +-
 src/test/regress/expected/subselect.out   |  12 +-
 47 files changed, 2076 insertions(+), 876 deletions(-)
 create mode 100644 src/backend/optimizer/prep/prepjointree.c

diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml
index 77fed8d8b02..2911e1828f9 100644
--- a/doc/src/sgml/release.sgml
+++ b/doc/src/sgml/release.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.178 2003/01/11 21:02:49 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.179 2003/01/20 18:54:44 tgl Exp $
 -->
 
 <appendix id="release">
@@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without
 worries about funny characters.
 -->
 <literallayout><![CDATA[
+Performance of "foo IN (SELECT ...)" queries has been considerably improved
 FETCH 0 now re-fetches cursor's current row, per SQL spec
 Revised executor state representation; plan trees are read-only to executor now
 Information schema
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 48cf30c21f4..d452d3865f5 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.47 2003/01/20 18:54:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -95,6 +95,15 @@ ExecHashJoin(HashJoinState *node)
 		node->js.ps.ps_TupFromTlist = false;
 	}
 
+	/*
+	 * If we're doing an IN join, we want to return at most one row per
+	 * outer tuple; so we can stop scanning the inner relation for the
+	 * current outer tuple if we matched on the previous try.
+	 */
+	if (node->js.jointype == JOIN_IN && 
+		node->hj_MatchedOuter)
+		node->hj_NeedNewOuter = true;
+
 	/*
 	 * Reset per-tuple memory context to free any expression evaluation
 	 * storage allocated in the previous tuple cycle.  Note this can't
@@ -353,6 +362,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
 			break;
 		case JOIN_LEFT:
 			hjstate->hj_NullInnerTupleSlot =
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index af6cd8d6f3f..d5dc7f421aa 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.55 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.56 2003/01/20 18:54:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -381,6 +381,7 @@ ExecMergeJoin(MergeJoinState *node)
 	switch (node->js.jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
 			doFillOuter = false;
 			doFillInner = false;
 			break;
@@ -581,9 +582,15 @@ ExecMergeJoin(MergeJoinState *node)
 				 * the econtext's tuple pointers were set up before
 				 * checking the merge qual, so we needn't do it again.
 				 */
-				qualResult = (joinqual == NIL ||
-							  ExecQual(joinqual, econtext, false));
-				MJ_DEBUG_QUAL(joinqual, qualResult);
+				if (node->js.jointype == JOIN_IN &&
+					node->mj_MatchedOuter)
+					qualResult = false;
+				else
+				{
+					qualResult = (joinqual == NIL ||
+								  ExecQual(joinqual, econtext, false));
+					MJ_DEBUG_QUAL(joinqual, qualResult);
+				}
 
 				if (qualResult)
 				{
@@ -1452,6 +1459,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate)
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
 			break;
 		case JOIN_LEFT:
 			mergestate->mj_NullInnerTupleSlot =
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index 917a7011cbf..1bae9805898 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.29 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v 1.30 2003/01/20 18:54:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -101,6 +101,15 @@ ExecNestLoop(NestLoopState *node)
 		node->js.ps.ps_TupFromTlist = false;
 	}
 
+	/*
+	 * If we're doing an IN join, we want to return at most one row per
+	 * outer tuple; so we can stop scanning the inner relation for the
+	 * current outer tuple if we matched on the previous try.
+	 */
+	if (node->js.jointype == JOIN_IN &&
+		node->nl_MatchedOuter)
+		node->nl_NeedNewOuter = true;
+
 	/*
 	 * Reset per-tuple memory context to free any expression evaluation
 	 * storage allocated in the previous tuple cycle.  Note this can't
@@ -312,6 +321,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate)
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
 			break;
 		case JOIN_LEFT:
 			nlstate->nl_NullInnerTupleSlot =
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 8663c6c4a14..f8e81431ec0 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.236 2003/01/15 19:35:35 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.237 2003/01/20 18:54:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1095,6 +1095,21 @@ _copyJoinInfo(JoinInfo *from)
 	return newnode;
 }
 
+/*
+ * _copyInClauseInfo
+ */
+static InClauseInfo *
+_copyInClauseInfo(InClauseInfo *from)
+{
+	InClauseInfo *newnode = makeNode(InClauseInfo);
+
+	COPY_INTLIST_FIELD(lefthand);
+	COPY_INTLIST_FIELD(righthand);
+	COPY_NODE_FIELD(sub_targetlist);
+
+	return newnode;
+}
+
 /* ****************************************************************
  *					parsenodes.h copy functions
  * ****************************************************************
@@ -1424,9 +1439,9 @@ _copyQuery(Query *from)
 
 	/*
 	 * We do not copy the planner internal fields: base_rel_list,
-	 * other_rel_list, join_rel_list, equi_key_list, query_pathkeys,
-	 * hasJoinRTEs.  That would get us into copying RelOptInfo/Path
-	 * trees, which we don't want to do.
+	 * other_rel_list, join_rel_list, equi_key_list, in_info_list,
+	 * query_pathkeys, hasJoinRTEs.  That would get us into copying
+	 * RelOptInfo/Path trees, which we don't want to do.
 	 */
 
 	return newnode;
@@ -2490,6 +2505,9 @@ copyObject(void *from)
 		case T_JoinInfo:
 			retval = _copyJoinInfo(from);
 			break;
+		case T_InClauseInfo:
+			retval = _copyInClauseInfo(from);
+			break;
 
 			/*
 			 * VALUE NODES
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index a4e9e1092d8..5d3e194e3c2 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.180 2003/01/15 19:35:37 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.181 2003/01/20 18:54:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -486,6 +486,16 @@ _equalJoinInfo(JoinInfo *a, JoinInfo *b)
 	return true;
 }
 
+static bool
+_equalInClauseInfo(InClauseInfo *a, InClauseInfo *b)
+{
+	COMPARE_INTLIST_FIELD(lefthand);
+	COMPARE_INTLIST_FIELD(righthand);
+	COMPARE_NODE_FIELD(sub_targetlist);
+
+	return true;
+}
+
 
 /*
  * Stuff from parsenodes.h
@@ -518,9 +528,9 @@ _equalQuery(Query *a, Query *b)
 
 	/*
 	 * We do not check the internal-to-the-planner fields: base_rel_list,
-	 * other_rel_list, join_rel_list, equi_key_list, query_pathkeys,
-	 * hasJoinRTEs.  They might not be set yet, and in any case they should
-	 * be derivable from the other fields.
+	 * other_rel_list, join_rel_list, equi_key_list, in_info_list,
+	 * query_pathkeys, hasJoinRTEs.  They might not be set yet, and in any
+	 * case they should be derivable from the other fields.
 	 */
 	return true;
 }
@@ -1618,6 +1628,9 @@ equal(void *a, void *b)
 		case T_JoinInfo:
 			retval = _equalJoinInfo(a, b);
 			break;
+		case T_InClauseInfo:
+			retval = _equalInClauseInfo(a, b);
+			break;
 
 			/*
 			 * LIST NODES
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index b3c6a18496f..e896b479018 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.43 2002/12/17 01:18:18 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.44 2003/01/20 18:54:47 tgl Exp $
  *
  * NOTES
  *	  XXX a few of the following functions are duplicated to handle
@@ -638,10 +638,10 @@ lreverse(List *l)
 }
 
 /*
- * Return t if two integer lists have no members in common.
+ * Return true if two integer lists have any members in common.
  */
 bool
-nonoverlap_setsi(List *list1, List *list2)
+overlap_setsi(List *list1, List *list2)
 {
 	List	   *x;
 
@@ -650,9 +650,9 @@ nonoverlap_setsi(List *list1, List *list2)
 		int			e = lfirsti(x);
 
 		if (intMember(e, list2))
-			return false;
+			return true;
 	}
-	return true;
+	return false;
 }
 
 /*
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e72b52570e5..fd18c957d9b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.193 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.194 2003/01/20 18:54:47 tgl Exp $
  *
  * NOTES
  *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -905,6 +905,18 @@ _outMaterialPath(StringInfo str, MaterialPath *node)
 	WRITE_NODE_FIELD(subpath);
 }
 
+static void
+_outUniquePath(StringInfo str, UniquePath *node)
+{
+	WRITE_NODE_TYPE("UNIQUEPATH");
+
+	_outPathInfo(str, (Path *) node);
+
+	WRITE_NODE_FIELD(subpath);
+	WRITE_BOOL_FIELD(use_hash);
+	WRITE_FLOAT_FIELD(rows, "%.0f");
+}
+
 static void
 _outNestPath(StringInfo str, NestPath *node)
 {
@@ -969,6 +981,16 @@ _outJoinInfo(StringInfo str, JoinInfo *node)
 	WRITE_NODE_FIELD(jinfo_restrictinfo);
 }
 
+static void
+_outInClauseInfo(StringInfo str, InClauseInfo *node)
+{
+	WRITE_NODE_TYPE("INCLAUSEINFO");
+
+	WRITE_INTLIST_FIELD(lefthand);
+	WRITE_INTLIST_FIELD(righthand);
+	WRITE_NODE_FIELD(sub_targetlist);
+}
+
 /*****************************************************************************
  *
  *	Stuff from parsenodes.h.
@@ -1563,6 +1585,9 @@ _outNode(StringInfo str, void *obj)
 			case T_MaterialPath:
 				_outMaterialPath(str, obj);
 				break;
+			case T_UniquePath:
+				_outUniquePath(str, obj);
+				break;
 			case T_NestPath:
 				_outNestPath(str, obj);
 				break;
@@ -1581,6 +1606,9 @@ _outNode(StringInfo str, void *obj)
 			case T_JoinInfo:
 				_outJoinInfo(str, obj);
 				break;
+			case T_InClauseInfo:
+				_outInClauseInfo(str, obj);
+				break;
 
 			case T_CreateStmt:
 				_outCreateStmt(str, obj);
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 955e022d8f6..ef086992e8b 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -263,6 +263,7 @@ RelOptInfo      - a relation or joined relations
   AppendPath    - append multiple subpaths together
   ResultPath    - a Result plan node (used for variable-free tlist or qual)
   MaterialPath  - a Material plan node
+  UniquePath    - remove duplicate rows
   NestPath      - nested-loop joins
   MergePath     - merge joins
   HashPath      - hash joins
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 91b6c75e8e2..d53a160a4eb 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.61 2003/01/20 18:54:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,8 +22,8 @@
 #include "postgres.h"
 
 #include <float.h>
-#include <math.h>
 #include <limits.h>
+#include <math.h>
 
 #include "optimizer/geqo.h"
 #include "optimizer/pathnode.h"
@@ -91,7 +91,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
 	 * XXX geqo does not currently support optimization for partial result
 	 * retrieval --- how to fix?
 	 */
-	fitness = joinrel->cheapest_total_path->total_cost;
+	if (joinrel)
+		fitness = joinrel->cheapest_total_path->total_cost;
+	else
+		fitness = DBL_MAX;
 
 	/* restore join_rel_list */
 	root->join_rel_list = savelist;
@@ -113,7 +116,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
  *	 'tour' is the proposed join order, of length 'num_gene'
  *
  * Returns a new join relation whose cheapest path is the best plan for
- * this join order.
+ * this join order.  NB: will return NULL if join order is invalid.
  *
  * Note that at each step we consider using the next rel as both left and
  * right side of a join.  However, we cannot build general ("bushy") plan
@@ -154,6 +157,10 @@ gimme_tree(Query *root, List *initial_rels,
 		 */
 		new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER);
 
+		/* Fail if join order is not valid */
+		if (new_rel == NULL)
+			return NULL;
+
 		/* Find and save the cheapest paths for this rel */
 		set_cheapest(new_rel);
 
diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c
index c9993680b50..c517652dab6 100644
--- a/src/backend/optimizer/geqo/geqo_main.c
+++ b/src/backend/optimizer/geqo/geqo_main.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.33 2002/12/16 21:30:29 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.34 2003/01/20 18:54:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -228,20 +228,25 @@ geqo(Query *root, int number_of_rels, List *initial_rels)
 #endif
 
 
-/* got the cheapest query tree processed by geqo;
-   first element of the population indicates the best query tree */
-
+	/*
+	 * got the cheapest query tree processed by geqo;
+	 * first element of the population indicates the best query tree
+	 */
 	best_tour = (Gene *) pool->data[0].string;
 
-/* root->join_rel_list will be modified during this ! */
+	/* root->join_rel_list will be modified during this ! */
 	best_rel = gimme_tree(root, initial_rels,
 						  best_tour, pool->string_length);
 
-/* DBG: show the query plan
-print_plan(best_plan, root);
-   DBG */
+	if (best_rel == NULL)
+		elog(ERROR, "geqo: failed to make a valid plan");
+
+	/* DBG: show the query plan */
+#ifdef NOT_USED
+	print_plan(best_plan, root);
+#endif
 
-/* ... free memory stuff */
+	/* ... free memory stuff */
 	free_chromo(momma);
 	free_chromo(daddy);
 
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index c0b3ab40da1..f85144b8184 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.94 2003/01/20 18:54:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -750,6 +750,10 @@ print_path(Query *root, Path *path, int indent)
 			ptype = "Material";
 			subpath = ((MaterialPath *) path)->subpath;
 			break;
+		case T_UniquePath:
+			ptype = "Unique";
+			subpath = ((UniquePath *) path)->subpath;
+			break;
 		case T_NestPath:
 			ptype = "NestLoop";
 			join = true;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index efd80dff1ed..5146517132f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.100 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1024,12 +1024,17 @@ cost_hashjoin(Path *path, Query *root,
 	 * Bias against putting larger relation on inside.	We don't want an
 	 * absolute prohibition, though, since larger relation might have
 	 * better bucketsize --- and we can't trust the size estimates
-	 * unreservedly, anyway.  Instead, inflate the startup cost by the
+	 * unreservedly, anyway.  Instead, inflate the run cost by the
 	 * square root of the size ratio.  (Why square root?  No real good
 	 * reason, but it seems reasonable...)
+	 *
+	 * Note: before 7.4 we implemented this by inflating startup cost;
+	 * but if there's a disable_cost component in the input paths'
+	 * startup cost, that unfairly penalizes the hash.  Probably it'd
+	 * be better to keep track of disable penalty separately from cost.
 	 */
 	if (innerbytes > outerbytes && outerbytes > 0)
-		startup_cost *= sqrt(innerbytes / outerbytes);
+		run_cost *= sqrt(innerbytes / outerbytes);
 
 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
@@ -1492,22 +1497,26 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 						   JoinType jointype,
 						   List *restrictlist)
 {
+	Selectivity selec;
 	double		temp;
-
-	/* Start with the Cartesian product */
-	temp = outer_rel->rows * inner_rel->rows;
+	UniquePath *upath;
 
 	/*
-	 * Apply join restrictivity.  Note that we are only considering
+	 * Compute joinclause selectivity.  Note that we are only considering
 	 * clauses that become restriction clauses at this join level; we are
 	 * not double-counting them because they were not considered in
 	 * estimating the sizes of the component rels.
 	 */
-	temp *= restrictlist_selectivity(root,
+	selec = restrictlist_selectivity(root,
 									 restrictlist,
 									 0);
 
 	/*
+	 * Normally, we multiply size of Cartesian product by selectivity.
+	 * But for JOIN_IN, we just multiply the lefthand size by the selectivity
+	 * (is that really right?).  For UNIQUE_OUTER or UNIQUE_INNER, use
+	 * the estimated number of distinct rows (again, is that right?)
+	 *
 	 * If we are doing an outer join, take that into account: the output
 	 * must be at least as large as the non-nullable input.  (Is there any
 	 * chance of being even smarter?)
@@ -1515,24 +1524,45 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	switch (jointype)
 	{
 		case JOIN_INNER:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			break;
 		case JOIN_LEFT:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < outer_rel->rows)
 				temp = outer_rel->rows;
 			break;
 		case JOIN_RIGHT:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < inner_rel->rows)
 				temp = inner_rel->rows;
 			break;
 		case JOIN_FULL:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < outer_rel->rows)
 				temp = outer_rel->rows;
 			if (temp < inner_rel->rows)
 				temp = inner_rel->rows;
 			break;
+		case JOIN_IN:
+			temp = outer_rel->rows * selec;
+			break;
+		case JOIN_REVERSE_IN:
+			temp = inner_rel->rows * selec;
+			break;
+		case JOIN_UNIQUE_OUTER:
+			upath = create_unique_path(root, outer_rel,
+									   outer_rel->cheapest_total_path);
+			temp = upath->rows * inner_rel->rows * selec;
+			break;
+		case JOIN_UNIQUE_INNER:
+			upath = create_unique_path(root, inner_rel,
+									   inner_rel->cheapest_total_path);
+			temp = outer_rel->rows * upath->rows * selec;
+			break;
 		default:
 			elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d",
 				 (int) jointype);
+			temp = 0;			/* keep compiler quiet */
 			break;
 	}
 
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 7e68c41ef37..02a92fd9960 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.131 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.132 2003/01/20 18:54:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1401,11 +1401,13 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
 	MemoryContext oldcontext;
 
 	/*
-	 * Nestloop only supports inner and left joins.
+	 * Nestloop only supports inner, left, and IN joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 8a6fcd3f060..0cbe7bbf83b 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.75 2003/01/15 19:35:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.76 2003/01/20 18:54:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,13 +32,6 @@ static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
 					 JoinType jointype);
-
-#ifdef NOT_USED
-static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
-					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 List *restrictlist, List *mergeclause_list,
-					 JoinType jointype);
-#endif
 static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, JoinType jointype);
@@ -149,6 +142,8 @@ sort_inner_and_outer(Query *root,
 					 JoinType jointype)
 {
 	bool		useallclauses;
+	Path	   *outer_path;
+	Path	   *inner_path;
 	List	   *all_pathkeys;
 	List	   *i;
 
@@ -160,6 +155,9 @@ sort_inner_and_outer(Query *root,
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			useallclauses = false;
 			break;
 		case JOIN_RIGHT:
@@ -173,6 +171,28 @@ sort_inner_and_outer(Query *root,
 			break;
 	}
 
+	/*
+	 * We only consider the cheapest-total-cost input paths, since we are
+	 * assuming here that a sort is required.  We will consider
+	 * cheapest-startup-cost input paths later, and only if they don't
+	 * need a sort.
+	 *
+	 * If unique-ification is requested, do it and then handle as a plain
+	 * inner join.
+	 */
+	outer_path = outerrel->cheapest_total_path;
+	inner_path = innerrel->cheapest_total_path;
+	if (jointype == JOIN_UNIQUE_OUTER)
+	{
+		outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
+		jointype = JOIN_INNER;
+	}
+	else if (jointype == JOIN_UNIQUE_INNER)
+	{
+		inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
+		jointype = JOIN_INNER;
+	}
+
 	/*
 	 * Each possible ordering of the available mergejoin clauses will
 	 * generate a differently-sorted result path at essentially the same
@@ -254,17 +274,14 @@ sort_inner_and_outer(Query *root,
 		merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
 
 		/*
-		 * And now we can make the path.  We only consider the cheapest-
-		 * total-cost input paths, since we are assuming here that a sort
-		 * is required.  We will consider cheapest-startup-cost input
-		 * paths later, and only if they don't need a sort.
+		 * And now we can make the path.
 		 */
 		add_path(joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
-									   outerrel->cheapest_total_path,
-									   innerrel->cheapest_total_path,
+									   outer_path,
+									   inner_path,
 									   restrictlist,
 									   merge_pathkeys,
 									   cur_mergeclauses,
@@ -314,15 +331,18 @@ match_unsorted_outer(Query *root,
 					 List *mergeclause_list,
 					 JoinType jointype)
 {
+	JoinType	save_jointype = jointype;
 	bool		nestjoinOK;
 	bool		useallclauses;
+	Path	   *inner_cheapest_startup = innerrel->cheapest_startup_path;
+	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
 	Path	   *matpath = NULL;
 	Path	   *bestinnerjoin = NULL;
 	List	   *i;
 
 	/*
-	 * Nestloop only supports inner and left joins.  Also, if we are doing
-	 * a right or full join, we must use *all* the mergeclauses as join
+	 * Nestloop only supports inner, left, and IN joins.  Also, if we are
+	 * doing a right or full join, we must use *all* the mergeclauses as join
 	 * clauses, else we will not have a valid plan.  (Although these two
 	 * flags are currently inverses, keep them separate for clarity and
 	 * possible future changes.)
@@ -331,6 +351,9 @@ match_unsorted_outer(Query *root,
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			nestjoinOK = true;
 			useallclauses = false;
 			break;
@@ -347,18 +370,28 @@ match_unsorted_outer(Query *root,
 			break;
 	}
 
-	if (nestjoinOK)
+	/*
+	 * If we need to unique-ify the inner path, we will consider only
+	 * the cheapest inner.
+	 */
+	if (jointype == JOIN_UNIQUE_INNER)
+	{
+		inner_cheapest_total = (Path *)
+			create_unique_path(root, innerrel, inner_cheapest_total);
+		inner_cheapest_startup = inner_cheapest_total;
+		jointype = JOIN_INNER;
+	}
+	else if (nestjoinOK)
 	{
 		/*
 		 * If the cheapest inner path is a join or seqscan, we should consider
 		 * materializing it.  (This is a heuristic: we could consider it
 		 * always, but for inner indexscans it's probably a waste of time.)
 		 */
-		if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
-			  IsA(innerrel->cheapest_total_path, TidPath)))
+		if (!(IsA(inner_cheapest_total, IndexPath) ||
+			  IsA(inner_cheapest_total, TidPath)))
 			matpath = (Path *)
-				create_material_path(innerrel, 
-									 innerrel->cheapest_total_path);
+				create_material_path(innerrel, inner_cheapest_total);
 
 		/*
 		 * Get the best innerjoin indexpath (if any) for this outer rel. It's
@@ -380,6 +413,18 @@ match_unsorted_outer(Query *root,
 		int			num_sortkeys;
 		int			sortkeycnt;
 
+		/*
+		 * If we need to unique-ify the outer path, it's pointless to consider
+		 * any but the cheapest outer.
+		 */
+		if (save_jointype == JOIN_UNIQUE_OUTER)
+		{
+			if (outerpath != outerrel->cheapest_total_path)
+				continue;
+			outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
+			jointype = JOIN_INNER;
+		}
+
 		/*
 		 * The result will have this sort order (even if it is implemented
 		 * as a nestloop, and even if some of the mergeclauses are
@@ -402,7 +447,7 @@ match_unsorted_outer(Query *root,
 										  joinrel,
 										  jointype,
 										  outerpath,
-										  innerrel->cheapest_total_path,
+										  inner_cheapest_total,
 										  restrictlist,
 										  merge_pathkeys));
 			if (matpath != NULL)
@@ -414,14 +459,13 @@ match_unsorted_outer(Query *root,
 											  matpath,
 											  restrictlist,
 											  merge_pathkeys));
-			if (innerrel->cheapest_startup_path !=
-				innerrel->cheapest_total_path)
+			if (inner_cheapest_startup != inner_cheapest_total)
 				add_path(joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
-										 innerrel->cheapest_startup_path,
+											  inner_cheapest_startup,
 											  restrictlist,
 											  merge_pathkeys));
 			if (bestinnerjoin != NULL)
@@ -435,6 +479,10 @@ match_unsorted_outer(Query *root,
 											  merge_pathkeys));
 		}
 
+		/* Can't do anything else if outer path needs to be unique'd */
+		if (save_jointype == JOIN_UNIQUE_OUTER)
+			continue;
+
 		/* Look for useful mergeclauses (if any) */
 		mergeclauses = find_mergeclauses_for_pathkeys(root,
 													  outerpath->pathkeys,
@@ -455,27 +503,30 @@ match_unsorted_outer(Query *root,
 		 * Generate a mergejoin on the basis of sorting the cheapest
 		 * inner. Since a sort will be needed, only cheapest total cost
 		 * matters.  (But create_mergejoin_path will do the right thing if
-		 * innerrel->cheapest_total_path is already correctly sorted.)
+		 * inner_cheapest_total is already correctly sorted.)
 		 */
 		add_path(joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
 									   outerpath,
-									   innerrel->cheapest_total_path,
+									   inner_cheapest_total,
 									   restrictlist,
 									   merge_pathkeys,
 									   mergeclauses,
 									   NIL,
 									   innersortkeys));
 
+		/* Can't do anything else if inner path needs to be unique'd */
+		if (save_jointype == JOIN_UNIQUE_INNER)
+			continue;
+
 		/*
 		 * Look for presorted inner paths that satisfy the innersortkey
 		 * list --- or any truncation thereof, if we are allowed to build
 		 * a mergejoin using a subset of the merge clauses.  Here, we
 		 * consider both cheap startup cost and cheap total cost.  Ignore
-		 * innerrel->cheapest_total_path, since we already made a path
-		 * with it.
+		 * inner_cheapest_total, since we already made a path with it.
 		 */
 		num_sortkeys = length(innersortkeys);
 		if (num_sortkeys > 1 && !useallclauses)
@@ -500,7 +551,7 @@ match_unsorted_outer(Query *root,
 													   trialsortkeys,
 													   TOTAL_COST);
 			if (innerpath != NULL &&
-				innerpath != innerrel->cheapest_total_path &&
+				innerpath != inner_cheapest_total &&
 				(cheapest_total_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_total_inner,
 									TOTAL_COST) < 0))
@@ -535,7 +586,7 @@ match_unsorted_outer(Query *root,
 													   trialsortkeys,
 													   STARTUP_COST);
 			if (innerpath != NULL &&
-				innerpath != innerrel->cheapest_total_path &&
+				innerpath != inner_cheapest_total &&
 				(cheapest_startup_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_startup_inner,
 									STARTUP_COST) < 0))
@@ -584,146 +635,6 @@ match_unsorted_outer(Query *root,
 	}
 }
 
-#ifdef NOT_USED
-
-/*
- * match_unsorted_inner
- *	  Generate mergejoin paths that use an explicit sort of the outer path
- *	  with an already-ordered inner path.
- *
- * 'joinrel' is the join result relation
- * 'outerrel' is the outer join relation
- * 'innerrel' is the inner join relation
- * 'restrictlist' contains all of the RestrictInfo nodes for restriction
- *		clauses that apply to this join
- * 'mergeclause_list' is a list of RestrictInfo nodes for available
- *		mergejoin clauses in this join
- * 'jointype' is the type of join to do
- */
-static void
-match_unsorted_inner(Query *root,
-					 RelOptInfo *joinrel,
-					 RelOptInfo *outerrel,
-					 RelOptInfo *innerrel,
-					 List *restrictlist,
-					 List *mergeclause_list,
-					 JoinType jointype)
-{
-	bool		useallclauses;
-	List	   *i;
-
-	switch (jointype)
-	{
-		case JOIN_INNER:
-		case JOIN_LEFT:
-			useallclauses = false;
-			break;
-		case JOIN_RIGHT:
-		case JOIN_FULL:
-			useallclauses = true;
-			break;
-		default:
-			elog(ERROR, "match_unsorted_inner: unexpected join type %d",
-				 (int) jointype);
-			useallclauses = false;		/* keep compiler quiet */
-			break;
-	}
-
-	foreach(i, innerrel->pathlist)
-	{
-		Path	   *innerpath = (Path *) lfirst(i);
-		List	   *mergeclauses;
-		List	   *outersortkeys;
-		List	   *merge_pathkeys;
-		Path	   *totalouterpath;
-		Path	   *startupouterpath;
-
-		/* Look for useful mergeclauses (if any) */
-		mergeclauses = find_mergeclauses_for_pathkeys(root,
-													  innerpath->pathkeys,
-													  mergeclause_list);
-
-		/* Done with this inner path if no chance for a mergejoin */
-		if (mergeclauses == NIL)
-			continue;
-		if (useallclauses && length(mergeclauses) != length(mergeclause_list))
-			continue;
-
-		/* Compute the required ordering of the outer path */
-		outersortkeys = make_pathkeys_for_mergeclauses(root,
-													   mergeclauses,
-													   outerrel);
-
-		/*
-		 * Generate a mergejoin on the basis of sorting the cheapest
-		 * outer. Since a sort will be needed, only cheapest total cost
-		 * matters.
-		 */
-		merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
-		add_path(joinrel, (Path *)
-				 create_mergejoin_path(root,
-									   joinrel,
-									   jointype,
-									   outerrel->cheapest_total_path,
-									   innerpath,
-									   restrictlist,
-									   merge_pathkeys,
-									   mergeclauses,
-									   outersortkeys,
-									   NIL));
-
-		/*
-		 * Now generate mergejoins based on already-sufficiently-ordered
-		 * outer paths.  There's likely to be some redundancy here with
-		 * paths already generated by merge_unsorted_outer ... but since
-		 * merge_unsorted_outer doesn't consider all permutations of the
-		 * mergeclause list, it may fail to notice that this particular
-		 * innerpath could have been used with this outerpath.
-		 */
-		totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
-														outersortkeys,
-														TOTAL_COST);
-		if (totalouterpath == NULL)
-			continue;			/* there won't be a startup-cost path
-								 * either */
-
-		merge_pathkeys = build_join_pathkeys(root, joinrel,
-											 totalouterpath->pathkeys);
-		add_path(joinrel, (Path *)
-				 create_mergejoin_path(root,
-									   joinrel,
-									   jointype,
-									   totalouterpath,
-									   innerpath,
-									   restrictlist,
-									   merge_pathkeys,
-									   mergeclauses,
-									   NIL,
-									   NIL));
-
-		startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
-														  outersortkeys,
-														  STARTUP_COST);
-		if (startupouterpath != NULL && startupouterpath != totalouterpath)
-		{
-			merge_pathkeys = build_join_pathkeys(root, joinrel,
-											 startupouterpath->pathkeys);
-			add_path(joinrel, (Path *)
-					 create_mergejoin_path(root,
-										   joinrel,
-										   jointype,
-										   startupouterpath,
-										   innerpath,
-										   restrictlist,
-										   merge_pathkeys,
-										   mergeclauses,
-										   NIL,
-										   NIL));
-		}
-	}
-}
-#endif
-
 /*
  * hash_inner_and_outer
  *	  Create hashjoin join paths by explicitly hashing both the outer and
@@ -749,11 +660,14 @@ hash_inner_and_outer(Query *root,
 	List	   *i;
 
 	/*
-	 * Hashjoin only supports inner and left joins.
+	 * Hashjoin only supports inner, left, and IN joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
@@ -813,21 +727,40 @@ hash_inner_and_outer(Query *root,
 		 * cheapest-startup-cost outer paths.  There's no need to consider
 		 * any but the cheapest-total-cost inner path, however.
 		 */
+		Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
+		Path *cheapest_total_outer = outerrel->cheapest_total_path;
+		Path *cheapest_total_inner = innerrel->cheapest_total_path;
+
+		/* Unique-ify if need be */
+		if (jointype == JOIN_UNIQUE_OUTER)
+		{
+			cheapest_total_outer = (Path *)
+				create_unique_path(root, outerrel, cheapest_total_outer);
+			cheapest_startup_outer = cheapest_total_outer;
+			jointype = JOIN_INNER;
+		}
+		else if (jointype == JOIN_UNIQUE_INNER)
+		{
+			cheapest_total_inner = (Path *)
+				create_unique_path(root, innerrel, cheapest_total_inner);
+			jointype = JOIN_INNER;
+		}
+
 		add_path(joinrel, (Path *)
 				 create_hashjoin_path(root,
 									  joinrel,
 									  jointype,
-									  outerrel->cheapest_total_path,
-									  innerrel->cheapest_total_path,
+									  cheapest_total_outer,
+									  cheapest_total_inner,
 									  restrictlist,
 									  hashclauses));
-		if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
+		if (cheapest_startup_outer != cheapest_total_outer)
 			add_path(joinrel, (Path *)
 					 create_hashjoin_path(root,
 										  joinrel,
 										  jointype,
-										  outerrel->cheapest_startup_path,
-										  innerrel->cheapest_total_path,
+										  cheapest_startup_outer,
+										  cheapest_total_inner,
 										  restrictlist,
 										  hashclauses));
 	}
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 037733d5d72..704afda37f8 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.59 2003/01/20 18:54:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -172,7 +172,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels)
 							jrel = make_join_rel(root, old_rel, new_rel,
 												 JOIN_INNER);
 							/* Avoid making duplicate entries ... */
-							if (!ptrMember(jrel, result_rels))
+							if (jrel && !ptrMember(jrel, result_rels))
 								result_rels = lcons(jrel, result_rels);
 							break;		/* need not consider more
 										 * joininfos */
@@ -276,10 +276,9 @@ make_rels_by_clause_joins(Query *root,
 
 				/*
 				 * Avoid entering same joinrel into our output list more
-				 * than once.  (make_rels_by_joins doesn't really care,
-				 * but GEQO does.)
+				 * than once.
 				 */
-				if (!ptrMember(jrel, result))
+				if (jrel && !ptrMember(jrel, result))
 					result = lcons(jrel, result);
 			}
 		}
@@ -323,7 +322,8 @@ make_rels_by_clauseless_joins(Query *root,
 			 * As long as given other_rels are distinct, don't need to
 			 * test to see if jrel is already part of output list.
 			 */
-			result = lcons(jrel, result);
+			if (jrel)
+				result = lcons(jrel, result);
 		}
 	}
 
@@ -367,6 +367,9 @@ make_jointree_rel(Query *root, Node *jtnode)
 		/* Make this join rel */
 		rel = make_join_rel(root, lrel, rrel, j->jointype);
 
+		if (rel == NULL)
+			elog(ERROR, "make_jointree_rel: invalid join order!?");
+
 		/*
 		 * Since we are only going to consider this one way to do it,
 		 * we're done generating Paths for this joinrel and can now select
@@ -395,19 +398,121 @@ make_jointree_rel(Query *root, Node *jtnode)
  *	   created with the two rels as outer and inner rel.
  *	   (The join rel may already contain paths generated from other
  *	   pairs of rels that add up to the same set of base rels.)
+ *
+ * NB: will return NULL if attempted join is not valid.  This can only
+ * happen when working with IN clauses that have been turned into joins.
  */
 RelOptInfo *
 make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			  JoinType jointype)
 {
+	List	   *joinrelids;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
 
+	/* We should never try to join two overlapping sets of rels. */
+	Assert(nonoverlap_setsi(rel1->relids, rel2->relids));
+
+	/* Construct Relids set that identifies the joinrel. */
+	joinrelids = nconc(listCopy(rel1->relids), listCopy(rel2->relids));
+
+	/*
+	 * If we are implementing IN clauses as joins, there are some joins
+	 * that are illegal.  Check to see if the proposed join is trouble.
+	 * We can skip the work if looking at an outer join, however, because
+	 * only top-level joins might be affected.
+	 */
+	if (jointype == JOIN_INNER)
+	{
+		List	   *l;
+
+		foreach(l, root->in_info_list)
+		{
+			InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+			/*
+			 * Cannot join if proposed join contains part, but only
+			 * part, of the RHS, *and* it contains rels not in the RHS.
+			 *
+			 * Singleton RHS cannot be a problem, so skip expensive tests.
+			 */
+			if (length(ininfo->righthand) > 1 &&
+				overlap_setsi(ininfo->righthand, joinrelids) &&
+				!is_subseti(ininfo->righthand, joinrelids) &&
+				!is_subseti(joinrelids, ininfo->righthand))
+			{
+				freeList(joinrelids);
+				return NULL;
+			}
+
+			/*
+			 * No issue unless we are looking at a join of the IN's RHS
+			 * to other stuff.
+			 */
+			if (! (length(ininfo->righthand) < length(joinrelids) &&
+				   is_subseti(ininfo->righthand, joinrelids)))
+				continue;
+			/*
+			 * If we already joined IN's RHS to any part of its LHS in either
+			 * input path, then this join is not constrained (the necessary
+			 * work was done at a lower level).
+			 */
+			if (overlap_setsi(ininfo->lefthand, rel1->relids) &&
+				is_subseti(ininfo->righthand, rel1->relids))
+				continue;
+			if (overlap_setsi(ininfo->lefthand, rel2->relids) &&
+				is_subseti(ininfo->righthand, rel2->relids))
+				continue;
+			/*
+			 * JOIN_IN technique will work if outerrel includes LHS and
+			 * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
+			 * RHS/LHS.
+			 *
+			 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
+			 * conversely JOIN_UNIQUE_INNER will work if innerrel is
+			 * exactly RHS.
+			 *
+			 * But none of these will work if we already found another IN
+			 * that needs to trigger here.
+			 */
+			if (jointype != JOIN_INNER)
+			{
+				freeList(joinrelids);
+				return NULL;
+			}
+			if (is_subseti(ininfo->lefthand, rel1->relids) &&
+				sameseti(ininfo->righthand, rel2->relids))
+			{
+				jointype = JOIN_IN;
+			}
+			else if (is_subseti(ininfo->lefthand, rel2->relids) &&
+					 sameseti(ininfo->righthand, rel1->relids))
+			{
+				jointype = JOIN_REVERSE_IN;
+			}
+			else if (sameseti(ininfo->righthand, rel1->relids))
+			{
+				jointype = JOIN_UNIQUE_OUTER;
+			}
+			else if (sameseti(ininfo->righthand, rel2->relids))
+			{
+				jointype = JOIN_UNIQUE_INNER;
+			}
+			else
+			{
+				/* invalid join path */
+				freeList(joinrelids);
+				return NULL;
+			}
+		}
+	}
+
 	/*
 	 * Find or build the join RelOptInfo, and compute the restrictlist
 	 * that goes with this particular joining.
 	 */
-	joinrel = build_join_rel(root, rel1, rel2, jointype, &restrictlist);
+	joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
+							 &restrictlist);
 
 	/*
 	 * Consider paths using each rel as both outer and inner.
@@ -438,11 +543,43 @@ make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
 								 restrictlist);
 			break;
+		case JOIN_IN:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
+								 restrictlist);
+			/* skip rel2/rel1 REVERSE_IN: not supported by joinpath.c */
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			break;
+		case JOIN_REVERSE_IN:
+			/* REVERSE_IN isn't supported by joinpath.c */
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			break;
+		case JOIN_UNIQUE_OUTER:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			break;
+		case JOIN_UNIQUE_INNER:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			break;
 		default:
 			elog(ERROR, "make_join_rel: unsupported join type %d",
 				 (int) jointype);
 			break;
 	}
 
+	freeList(joinrelids);
+
 	return joinrel;
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index f6e51d0d52f..b7b1204e76e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.131 2003/01/15 23:10:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,6 +26,7 @@
 #include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
+#include "parser/parse_clause.h"
 #include "parser/parse_expr.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
@@ -36,6 +37,7 @@ static Join *create_join_plan(Query *root, JoinPath *best_path);
 static Append *create_append_plan(Query *root, AppendPath *best_path);
 static Result *create_result_plan(Query *root, ResultPath *best_path);
 static Material *create_material_plan(Query *root, MaterialPath *best_path);
+static Plan *create_unique_plan(Query *root, UniquePath *best_path);
 static SeqScan *create_seqscan_plan(Path *best_path, List *tlist,
 					List *scan_clauses);
 static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path,
@@ -146,6 +148,10 @@ create_plan(Query *root, Path *best_path)
 			plan = (Plan *) create_material_plan(root,
 												 (MaterialPath *) best_path);
 			break;
+		case T_Unique:
+			plan = (Plan *) create_unique_plan(root,
+											   (UniquePath *) best_path);
+			break;
 		default:
 			elog(ERROR, "create_plan: unknown pathtype %d",
 				 best_path->pathtype);
@@ -399,6 +405,97 @@ create_material_plan(Query *root, MaterialPath *best_path)
 	return plan;
 }
 
+/*
+ * create_unique_plan
+ *	  Create a plan that removes duplicate rows from the output of
+ *	  best_path's subpath, which is itself planned recursively.
+ *
+ *	  Returns the topmost Plan node (currently always Sort + Unique).
+ */
+static Plan *
+create_unique_plan(Query *root, UniquePath *best_path)
+{
+	Plan	   *plan;
+	Plan	   *subplan;
+	List	   *sub_targetlist;
+	List	   *l;
+
+	subplan = create_plan(root, best_path->subpath);
+
+	/*
+	 * If the subplan came from an IN subselect (currently always the case),
+	 * we need to instantiate the correct output targetlist for the subselect,
+	 * rather than using the flattened tlist.
+	 */
+	sub_targetlist = NIL;
+	foreach(l, root->in_info_list)
+	{
+		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+		if (sameseti(ininfo->righthand, best_path->path.parent->relids))
+		{
+			sub_targetlist = ininfo->sub_targetlist;
+			break;
+		}
+	}
+
+	if (sub_targetlist)
+	{
+		/*
+		 * Transform list of plain Vars into targetlist
+		 */
+		List   *newtlist = NIL;
+		int		resno = 1;
+
+		foreach(l, sub_targetlist)
+		{
+			Node	   *tlexpr = lfirst(l);
+			TargetEntry *tle;
+
+			tle = makeTargetEntry(makeResdom(resno,
+											 exprType(tlexpr),
+											 exprTypmod(tlexpr),
+											 NULL,
+											 false),
+								  (Expr *) tlexpr);
+			newtlist = lappend(newtlist, tle);
+			resno++;
+		}
+		/*
+		 * If the top plan node can't do projections, we need to add a
+		 * Result node to help it along.
+		 *
+		 * Currently, the only non-projection-capable plan type
+		 * we can see here is Append.
+		 */
+		if (IsA(subplan, Append))
+			subplan = (Plan *) make_result(newtlist, NULL, subplan);
+		else
+			subplan->targetlist = newtlist;
+	}
+
+	if (best_path->use_hash)
+	{
+		elog(ERROR, "create_unique_plan: hash case not implemented yet");
+		plan = NULL;
+	}
+	else
+	{
+		List	   *sort_tlist;
+		List	   *sortList;
+
+		sort_tlist = new_unsorted_tlist(subplan->targetlist);
+		sortList = addAllTargetsToSortList(NIL, sort_tlist);
+		plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist,
+												   subplan, sortList);
+		plan = (Plan *) make_unique(sort_tlist, plan, sortList);
+	}
+
+	plan->plan_rows = best_path->rows;
+
+	return plan;
+}
+
 
 /*****************************************************************************
  *
@@ -1548,6 +1645,52 @@ make_sort_from_pathkeys(Query *root, Plan *lefttree,
 	return make_sort(root, sort_tlist, lefttree, numsortkeys);
 }
 
+/*
+ * make_sort_from_sortclauses
+ *	  Create sort plan to sort according to given sortclauses
+ *
+ *	  'tlist' is the targetlist
+ *	  'lefttree' is the node which yields input tuples
+ *	  'sortcls' is a list of SortClauses
+ */
+Sort *
+make_sort_from_sortclauses(Query *root, List *tlist,
+						   Plan *lefttree, List *sortcls)
+{
+	List	   *sort_tlist;
+	List	   *i;
+	int			keyno = 0;
+
+	/*
+	 * First make a copy of the tlist so that we don't corrupt the
+	 * original.
+	 */
+	sort_tlist = new_unsorted_tlist(tlist);
+
+	foreach(i, sortcls)
+	{
+		SortClause *sortcl = (SortClause *) lfirst(i);
+		TargetEntry *tle = get_sortgroupclause_tle(sortcl, sort_tlist);
+		Resdom	   *resdom = tle->resdom;
+
+		/*
+		 * Check for the possibility of duplicate order-by clauses --- the
+		 * parser should have removed 'em, but the executor will get
+		 * terribly confused if any get through!
+		 */
+		if (resdom->reskey == 0)
+		{
+			/* OK, insert the ordering info needed by the executor. */
+			resdom->reskey = ++keyno;
+			resdom->reskeyop = sortcl->sortop;
+		}
+	}
+
+	Assert(keyno > 0);
+
+	return make_sort(root, sort_tlist, lefttree, keyno);
+}
+
 Material *
 make_material(List *tlist, Plan *lefttree)
 {
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 87c77e52fc3..037ed3314cf 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,12 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.81 2003/01/15 19:35:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.82 2003/01/20 18:54:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -579,6 +578,11 @@ distribute_qual_to_rels(Query *root, Node *clause,
  *	  the appropriate joininfo list (creating a new list and adding it to the
  *	  appropriate rel node if necessary).
  *
+ * Note that the same copy of the restrictinfo node is linked to by all the
+ * lists it is in.  This allows us to exploit caching of information about
+ * the restriction clause (but we must be careful that the information does
+ * not depend on context).
+ *
  * 'restrictinfo' describes the join clause
  * 'join_relids' is the list of relations participating in the join clause
  */
@@ -602,12 +606,13 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
 			if (lfirsti(otherrel) != cur_relid)
 				unjoined_relids = lappendi(unjoined_relids, lfirsti(otherrel));
 		}
+		Assert(unjoined_relids != NIL);
 
 		/*
 		 * Find or make the joininfo node for this combination of rels,
 		 * and add the restrictinfo node to it.
 		 */
-		joininfo = find_joininfo_node(find_base_rel(root, cur_relid),
+		joininfo = make_joininfo_node(find_base_rel(root, cur_relid),
 									  unjoined_relids);
 		joininfo->jinfo_restrictinfo = lappend(joininfo->jinfo_restrictinfo,
 											   restrictinfo);
@@ -731,7 +736,7 @@ exprs_known_equal(Query *root, Node *item1, Node *item2)
 	{
 		JoinInfo   *joininfo = find_joininfo_node(rel1, relids);
 
-		restrictlist = joininfo->jinfo_restrictinfo;
+		restrictlist = joininfo ? joininfo->jinfo_restrictinfo : NIL;
 	}
 
 	/*
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 6e265931eb2..daa840f789e 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.73 2003/01/15 19:35:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.74 2003/01/20 18:54:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -108,6 +108,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
 
 	/*
 	 * init planner lists to empty
+	 *
+	 * NOTE: in_info_list was set up by subquery_planner, do not touch here
 	 */
 	root->base_rel_list = NIL;
 	root->other_rel_list = NIL;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index cd5d266e07a..388380f8843 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.140 2003/01/17 03:25:03 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.141 2003/01/20 18:54:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,24 +38,17 @@
 #include "parser/parsetree.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
-#include "rewrite/rewriteManip.h"
-#include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
 
 
 /* Expression kind codes for preprocess_expression */
-#define EXPRKIND_TARGET 0
-#define EXPRKIND_WHERE	1
-#define EXPRKIND_HAVING 2
+#define EXPRKIND_QUAL	0
+#define EXPRKIND_TARGET	1
+#define EXPRKIND_RTFUNC	2
+#define EXPRKIND_ININFO	3
 
 
-static Node *pull_up_subqueries(Query *parse, Node *jtnode,
-				   bool below_outer_join);
-static bool is_simple_subquery(Query *subquery);
-static bool has_nullable_targetlist(Query *subquery);
-static void resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist);
-static Node *preprocess_jointree(Query *parse, Node *jtnode);
 static Node *preprocess_expression(Query *parse, Node *expr, int kind);
 static void preprocess_qual_conditions(Query *parse, Node *jtnode);
 static Plan *inheritance_planner(Query *parse, List *inheritlist);
@@ -155,6 +148,17 @@ subquery_planner(Query *parse, double tuple_fraction)
 	PlannerQueryLevel++;
 	PlannerInitPlan = NIL;
 
+	/*
+	 * Look for IN clauses at the top level of WHERE, and transform them
+	 * into joins.  Note that this step only handles IN clauses originally
+	 * at top level of WHERE; if we pull up any subqueries in the next step,
+	 * their INs are processed just before pulling them up.
+	 */
+	parse->in_info_list = NIL;
+	if (parse->hasSubLinks)
+		parse->jointree->quals = pull_up_IN_clauses(parse,
+													parse->jointree->quals);
+
 	/*
 	 * Check to see if any subqueries in the rangetable can be merged into
 	 * this query.
@@ -195,7 +199,11 @@ subquery_planner(Query *parse, double tuple_fraction)
 	preprocess_qual_conditions(parse, (Node *) parse->jointree);
 
 	parse->havingQual = preprocess_expression(parse, parse->havingQual,
-											  EXPRKIND_HAVING);
+											  EXPRKIND_QUAL);
+
+	parse->in_info_list = (List *)
+		preprocess_expression(parse, (Node *) parse->in_info_list,
+							  EXPRKIND_ININFO);
 
 	/* Also need to preprocess expressions for function RTEs */
 	foreach(lst, parse->rtable)
@@ -204,8 +212,7 @@ subquery_planner(Query *parse, double tuple_fraction)
 
 		if (rte->rtekind == RTE_FUNCTION)
 			rte->funcexpr = preprocess_expression(parse, rte->funcexpr,
-												  EXPRKIND_TARGET);
-		/* These are not targetlist items, but close enough... */
+												  EXPRKIND_RTFUNC);
 	}
 
 	/*
@@ -295,427 +302,6 @@ subquery_planner(Query *parse, double tuple_fraction)
 	return plan;
 }
 
-/*
- * pull_up_subqueries
- *		Look for subqueries in the rangetable that can be pulled up into
- *		the parent query.  If the subquery has no special features like
- *		grouping/aggregation then we can merge it into the parent's jointree.
- *
- * below_outer_join is true if this jointree node is within the nullable
- * side of an outer join.  This restricts what we can do.
- *
- * A tricky aspect of this code is that if we pull up a subquery we have
- * to replace Vars that reference the subquery's outputs throughout the
- * parent query, including quals attached to jointree nodes above the one
- * we are currently processing!  We handle this by being careful not to
- * change the jointree structure while recursing: no nodes other than
- * subquery RangeTblRef entries will be replaced.  Also, we can't turn
- * ResolveNew loose on the whole jointree, because it'll return a mutated
- * copy of the tree; we have to invoke it just on the quals, instead.
- */
-static Node *
-pull_up_subqueries(Query *parse, Node *jtnode, bool below_outer_join)
-{
-	if (jtnode == NULL)
-		return NULL;
-	if (IsA(jtnode, RangeTblRef))
-	{
-		int			varno = ((RangeTblRef *) jtnode)->rtindex;
-		RangeTblEntry *rte = rt_fetch(varno, parse->rtable);
-		Query	   *subquery = rte->subquery;
-
-		/*
-		 * Is this a subquery RTE, and if so, is the subquery simple
-		 * enough to pull up?  (If not, do nothing at this node.)
-		 *
-		 * If we are inside an outer join, only pull up subqueries whose
-		 * targetlists are nullable --- otherwise substituting their tlist
-		 * entries for upper Var references would do the wrong thing (the
-		 * results wouldn't become NULL when they're supposed to). XXX
-		 * This could be improved by generating pseudo-variables for such
-		 * expressions; we'd have to figure out how to get the pseudo-
-		 * variables evaluated at the right place in the modified plan
-		 * tree. Fix it someday.
-		 *
-		 * Note: even if the subquery itself is simple enough, we can't pull
-		 * it up if there is a reference to its whole tuple result.
-		 * Perhaps a pseudo-variable is the answer here too.
-		 */
-		if (rte->rtekind == RTE_SUBQUERY && is_simple_subquery(subquery) &&
-			(!below_outer_join || has_nullable_targetlist(subquery)) &&
-			!contain_whole_tuple_var((Node *) parse, varno, 0))
-		{
-			int			rtoffset;
-			List	   *subtlist;
-			List	   *rt;
-
-			/*
-			 * First, recursively pull up the subquery's subqueries, so
-			 * that this routine's processing is complete for its jointree
-			 * and rangetable.	NB: if the same subquery is referenced
-			 * from multiple jointree items (which can't happen normally,
-			 * but might after rule rewriting), then we will invoke this
-			 * processing multiple times on that subquery.	OK because
-			 * nothing will happen after the first time.  We do have to be
-			 * careful to copy everything we pull up, however, or risk
-			 * having chunks of structure multiply linked.
-			 *
-			 * Note: 'false' is correct here even if we are within an outer
-			 * join in the upper query; the lower query starts with a clean
-			 * slate for outer-join semantics.
-			 */
-			subquery->jointree = (FromExpr *)
-				pull_up_subqueries(subquery, (Node *) subquery->jointree,
-								   false);
-
-			/*
-			 * Now make a modifiable copy of the subquery that we can run
-			 * OffsetVarNodes and IncrementVarSublevelsUp on.
-			 */
-			subquery = copyObject(subquery);
-
-			/*
-			 * Adjust level-0 varnos in subquery so that we can append its
-			 * rangetable to upper query's.
-			 */
-			rtoffset = length(parse->rtable);
-			OffsetVarNodes((Node *) subquery, rtoffset, 0);
-
-			/*
-			 * Upper-level vars in subquery are now one level closer to their
-			 * parent than before.
-			 */
-			IncrementVarSublevelsUp((Node *) subquery, -1, 1);
-
-			/*
-			 * Replace all of the top query's references to the subquery's
-			 * outputs with copies of the adjusted subtlist items, being
-			 * careful not to replace any of the jointree structure.
-			 * (This'd be a lot cleaner if we could use
-			 * query_tree_mutator.)
-			 */
-			subtlist = subquery->targetList;
-			parse->targetList = (List *)
-				ResolveNew((Node *) parse->targetList,
-						   varno, 0, subtlist, CMD_SELECT, 0);
-			resolvenew_in_jointree((Node *) parse->jointree, varno, subtlist);
-			Assert(parse->setOperations == NULL);
-			parse->havingQual =
-				ResolveNew(parse->havingQual,
-						   varno, 0, subtlist, CMD_SELECT, 0);
-
-			foreach(rt, parse->rtable)
-			{
-				RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
-
-				if (rte->rtekind == RTE_JOIN)
-					rte->joinaliasvars = (List *)
-						ResolveNew((Node *) rte->joinaliasvars,
-								   varno, 0, subtlist, CMD_SELECT, 0);
-			}
-
-			/*
-			 * Now append the adjusted rtable entries to upper query. (We
-			 * hold off until after fixing the upper rtable entries; no
-			 * point in running that code on the subquery ones too.)
-			 */
-			parse->rtable = nconc(parse->rtable, subquery->rtable);
-
-			/*
-			 * Pull up any FOR UPDATE markers, too.  (OffsetVarNodes
-			 * already adjusted the marker values, so just nconc the
-			 * list.)
-			 */
-			parse->rowMarks = nconc(parse->rowMarks, subquery->rowMarks);
-
-			/*
-			 * Miscellaneous housekeeping.
-			 */
-			parse->hasSubLinks |= subquery->hasSubLinks;
-			/* subquery won't be pulled up if it hasAggs, so no work there */
-
-			/*
-			 * Return the adjusted subquery jointree to replace the
-			 * RangeTblRef entry in my jointree.
-			 */
-			return (Node *) subquery->jointree;
-		}
-	}
-	else if (IsA(jtnode, FromExpr))
-	{
-		FromExpr   *f = (FromExpr *) jtnode;
-		List	   *l;
-
-		foreach(l, f->fromlist)
-			lfirst(l) = pull_up_subqueries(parse, lfirst(l),
-										   below_outer_join);
-	}
-	else if (IsA(jtnode, JoinExpr))
-	{
-		JoinExpr   *j = (JoinExpr *) jtnode;
-
-		/* Recurse, being careful to tell myself when inside outer join */
-		switch (j->jointype)
-		{
-			case JOIN_INNER:
-				j->larg = pull_up_subqueries(parse, j->larg,
-											 below_outer_join);
-				j->rarg = pull_up_subqueries(parse, j->rarg,
-											 below_outer_join);
-				break;
-			case JOIN_LEFT:
-				j->larg = pull_up_subqueries(parse, j->larg,
-											 below_outer_join);
-				j->rarg = pull_up_subqueries(parse, j->rarg,
-											 true);
-				break;
-			case JOIN_FULL:
-				j->larg = pull_up_subqueries(parse, j->larg,
-											 true);
-				j->rarg = pull_up_subqueries(parse, j->rarg,
-											 true);
-				break;
-			case JOIN_RIGHT:
-				j->larg = pull_up_subqueries(parse, j->larg,
-											 true);
-				j->rarg = pull_up_subqueries(parse, j->rarg,
-											 below_outer_join);
-				break;
-			case JOIN_UNION:
-
-				/*
-				 * This is where we fail if upper levels of planner
-				 * haven't rewritten UNION JOIN as an Append ...
-				 */
-				elog(ERROR, "UNION JOIN is not implemented yet");
-				break;
-			default:
-				elog(ERROR, "pull_up_subqueries: unexpected join type %d",
-					 j->jointype);
-				break;
-		}
-	}
-	else
-		elog(ERROR, "pull_up_subqueries: unexpected node type %d",
-			 nodeTag(jtnode));
-	return jtnode;
-}
-
-/*
- * is_simple_subquery
- *	  Check a subquery in the range table to see if it's simple enough
- *	  to pull up into the parent query.
- */
-static bool
-is_simple_subquery(Query *subquery)
-{
-	/*
-	 * Let's just make sure it's a valid subselect ...
-	 */
-	if (!IsA(subquery, Query) ||
-		subquery->commandType != CMD_SELECT ||
-		subquery->resultRelation != 0 ||
-		subquery->into != NULL ||
-		subquery->isPortal)
-		elog(ERROR, "is_simple_subquery: subquery is bogus");
-
-	/*
-	 * Can't currently pull up a query with setops. Maybe after querytree
-	 * redesign...
-	 */
-	if (subquery->setOperations)
-		return false;
-
-	/*
-	 * Can't pull up a subquery involving grouping, aggregation, sorting,
-	 * or limiting.
-	 */
-	if (subquery->hasAggs ||
-		subquery->groupClause ||
-		subquery->havingQual ||
-		subquery->sortClause ||
-		subquery->distinctClause ||
-		subquery->limitOffset ||
-		subquery->limitCount)
-		return false;
-
-	/*
-	 * Don't pull up a subquery that has any set-returning functions in
-	 * its targetlist.	Otherwise we might well wind up inserting
-	 * set-returning functions into places where they mustn't go, such as
-	 * quals of higher queries.
-	 */
-	if (expression_returns_set((Node *) subquery->targetList))
-		return false;
-
-	/*
-	 * Hack: don't try to pull up a subquery with an empty jointree.
-	 * query_planner() will correctly generate a Result plan for a
-	 * jointree that's totally empty, but I don't think the right things
-	 * happen if an empty FromExpr appears lower down in a jointree. Not
-	 * worth working hard on this, just to collapse SubqueryScan/Result
-	 * into Result...
-	 */
-	if (subquery->jointree->fromlist == NIL)
-		return false;
-
-	return true;
-}
-
-/*
- * has_nullable_targetlist
- *	  Check a subquery in the range table to see if all the non-junk
- *	  targetlist items are simple variables (and, hence, will correctly
- *	  go to NULL when examined above the point of an outer join).
- *
- * A possible future extension is to accept strict functions of simple
- * variables, eg, "x + 1".
- */
-static bool
-has_nullable_targetlist(Query *subquery)
-{
-	List	   *l;
-
-	foreach(l, subquery->targetList)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-		/* ignore resjunk columns */
-		if (tle->resdom->resjunk)
-			continue;
-
-		/* Okay if tlist item is a simple Var */
-		if (tle->expr && IsA(tle->expr, Var))
-			continue;
-
-		return false;
-	}
-	return true;
-}
-
-/*
- * Helper routine for pull_up_subqueries: do ResolveNew on every expression
- * in the jointree, without changing the jointree structure itself.  Ugly,
- * but there's no other way...
- */
-static void
-resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist)
-{
-	if (jtnode == NULL)
-		return;
-	if (IsA(jtnode, RangeTblRef))
-	{
-		/* nothing to do here */
-	}
-	else if (IsA(jtnode, FromExpr))
-	{
-		FromExpr   *f = (FromExpr *) jtnode;
-		List	   *l;
-
-		foreach(l, f->fromlist)
-			resolvenew_in_jointree(lfirst(l), varno, subtlist);
-		f->quals = ResolveNew(f->quals,
-							  varno, 0, subtlist, CMD_SELECT, 0);
-	}
-	else if (IsA(jtnode, JoinExpr))
-	{
-		JoinExpr   *j = (JoinExpr *) jtnode;
-
-		resolvenew_in_jointree(j->larg, varno, subtlist);
-		resolvenew_in_jointree(j->rarg, varno, subtlist);
-		j->quals = ResolveNew(j->quals,
-							  varno, 0, subtlist, CMD_SELECT, 0);
-
-		/*
-		 * We don't bother to update the colvars list, since it won't be
-		 * used again ...
-		 */
-	}
-	else
-		elog(ERROR, "resolvenew_in_jointree: unexpected node type %d",
-			 nodeTag(jtnode));
-}
-
-/*
- * preprocess_jointree
- *		Attempt to simplify a query's jointree.
- *
- * If we succeed in pulling up a subquery then we might form a jointree
- * in which a FromExpr is a direct child of another FromExpr.  In that
- * case we can consider collapsing the two FromExprs into one.	This is
- * an optional conversion, since the planner will work correctly either
- * way.  But we may find a better plan (at the cost of more planning time)
- * if we merge the two nodes.
- *
- * NOTE: don't try to do this in the same jointree scan that does subquery
- * pullup!	Since we're changing the jointree structure here, that wouldn't
- * work reliably --- see comments for pull_up_subqueries().
- */
-static Node *
-preprocess_jointree(Query *parse, Node *jtnode)
-{
-	if (jtnode == NULL)
-		return NULL;
-	if (IsA(jtnode, RangeTblRef))
-	{
-		/* nothing to do here... */
-	}
-	else if (IsA(jtnode, FromExpr))
-	{
-		FromExpr   *f = (FromExpr *) jtnode;
-		List	   *newlist = NIL;
-		List	   *l;
-
-		foreach(l, f->fromlist)
-		{
-			Node	   *child = (Node *) lfirst(l);
-
-			/* Recursively simplify the child... */
-			child = preprocess_jointree(parse, child);
-			/* Now, is it a FromExpr? */
-			if (child && IsA(child, FromExpr))
-			{
-				/*
-				 * Yes, so do we want to merge it into parent?	Always do
-				 * so if child has just one element (since that doesn't
-				 * make the parent's list any longer).  Otherwise we have
-				 * to be careful about the increase in planning time
-				 * caused by combining the two join search spaces into
-				 * one.  Our heuristic is to merge if the merge will
-				 * produce a join list no longer than GEQO_RELS/2.
-				 * (Perhaps need an additional user parameter?)
-				 */
-				FromExpr   *subf = (FromExpr *) child;
-				int			childlen = length(subf->fromlist);
-				int			myothers = length(newlist) + length(lnext(l));
-
-				if (childlen <= 1 || (childlen + myothers) <= geqo_rels / 2)
-				{
-					newlist = nconc(newlist, subf->fromlist);
-					f->quals = make_and_qual(subf->quals, f->quals);
-				}
-				else
-					newlist = lappend(newlist, child);
-			}
-			else
-				newlist = lappend(newlist, child);
-		}
-		f->fromlist = newlist;
-	}
-	else if (IsA(jtnode, JoinExpr))
-	{
-		JoinExpr   *j = (JoinExpr *) jtnode;
-
-		/* Can't usefully change the JoinExpr, but recurse on children */
-		j->larg = preprocess_jointree(parse, j->larg);
-		j->rarg = preprocess_jointree(parse, j->rarg);
-	}
-	else
-		elog(ERROR, "preprocess_jointree: unexpected node type %d",
-			 nodeTag(jtnode));
-	return jtnode;
-}
-
 /*
  * preprocess_expression
  *		Do subquery_planner's preprocessing work for an expression,
@@ -731,7 +317,7 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 	 * else sublinks expanded out from join aliases wouldn't get processed.
 	 */
 	if (parse->hasJoinRTEs)
-		expr = flatten_join_alias_vars(expr, parse->rtable);
+		expr = flatten_join_alias_vars(parse, expr);
 
 	/*
 	 * Simplify constant expressions.
@@ -748,7 +334,7 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 	 * XXX Is there any value in re-applying eval_const_expressions after
 	 * canonicalize_qual?
 	 */
-	if (kind != EXPRKIND_TARGET)
+	if (kind == EXPRKIND_QUAL)
 	{
 		expr = (Node *) canonicalize_qual((Expr *) expr, true);
 
@@ -760,7 +346,7 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 
 	/* Expand SubLinks to SubPlans */
 	if (parse->hasSubLinks)
-		expr = SS_process_sublinks(expr, (kind != EXPRKIND_TARGET));
+		expr = SS_process_sublinks(expr, (kind == EXPRKIND_QUAL));
 
 	/* Replace uplevel vars with Param nodes */
 	if (PlannerQueryLevel > 1)
@@ -791,7 +377,7 @@ preprocess_qual_conditions(Query *parse, Node *jtnode)
 		foreach(l, f->fromlist)
 			preprocess_qual_conditions(parse, lfirst(l));
 
-		f->quals = preprocess_expression(parse, f->quals, EXPRKIND_WHERE);
+		f->quals = preprocess_expression(parse, f->quals, EXPRKIND_QUAL);
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
@@ -800,7 +386,7 @@ preprocess_qual_conditions(Query *parse, Node *jtnode)
 		preprocess_qual_conditions(parse, j->larg);
 		preprocess_qual_conditions(parse, j->rarg);
 
-		j->quals = preprocess_expression(parse, j->quals, EXPRKIND_WHERE);
+		j->quals = preprocess_expression(parse, j->quals, EXPRKIND_QUAL);
 	}
 	else
 		elog(ERROR, "preprocess_qual_conditions: unexpected node type %d",
@@ -1251,12 +837,16 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 */
 		if (parse->groupClause)
 		{
+			List   *groupExprs;
+
 			/*
 			 * Always estimate the number of groups.  We can't do this until
 			 * after running query_planner(), either.
 			 */
+			groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+												 parse->targetList);
 			dNumGroups = estimate_num_groups(parse,
-											 parse->groupClause,
+											 groupExprs,
 											 cheapest_path->parent->rows);
 			/* Also want it as a long int --- but 'ware overflow! */
 			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@@ -1552,8 +1142,10 @@ grouping_planner(Query *parse, double tuple_fraction)
 	if (parse->sortClause)
 	{
 		if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
-			result_plan = make_sortplan(parse, tlist, result_plan,
-										parse->sortClause);
+			result_plan = (Plan *) make_sort_from_sortclauses(parse,
+															  tlist,
+															  result_plan,
+															  parse->sortClause);
 	}
 
 	/*
@@ -1570,9 +1162,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * comparable to GROUP BY.
 		 */
 		if (!parse->groupClause && !parse->hasAggs)
+		{
+			List   *distinctExprs;
+
+			distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+													parse->targetList);
 			result_plan->plan_rows = estimate_num_groups(parse,
-														 parse->distinctClause,
+														 distinctExprs,
 														 result_plan->plan_rows);
+		}
 	}
 
 	/*
@@ -1773,47 +1371,6 @@ make_groupsortplan(Query *parse,
 	return (Plan *) make_sort(parse, sort_tlist, subplan, keyno);
 }
 
-/*
- * make_sortplan
- *	  Add a Sort node to implement an explicit ORDER BY clause.
- */
-Plan *
-make_sortplan(Query *parse, List *tlist, Plan *plannode, List *sortcls)
-{
-	List	   *sort_tlist;
-	List	   *i;
-	int			keyno = 0;
-
-	/*
-	 * First make a copy of the tlist so that we don't corrupt the
-	 * original.
-	 */
-	sort_tlist = new_unsorted_tlist(tlist);
-
-	foreach(i, sortcls)
-	{
-		SortClause *sortcl = (SortClause *) lfirst(i);
-		TargetEntry *tle = get_sortgroupclause_tle(sortcl, sort_tlist);
-		Resdom	   *resdom = tle->resdom;
-
-		/*
-		 * Check for the possibility of duplicate order-by clauses --- the
-		 * parser should have removed 'em, but the executor will get
-		 * terribly confused if any get through!
-		 */
-		if (resdom->reskey == 0)
-		{
-			/* OK, insert the ordering info needed by the executor. */
-			resdom->reskey = ++keyno;
-			resdom->reskeyop = sortcl->sortop;
-		}
-	}
-
-	Assert(keyno > 0);
-
-	return (Plan *) make_sort(parse, sort_tlist, plannode, keyno);
-}
-
 /*
  * postprocess_setop_tlist
  *	  Fix up targetlist returned by plan_set_operations().
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 513480c4e20..123b96f1880 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.90 2003/01/15 23:10:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.91 2003/01/20 18:54:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,6 +31,7 @@ typedef struct
 	List	   *outer_tlist;
 	List	   *inner_tlist;
 	Index		acceptable_rel;
+	bool		tlists_have_non_vars;
 } join_references_context;
 
 typedef struct
@@ -44,11 +45,13 @@ static void fix_expr_references(Plan *plan, Node *node);
 static bool fix_expr_references_walker(Node *node, void *context);
 static void set_join_references(Join *join, List *rtable);
 static void set_uppernode_references(Plan *plan, Index subvarno);
+static bool targetlist_has_non_vars(List *tlist);
 static List *join_references(List *clauses,
 							 List *rtable,
 							 List *outer_tlist,
 							 List *inner_tlist,
-							 Index acceptable_rel);
+							 Index acceptable_rel,
+							 bool tlists_have_non_vars);
 static Node *join_references_mutator(Node *node,
 						join_references_context *context);
 static Node *replace_vars_with_subplan_refs(Node *node,
@@ -175,7 +178,10 @@ set_plan_references(Plan *plan, List *rtable)
 								rtable,
 								NIL,
 								plan->lefttree->targetlist,
-								(Index) 0);
+								(Index) 0,
+								targetlist_has_non_vars(plan->lefttree->targetlist));
+			fix_expr_references(plan,
+								(Node *) ((Hash *) plan)->hashkeys);
 			break;
 		case T_Material:
 		case T_Sort:
@@ -308,23 +314,30 @@ set_join_references(Join *join, List *rtable)
 	Plan	   *inner_plan = join->plan.righttree;
 	List	   *outer_tlist = outer_plan->targetlist;
 	List	   *inner_tlist = inner_plan->targetlist;
+	bool		tlists_have_non_vars;
+
+	tlists_have_non_vars = targetlist_has_non_vars(outer_tlist) ||
+		targetlist_has_non_vars(inner_tlist);
 
 	/* All join plans have tlist, qual, and joinqual */
 	join->plan.targetlist = join_references(join->plan.targetlist,
 											rtable,
 											outer_tlist,
 											inner_tlist,
-											(Index) 0);
+											(Index) 0,
+											tlists_have_non_vars);
 	join->plan.qual = join_references(join->plan.qual,
 									  rtable,
 									  outer_tlist,
 									  inner_tlist,
-									  (Index) 0);
+									  (Index) 0,
+									  tlists_have_non_vars);
 	join->joinqual = join_references(join->joinqual,
 									 rtable,
 									 outer_tlist,
 									 inner_tlist,
-									 (Index) 0);
+									 (Index) 0,
+									 tlists_have_non_vars);
 
 	/* Now do join-type-specific stuff */
 	if (IsA(join, NestLoop))
@@ -350,12 +363,14 @@ set_join_references(Join *join, List *rtable)
 														  rtable,
 														  outer_tlist,
 														  NIL,
-														  innerrel);
+														  innerrel,
+														  tlists_have_non_vars);
 				innerscan->indxqual = join_references(innerscan->indxqual,
 													  rtable,
 													  outer_tlist,
 													  NIL,
-													  innerrel);
+													  innerrel,
+													  tlists_have_non_vars);
 				/*
 				 * We must fix the inner qpqual too, if it has join clauses
 				 * (this could happen if the index is lossy: some indxquals
@@ -366,7 +381,8 @@ set_join_references(Join *join, List *rtable)
 													   rtable,
 													   outer_tlist,
 													   NIL,
-													   innerrel);
+													   innerrel,
+													   tlists_have_non_vars);
 			}
 		}
 		else if (IsA(inner_plan, TidScan))
@@ -378,7 +394,8 @@ set_join_references(Join *join, List *rtable)
 												 rtable,
 												 outer_tlist,
 												 NIL,
-												 innerrel);
+												 innerrel,
+												 tlists_have_non_vars);
 		}
 	}
 	else if (IsA(join, MergeJoin))
@@ -389,7 +406,8 @@ set_join_references(Join *join, List *rtable)
 										   rtable,
 										   outer_tlist,
 										   inner_tlist,
-										   (Index) 0);
+										   (Index) 0,
+										   tlists_have_non_vars);
 	}
 	else if (IsA(join, HashJoin))
 	{
@@ -399,7 +417,8 @@ set_join_references(Join *join, List *rtable)
 										  rtable,
 										  outer_tlist,
 										  inner_tlist,
-										  (Index) 0);
+										  (Index) 0,
+										  tlists_have_non_vars);
 	}
 }
 
@@ -433,22 +452,7 @@ set_uppernode_references(Plan *plan, Index subvarno)
 	else
 		subplan_targetlist = NIL;
 
-	/*
-	 * Detect whether subplan tlist has any non-Vars (typically it won't
-	 * because it's been flattened).  This allows us to save comparisons
-	 * in common cases.
-	 */
-	tlist_has_non_vars = false;
-	foreach(l, subplan_targetlist)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-		if (tle->expr && !IsA(tle->expr, Var))
-		{
-			tlist_has_non_vars = true;
-			break;
-		}
-	}
+	tlist_has_non_vars = targetlist_has_non_vars(subplan_targetlist);
 
 	output_targetlist = NIL;
 	foreach(l, plan->targetlist)
@@ -473,6 +477,27 @@ set_uppernode_references(Plan *plan, Index subvarno)
 									   tlist_has_non_vars);
 }
 
+/*
+ * targetlist_has_non_vars --- does any tlist entry have a non-Var expr?
+ *
+ * In most cases, subplan tlists will be "flat" tlists with only Vars; callers
+ * use a false result to skip matching whole expressions against the tlists.
+ */
+static bool
+targetlist_has_non_vars(List *tlist)
+{
+	List   *l;
+
+	foreach(l, tlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+		if (tle->expr && !IsA(tle->expr, Var))	/* NULL expr is ignored */
+			return true;
+	}
+	return false;				/* empty tlist, or nothing but Vars */
+}
+
 /*
  * join_references
  *	   Creates a new set of targetlist entries or join qual clauses by
@@ -505,7 +530,8 @@ join_references(List *clauses,
 				List *rtable,
 				List *outer_tlist,
 				List *inner_tlist,
-				Index acceptable_rel)
+				Index acceptable_rel,
+				bool tlists_have_non_vars)
 {
 	join_references_context context;
 
@@ -513,6 +539,7 @@ join_references(List *clauses,
 	context.outer_tlist = outer_tlist;
 	context.inner_tlist = inner_tlist;
 	context.acceptable_rel = acceptable_rel;
+	context.tlists_have_non_vars = tlists_have_non_vars;
 	return (List *) join_references_mutator((Node *) clauses, &context);
 }
 
@@ -554,6 +581,42 @@ join_references_mutator(Node *node,
 		/* No referent found for Var */
 		elog(ERROR, "join_references: variable not in subplan target lists");
 	}
+	/* Try matching more complex expressions too, if tlists have any */
+	if (context->tlists_have_non_vars)
+	{
+		Resdom	   *resdom;
+
+		resdom = tlist_member(node, context->outer_tlist);
+		if (resdom)
+		{
+			/* Found a matching subplan output expression */
+			Var		   *newvar;
+
+			newvar = makeVar(OUTER,
+							 resdom->resno,
+							 resdom->restype,
+							 resdom->restypmod,
+							 0);
+			newvar->varnoold = 0;		/* wasn't ever a plain Var */
+			newvar->varoattno = 0;
+			return (Node *) newvar;
+		}
+		resdom = tlist_member(node, context->inner_tlist);
+		if (resdom)
+		{
+			/* Found a matching subplan output expression */
+			Var		   *newvar;
+
+			newvar = makeVar(INNER,
+							 resdom->resno,
+							 resdom->restype,
+							 resdom->restypmod,
+							 0);
+			newvar->varnoold = 0;		/* wasn't ever a plain Var */
+			newvar->varoattno = 0;
+			return (Node *) newvar;
+		}
+	}
 	return expression_tree_mutator(node,
 								   join_references_mutator,
 								   (void *) context);
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index b30454dcae2..5f420f37250 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.67 2003/01/17 02:01:11 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,9 +23,11 @@
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
 #include "optimizer/subselect.h"
+#include "optimizer/var.h"
 #include "parser/parsetree.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -62,7 +64,8 @@ typedef struct finalize_primnode_results
 
 
 static List *convert_sublink_opers(List *lefthand, List *operOids,
-								   List *targetlist, List **paramIds);
+								   List *targetlist, int rtindex,
+								   List **righthandIds);
 static bool subplan_is_hashable(SubLink *slink, SubPlan *node);
 static Node *replace_correlation_vars_mutator(Node *node, void *context);
 static Node *process_sublinks_mutator(Node *node, bool *isTopQual);
@@ -289,6 +292,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 		exprs = convert_sublink_opers(lefthand,
 									  slink->operOids,
 									  plan->targetlist,
+									  0,
 									  &node->paramIds);
 		node->setParam = nconc(node->setParam, listCopy(node->paramIds));
 		PlannerInitPlan = lappend(PlannerInitPlan, node);
@@ -393,6 +397,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 		node->exprs = convert_sublink_opers(lefthand,
 											slink->operOids,
 											plan->targetlist,
+											0,
 											&node->paramIds);
 
 		/*
@@ -424,26 +429,32 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 /*
  * convert_sublink_opers: given a lefthand-expressions list and a list of
  * operator OIDs, build a list of actually executable expressions.  The
- * righthand sides of the expressions are Params representing the results
- * of the sub-select.
+ * righthand sides of the expressions are Params or Vars representing the
+ * results of the sub-select.
  *
- * The paramids of the Params created are returned in the *paramIds list.
+ * If rtindex is 0, we build Params to represent the sub-select outputs.
+ * The paramids of the Params created are returned in the *righthandIds list.
+ *
+ * If rtindex is not 0, we build Vars using that rtindex as varno.  The
+ * Var nodes themselves are returned in *righthandIds (a type cheat: the
+ * list then holds node pointers rather than integer paramids).
  */
 static List *
 convert_sublink_opers(List *lefthand, List *operOids,
-					  List *targetlist, List **paramIds)
+					  List *targetlist, int rtindex,
+					  List **righthandIds)
 {
 	List	   *result = NIL;
 	List	   *lst;
 
-	*paramIds = NIL;
+	*righthandIds = NIL;
 
 	foreach(lst, operOids)
 	{
 		Oid			opid = (Oid) lfirsti(lst);
 		Node	   *leftop = lfirst(lefthand);
 		TargetEntry *te = lfirst(targetlist);
-		Param	   *prm;
+		Node	   *rightop;
 		Operator	tup;
 		Form_pg_operator opform;
 		Node	   *left,
@@ -451,12 +462,28 @@ convert_sublink_opers(List *lefthand, List *operOids,
 
 		Assert(!te->resdom->resjunk);
 
-		/* Make the Param node representing the subplan's result */
-		prm = generate_new_param(te->resdom->restype,
-								 te->resdom->restypmod);
-
-		/* Record its ID */
-		*paramIds = lappendi(*paramIds, prm->paramid);
+		if (rtindex)
+		{
+			/* Make the Var node representing the subplan's result */
+			rightop = (Node *) makeVar(rtindex,
+									   te->resdom->resno,
+									   te->resdom->restype,
+									   te->resdom->restypmod,
+									   0);
+			/* Record it for caller */
+			*righthandIds = lappend(*righthandIds, rightop);
+		}
+		else
+		{
+			/* Make the Param node representing the subplan's result */
+			Param	   *prm;
+
+			prm = generate_new_param(te->resdom->restype,
+									 te->resdom->restypmod);
+			/* Record its ID */
+			*righthandIds = lappendi(*righthandIds, prm->paramid);
+			rightop = (Node *) prm;
+		}
 
 		/* Look up the operator to get its declared input types */
 		tup = SearchSysCache(OPEROID,
@@ -473,7 +500,7 @@ convert_sublink_opers(List *lefthand, List *operOids,
 		 * function calls must be inserted for this operator!
 		 */
 		left = make_operand(leftop, exprType(leftop), opform->oprleft);
-		right = make_operand((Node *) prm, prm->paramtype, opform->oprright);
+		right = make_operand(rightop, te->resdom->restype, opform->oprright);
 		result = lappend(result,
 						 make_opclause(opid,
 									   opform->oprresult,
@@ -564,6 +591,96 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
 	return true;
 }
 
+/*
+ * convert_IN_to_join: can we convert an IN SubLink to join style?
+ *
+ * The caller has found a SubLink at the top level of WHERE, but has not
+ * checked the properties of the SubLink at all.  Decide whether it is
+ * appropriate to process this SubLink in join style.  If not, return NULL.
+ * If so, build the qual clause(s) to replace the SubLink, and return them.
+ *
+ * Side effects of a successful conversion include adding the SubLink's
+ * subselect to the query's rangetable and jointree fromlist, and adding
+ * an InClauseInfo node to its in_info_list.
+ */
+Node *
+convert_IN_to_join(Query *parse, SubLink *sublink)
+{
+	Query	   *subselect = (Query *) sublink->subselect;
+	List	   *left_varnos;
+	int			rtindex;
+	RangeTblEntry *rte;
+	RangeTblRef *rtr;
+	InClauseInfo  *ininfo;
+	List	   *exprs;
+
+	/*
+	 * The sublink type must be "= ANY" --- that is, an IN operator.
+	 * (We require the operator name to be unqualified, which may be
+	 * overly paranoid, or may not be.)
+	 */
+	if (sublink->subLinkType != ANY_SUBLINK)
+		return NULL;
+	if (length(sublink->operName) != 1 ||
+		strcmp(strVal(lfirst(sublink->operName)), "=") != 0)
+		return NULL;
+	/*
+	 * The sub-select must not refer to any Vars of the parent query.
+	 * (Vars of higher levels should be okay, though.)
+	 */
+	if (contain_vars_of_level((Node *) subselect, 1))
+		return NULL;
+	/*
+	 * The left-hand expressions must contain some Vars of the current
+	 * query, else it's not gonna be a join.
+	 */
+	left_varnos = pull_varnos((Node *) sublink->lefthand);
+	if (left_varnos == NIL)
+		return NULL;
+	/*
+	 * The left-hand expressions mustn't be volatile.  (Perhaps we should
+	 * test the combining operators, too?  We'd only need to point the
+	 * function directly at the sublink ...)
+	 */
+	if (contain_volatile_functions((Node *) sublink->lefthand))
+		return NULL;
+	/*
+	 * Okay, pull up the sub-select into top range table and jointree.
+	 *
+	 * We rely here on the assumption that the outer query has no references
+	 * to the inner (necessarily true, other than the Vars that we build
+	 * below).  Therefore this is a lot easier than what pull_up_subqueries
+	 * has to go through.
+	 */
+	rte = addRangeTableEntryForSubquery(NULL,
+										subselect,
+										makeAlias("IN_subquery", NIL),
+										false);
+	parse->rtable = lappend(parse->rtable, rte);
+	rtindex = length(parse->rtable);
+	rtr = makeNode(RangeTblRef);
+	rtr->rtindex = rtindex;
+	parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
+	/*
+	 * Now build the InClauseInfo node.
+	 */
+	ininfo = makeNode(InClauseInfo);
+	ininfo->lefthand = left_varnos;
+	ininfo->righthand = makeListi1(rtindex);
+	parse->in_info_list = lcons(ininfo, parse->in_info_list);
+	/*
+	 * Build the result qual expressions.  As a side effect,
+	 * ininfo->sub_targetlist is filled with a list of the Vars
+	 * representing the subselect outputs.
+	 */
+	exprs = convert_sublink_opers(sublink->lefthand,
+								  sublink->operOids,
+								  subselect->targetList,
+								  rtindex,
+								  &ininfo->sub_targetlist);
+	return (Node *) make_ands_explicit(exprs);
+}
+
 /*
  * Replace correlation vars (uplevel vars) with Params.
  */
diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile
index 60925de441f..05d4dd2de3e 100644
--- a/src/backend/optimizer/prep/Makefile
+++ b/src/backend/optimizer/prep/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for optimizer/prep
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/optimizer/prep/Makefile,v 1.13 2002/06/16 00:09:11 momjian Exp $
+#    $Header: /cvsroot/pgsql/src/backend/optimizer/prep/Makefile,v 1.14 2003/01/20 18:54:54 tgl Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -12,7 +12,7 @@ subdir = src/backend/optimizer/prep
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = prepqual.o preptlist.o prepunion.o
+OBJS = prepjointree.o prepqual.o preptlist.o prepunion.o
 
 all: SUBSYS.o
 
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
new file mode 100644
index 00000000000..083528c0490
--- /dev/null
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -0,0 +1,680 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepjointree.c
+ *	  Planner preprocessing for subqueries and join tree manipulation.
+ *
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.1 2003/01/20 18:54:54 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "optimizer/clauses.h"
+#include "optimizer/paths.h"
+#include "optimizer/prep.h"
+#include "optimizer/subselect.h"
+#include "optimizer/var.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
+
+
+static bool is_simple_subquery(Query *subquery);
+static bool has_nullable_targetlist(Query *subquery);
+static void resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist);
+static void fix_in_clause_relids(List *in_info_list, int varno,
+								 Relids subrelids);
+static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
+
+
+/*
+ * pull_up_IN_clauses
+ *		Attempt to pull up top-level IN clauses to be treated like joins.
+ *
+ * When "foo IN (sub-SELECT)" appears at the top level of WHERE, the
+ * sub-SELECT can be turned into a rangetable entry and the implied
+ * equality comparisons handled as join operators (with special join
+ * rules).  The transformation is valid *only* at the top level of WHERE,
+ * because it cannot distinguish whether the IN ought to return FALSE or
+ * NULL in cases involving NULL inputs.
+ *
+ * We run before preprocess_expression(), so the WHERE clause has not yet
+ * been reduced to implicit-AND format.  Hence explicit AND clauses
+ * (probably only binary ANDs) are searched recursively, and the search
+ * stops at the first item that is neither a SubLink nor an AND.
+ *
+ * parse: the query whose WHERE clause is being examined; the actual
+ *		parsetree changes are made by convert_IN_to_join().
+ * node: a qual-tree node at (or ANDed into) the top level of WHERE.
+ *
+ * Returns the possibly-modified version of the given qual-tree node: the
+ * replacement quals for a converted SubLink, a rebuilt AND clause whose
+ * arguments have each been processed, or the node itself when nothing
+ * applies.
+ */
+Node *
+pull_up_IN_clauses(Query *parse, Node *node)
+{
+	List	   *andargs = NIL;
+	List	   *arg;
+	Node	   *joinquals;
+
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, SubLink))
+	{
+		/* A convertible IN yields replacement quals; otherwise keep it */
+		joinquals = convert_IN_to_join(parse, (SubLink *) node);
+		return (joinquals != NULL) ? joinquals : node;
+	}
+
+	/* Anything that is neither a SubLink nor an AND ends the search */
+	if (!and_clause(node))
+		return node;
+
+	/* Rebuild the AND from its (possibly converted) arguments */
+	foreach(arg, ((BoolExpr *) node)->args)
+	{
+		andargs = lappend(andargs,
+						  pull_up_IN_clauses(parse, (Node *) lfirst(arg)));
+	}
+	return (Node *) make_andclause(andargs);
+}
+
+/*
+ * pull_up_subqueries
+ *		Look for subqueries in the rangetable that can be pulled up into
+ *		the parent query.  If the subquery has no special features like
+ *		grouping/aggregation then we can merge it into the parent's jointree.
+ *
+ * below_outer_join is true if this jointree node is within the nullable
+ * side of an outer join.  This restricts what we can do.
+ *
+ * A tricky aspect of this code is that if we pull up a subquery we have
+ * to replace Vars that reference the subquery's outputs throughout the
+ * parent query, including quals attached to jointree nodes above the one
+ * we are currently processing!  We handle this by being careful not to
+ * change the jointree structure while recursing: no nodes other than
+ * subquery RangeTblRef entries will be replaced.  Also, we can't turn
+ * ResolveNew loose on the whole jointree, because it'll return a mutated
+ * copy of the tree; we have to invoke it just on the quals, instead.
+ */
+Node *
+pull_up_subqueries(Query *parse, Node *jtnode, bool below_outer_join)
+{
+	if (jtnode == NULL)
+		return NULL;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+		RangeTblEntry *rte = rt_fetch(varno, parse->rtable);
+		Query	   *subquery = rte->subquery;
+
+		/*
+		 * Is this a subquery RTE, and if so, is the subquery simple
+		 * enough to pull up?  (If not, do nothing at this node.)
+		 *
+		 * If we are inside an outer join, only pull up subqueries whose
+		 * targetlists are nullable --- otherwise substituting their tlist
+		 * entries for upper Var references would do the wrong thing (the
+		 * results wouldn't become NULL when they're supposed to). XXX
+		 * This could be improved by generating pseudo-variables for such
+		 * expressions; we'd have to figure out how to get the pseudo-
+		 * variables evaluated at the right place in the modified plan
+		 * tree. Fix it someday.
+		 *
+		 * Note: even if the subquery itself is simple enough, we can't pull
+		 * it up if there is a reference to its whole tuple result.
+		 * Perhaps a pseudo-variable is the answer here too.
+		 */
+		if (rte->rtekind == RTE_SUBQUERY && is_simple_subquery(subquery) &&
+			(!below_outer_join || has_nullable_targetlist(subquery)) &&
+			!contain_whole_tuple_var((Node *) parse, varno, 0))
+		{
+			int			rtoffset;
+			List	   *subtlist;
+			List	   *rt;
+
+			/*
+			 * First, pull up any IN clauses within the subquery's WHERE,
+			 * so that we don't leave unoptimized INs behind.
+			 */
+			if (subquery->hasSubLinks)
+				subquery->jointree->quals = pull_up_IN_clauses(subquery,
+															   subquery->jointree->quals);
+
+			/*
+			 * Now, recursively pull up the subquery's subqueries, so
+			 * that this routine's processing is complete for its jointree
+			 * and rangetable.	NB: if the same subquery is referenced
+			 * from multiple jointree items (which can't happen normally,
+			 * but might after rule rewriting), then we will invoke this
+			 * processing multiple times on that subquery.	OK because
+			 * nothing will happen after the first time.  We do have to be
+			 * careful to copy everything we pull up, however, or risk
+			 * having chunks of structure multiply linked.
+			 *
+			 * Note: 'false' is correct here even if we are within an outer
+			 * join in the upper query; the lower query starts with a clean
+			 * slate for outer-join semantics.
+			 */
+			subquery->jointree = (FromExpr *)
+				pull_up_subqueries(subquery, (Node *) subquery->jointree,
+								   false);
+
+			/*
+			 * Now make a modifiable copy of the subquery that we can run
+			 * OffsetVarNodes and IncrementVarSublevelsUp on.
+			 */
+			subquery = copyObject(subquery);
+
+			/*
+			 * Adjust level-0 varnos in subquery so that we can append its
+			 * rangetable to upper query's.
+			 */
+			rtoffset = length(parse->rtable);
+			OffsetVarNodes((Node *) subquery, rtoffset, 0);
+
+			/*
+			 * Upper-level vars in subquery are now one level closer to their
+			 * parent than before.
+			 */
+			IncrementVarSublevelsUp((Node *) subquery, -1, 1);
+
+			/*
+			 * Replace all of the top query's references to the subquery's
+			 * outputs with copies of the adjusted subtlist items, being
+			 * careful not to replace any of the jointree structure.
+			 * (This'd be a lot cleaner if we could use
+			 * query_tree_mutator.)
+			 */
+			subtlist = subquery->targetList;
+			parse->targetList = (List *)
+				ResolveNew((Node *) parse->targetList,
+						   varno, 0, subtlist, CMD_SELECT, 0);
+			resolvenew_in_jointree((Node *) parse->jointree, varno, subtlist);
+			Assert(parse->setOperations == NULL);
+			parse->havingQual =
+				ResolveNew(parse->havingQual,
+						   varno, 0, subtlist, CMD_SELECT, 0);
+			parse->in_info_list = (List *)
+				ResolveNew((Node *) parse->in_info_list,
+						   varno, 0, subtlist, CMD_SELECT, 0);
+
+			foreach(rt, parse->rtable)
+			{
+				RangeTblEntry *otherrte = (RangeTblEntry *) lfirst(rt);
+
+				if (otherrte->rtekind == RTE_JOIN)
+					otherrte->joinaliasvars = (List *)
+						ResolveNew((Node *) otherrte->joinaliasvars,
+								   varno, 0, subtlist, CMD_SELECT, 0);
+			}
+
+			/*
+			 * Now append the adjusted rtable entries to upper query. (We
+			 * hold off until after fixing the upper rtable entries; no
+			 * point in running that code on the subquery ones too.)
+			 */
+			parse->rtable = nconc(parse->rtable, subquery->rtable);
+
+			/*
+			 * Pull up any FOR UPDATE markers, too.  (OffsetVarNodes
+			 * already adjusted the marker values, so just nconc the
+			 * list.)
+			 */
+			parse->rowMarks = nconc(parse->rowMarks, subquery->rowMarks);
+
+			/*
+			 * We also have to fix the relid lists of any parent InClauseInfo
+			 * nodes.  (This could perhaps be done by ResolveNew, but it
+			 * would clutter that routine's API unreasonably.)
+			 */
+			if (parse->in_info_list)
+			{
+				Relids	subrelids;
+
+				subrelids = get_relids_in_jointree((Node *) subquery->jointree);
+				fix_in_clause_relids(parse->in_info_list, varno, subrelids);
+			}
+
+			/*
+			 * And now append any subquery InClauseInfos to our list.
+			 */
+			parse->in_info_list = nconc(parse->in_info_list,
+										subquery->in_info_list);
+
+			/*
+			 * Miscellaneous housekeeping.
+			 */
+			parse->hasSubLinks |= subquery->hasSubLinks;
+			/* subquery won't be pulled up if it hasAggs, so no work there */
+
+			/*
+			 * Return the adjusted subquery jointree to replace the
+			 * RangeTblRef entry in my jointree.
+			 */
+			return (Node *) subquery->jointree;
+		}
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		List	   *l;
+
+		foreach(l, f->fromlist)
+			lfirst(l) = pull_up_subqueries(parse, lfirst(l),
+										   below_outer_join);
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		/* Recurse, being careful to tell myself when inside outer join */
+		switch (j->jointype)
+		{
+			case JOIN_INNER:
+				j->larg = pull_up_subqueries(parse, j->larg,
+											 below_outer_join);
+				j->rarg = pull_up_subqueries(parse, j->rarg,
+											 below_outer_join);
+				break;
+			case JOIN_LEFT:
+				j->larg = pull_up_subqueries(parse, j->larg,
+											 below_outer_join);
+				j->rarg = pull_up_subqueries(parse, j->rarg,
+											 true);
+				break;
+			case JOIN_FULL:
+				j->larg = pull_up_subqueries(parse, j->larg,
+											 true);
+				j->rarg = pull_up_subqueries(parse, j->rarg,
+											 true);
+				break;
+			case JOIN_RIGHT:
+				j->larg = pull_up_subqueries(parse, j->larg,
+											 true);
+				j->rarg = pull_up_subqueries(parse, j->rarg,
+											 below_outer_join);
+				break;
+			case JOIN_UNION:
+
+				/*
+				 * This is where we fail if upper levels of planner
+				 * haven't rewritten UNION JOIN as an Append ...
+				 */
+				elog(ERROR, "UNION JOIN is not implemented yet");
+				break;
+			default:
+				elog(ERROR, "pull_up_subqueries: unexpected join type %d",
+					 j->jointype);
+				break;
+		}
+	}
+	else
+		elog(ERROR, "pull_up_subqueries: unexpected node type %d",
+			 nodeTag(jtnode));
+	return jtnode;
+}
+
+/*
+ * is_simple_subquery
+ *	  Check a subquery in the range table to see if it's simple enough
+ *	  to pull up into the parent query.
+ */
+static bool
+is_simple_subquery(Query *subquery)
+{
+	/*
+	 * Let's just make sure it's a valid subselect ...
+	 */
+	if (!IsA(subquery, Query) ||
+		subquery->commandType != CMD_SELECT ||
+		subquery->resultRelation != 0 ||
+		subquery->into != NULL ||
+		subquery->isPortal)
+		elog(ERROR, "is_simple_subquery: subquery is bogus");
+
+	/*
+	 * Can't currently pull up a query with setops. Maybe after querytree
+	 * redesign...
+	 */
+	if (subquery->setOperations)
+		return false;
+
+	/*
+	 * Can't pull up a subquery involving grouping, aggregation, sorting,
+	 * DISTINCT, or limiting.
+	 */
+	if (subquery->hasAggs ||
+		subquery->groupClause ||
+		subquery->havingQual ||
+		subquery->sortClause ||
+		subquery->distinctClause ||
+		subquery->limitOffset ||
+		subquery->limitCount)
+		return false;
+
+	/*
+	 * Don't pull up a subquery that has any set-returning functions in
+	 * its targetlist.	Otherwise we might well wind up inserting
+	 * set-returning functions into places where they mustn't go, such as
+	 * quals of higher queries.
+	 */
+	if (expression_returns_set((Node *) subquery->targetList))
+		return false;
+
+	/*
+	 * Hack: don't try to pull up a subquery with an empty jointree.
+	 * query_planner() will correctly generate a Result plan for a
+	 * jointree that's totally empty, but I don't think the right things
+	 * happen if an empty FromExpr appears lower down in a jointree. Not
+	 * worth working hard on this, just to collapse SubqueryScan/Result
+	 * into Result...
+	 */
+	if (subquery->jointree->fromlist == NIL)
+		return false;
+
+	return true;
+}
+
+/*
+ * has_nullable_targetlist
+ *	  Report whether every non-junk entry of a range-table subquery's
+ *	  targetlist is a plain Var.  Such outputs correctly go to NULL when
+ *	  they are examined above the point of an outer join.
+ *
+ * A possible future extension is to accept strict functions of simple
+ * variables, eg, "x + 1".
+ */
+static bool
+has_nullable_targetlist(Query *subquery)
+{
+	List	   *tlist_item;
+
+	foreach(tlist_item, subquery->targetList)
+	{
+		TargetEntry *entry = (TargetEntry *) lfirst(tlist_item);
+		bool		is_plain_var;
+
+		/* resjunk columns are not checked */
+		if (entry->resdom->resjunk)
+			continue;
+
+		/* any other entry must be a simple Var */
+		is_plain_var = (entry->expr != NULL && IsA(entry->expr, Var));
+		if (!is_plain_var)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Helper routine for pull_up_subqueries: do ResolveNew (for the given varno
+ * and subtlist) on the quals of every FromExpr and JoinExpr in the jointree,
+ * without changing the jointree structure itself.  Ugly, but no other way...
+ */
+static void
+resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist)
+{
+	if (jtnode == NULL)
+		return;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		/* nothing to do here */
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		List	   *l;
+
+		foreach(l, f->fromlist)
+			resolvenew_in_jointree(lfirst(l), varno, subtlist);
+		f->quals = ResolveNew(f->quals,
+							  varno, 0, subtlist, CMD_SELECT, 0);
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		resolvenew_in_jointree(j->larg, varno, subtlist);
+		resolvenew_in_jointree(j->rarg, varno, subtlist);
+		j->quals = ResolveNew(j->quals,
+							  varno, 0, subtlist, CMD_SELECT, 0);
+
+		/*
+		 * We don't bother to update the colvars list, since it won't be
+		 * used again ...
+		 */
+	}
+	else
+		elog(ERROR, "resolvenew_in_jointree: unexpected node type %d",
+			 nodeTag(jtnode));
+}
+
+/*
+ * preprocess_jointree
+ *		Attempt to simplify a query's jointree.
+ *
+ * If we succeed in pulling up a subquery then we might form a jointree
+ * in which a FromExpr is a direct child of another FromExpr.  In that
+ * case we can consider collapsing the two FromExprs into one.	This is
+ * an optional conversion, since the planner will work correctly either
+ * way.  But we may find a better plan (at the cost of more planning time)
+ * if we merge the two nodes.
+ *
+ * NOTE: don't try to do this in the same jointree scan that does subquery
+ * pullup!	Since we're changing the jointree structure here, that wouldn't
+ * work reliably --- see comments for pull_up_subqueries().
+ */
+Node *
+preprocess_jointree(Query *parse, Node *jtnode)
+{
+	if (jtnode == NULL)
+		return NULL;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		/* nothing to do here... */
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		List	   *newlist = NIL;
+		List	   *l;
+
+		foreach(l, f->fromlist)
+		{
+			Node	   *child = (Node *) lfirst(l);
+
+			/* Recursively simplify the child... */
+			child = preprocess_jointree(parse, child);
+			/* Now, is it a FromExpr? */
+			if (child && IsA(child, FromExpr))
+			{
+				/*
+				 * Yes, so do we want to merge it into parent?	Always do
+				 * so if child has just one element (since that doesn't
+				 * make the parent's list any longer).  Otherwise we have
+				 * to be careful about the increase in planning time
+				 * caused by combining the two join search spaces into
+				 * one.  Our heuristic is to merge if the merged join list
+				 * (child's items plus all our other items) would be no
+				 * longer than geqo_rels/2.  (Need another user parameter?)
+				 */
+				FromExpr   *subf = (FromExpr *) child;
+				int			childlen = length(subf->fromlist);
+				int			myothers = length(newlist) + length(lnext(l));
+
+				if (childlen <= 1 || (childlen + myothers) <= geqo_rels / 2)
+				{
+					newlist = nconc(newlist, subf->fromlist);
+					f->quals = make_and_qual(subf->quals, f->quals);
+				}
+				else
+					newlist = lappend(newlist, child);
+			}
+			else
+				newlist = lappend(newlist, child);
+		}
+		f->fromlist = newlist;
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		/* Can't usefully change the JoinExpr, but recurse on children */
+		j->larg = preprocess_jointree(parse, j->larg);
+		j->rarg = preprocess_jointree(parse, j->rarg);
+	}
+	else
+		elog(ERROR, "preprocess_jointree: unexpected node type %d",
+			 nodeTag(jtnode));
+	return jtnode;
+}
+
+/*
+ * fix_in_clause_relids: update RT-index lists of InClauseInfo nodes
+ *
+ * After a subquery is pulled up, each InClauseInfo relid list mentioning
+ * the subquery's RT index 'varno' has that index removed and a copy of
+ * 'subrelids' appended.  The InClauseInfo nodes are modified in-place.
+ */
+static void
+fix_in_clause_relids(List *in_info_list, int varno, Relids subrelids)
+{
+	List	   *cell;
+
+	foreach(cell, in_info_list)
+	{
+		InClauseInfo *info = (InClauseInfo *) lfirst(cell);
+		List	   *trimmed;
+
+		if (intMember(varno, info->lefthand))
+		{
+			trimmed = lremovei(varno, info->lefthand);
+			info->lefthand = nconc(trimmed, listCopy(subrelids));
+		}
+		if (intMember(varno, info->righthand))
+		{
+			trimmed = lremovei(varno, info->righthand);
+			info->righthand = nconc(trimmed, listCopy(subrelids));
+		}
+	}
+}
+
+/*
+ * get_relids_in_jointree: get list of base RT indexes present in a jointree
+ *
+ * The result is an integer list holding the rtindex of every RangeTblRef
+ * found at or below jtnode (NIL for a NULL jtnode).  RT indexes belonging
+ * to JoinExpr nodes themselves are not included.
+ */
+List *
+get_relids_in_jointree(Node *jtnode)
+{
+	Relids		relids = NIL;
+
+	if (jtnode == NULL)
+		return relids;
+
+	if (IsA(jtnode, RangeTblRef))
+		return makeListi1(((RangeTblRef *) jtnode)->rtindex);
+
+	if (IsA(jtnode, FromExpr))
+	{
+		List	   *child;
+
+		/*
+		 * Note: we assume it's impossible to see same RT index from more
+		 * than one subtree, so nconc() is OK rather than set_unioni().
+		 */
+		foreach(child, ((FromExpr *) jtnode)->fromlist)
+			relids = nconc(relids, get_relids_in_jointree(lfirst(child)));
+		return relids;
+	}
+
+	if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *join = (JoinExpr *) jtnode;
+
+		/* join's own RT index is not wanted in result */
+		relids = get_relids_in_jointree(join->larg);
+		return nconc(relids, get_relids_in_jointree(join->rarg));
+	}
+
+	elog(ERROR, "get_relids_in_jointree: unexpected node type %d",
+		 nodeTag(jtnode));
+	return relids;
+}
+
+/*
+ * get_relids_for_join: get the base RT indexes under join node joinrelid
+ */
+List *
+get_relids_for_join(Query *parse, int joinrelid)
+{
+	Node	   *joinnode = find_jointree_node_for_rel((Node *) parse->jointree,
+													  joinrelid);
+
+	if (joinnode == NULL)
+		elog(ERROR, "get_relids_for_join: join node %d not found", joinrelid);
+	return get_relids_in_jointree(joinnode);
+}
+
+/*
+ * find_jointree_node_for_rel: locate jointree node for a base or join RT index
+ *
+ * Returns NULL if not found
+ */
+static Node *
+find_jointree_node_for_rel(Node *jtnode, int relid)
+{
+	if (jtnode == NULL)
+		return NULL;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		if (relid == varno)
+			return jtnode;
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		List	   *l;
+
+		/*
+		 * Search each fromlist member in turn and return the first match;
+		 * an RT index is assumed to appear in at most one subtree.
+		 */
+		foreach(l, f->fromlist)
+		{
+			jtnode = find_jointree_node_for_rel(lfirst(l), relid);
+			if (jtnode)
+				return jtnode;
+		}
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		if (relid == j->rtindex)
+			return jtnode;
+		jtnode = find_jointree_node_for_rel(j->larg, relid);
+		if (jtnode)
+			return jtnode;
+		jtnode = find_jointree_node_for_rel(j->rarg, relid);
+		if (jtnode)
+			return jtnode;
+	}
+	else
+		elog(ERROR, "find_jointree_node_for_rel: unexpected node type %d",
+			 nodeTag(jtnode));
+	return NULL;
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 807364fac8d..97e4d56a9f4 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.87 2003/01/17 02:01:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.88 2003/01/20 18:54:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,6 +62,7 @@ static List *generate_append_tlist(List *colTypes, bool flag,
 					  List *refnames_tlist);
 static Node *adjust_inherited_attrs_mutator(Node *node,
 							   adjust_inherited_attrs_context *context);
+static List *adjust_rtindex_list(List *relids, Index oldrelid, Index newrelid);
 static List *adjust_inherited_tlist(List *tlist, Oid new_relid);
 
 
@@ -239,8 +240,9 @@ generate_union_plan(SetOperationStmt *op, Query *parse,
 
 		tlist = new_unsorted_tlist(tlist);
 		sortList = addAllTargetsToSortList(NIL, tlist);
-		plan = make_sortplan(parse, tlist, plan, sortList);
-		plan = (Plan *) make_unique(tlist, plan, copyObject(sortList));
+		plan = (Plan *) make_sort_from_sortclauses(parse, tlist,
+												   plan, sortList);
+		plan = (Plan *) make_unique(tlist, plan, sortList);
 	}
 	return plan;
 }
@@ -292,7 +294,7 @@ generate_nonunion_plan(SetOperationStmt *op, Query *parse,
 	 */
 	tlist = new_unsorted_tlist(tlist);
 	sortList = addAllTargetsToSortList(NIL, tlist);
-	plan = make_sortplan(parse, tlist, plan, sortList);
+	plan = (Plan *) make_sort_from_sortclauses(parse, tlist, plan, sortList);
 	switch (op->op)
 	{
 		case SETOP_INTERSECT:
@@ -830,6 +832,23 @@ adjust_inherited_attrs_mutator(Node *node,
 			j->rtindex = context->new_rt_index;
 		return (Node *) j;
 	}
+	if (IsA(node, InClauseInfo))
+	{
+		/* Copy the InClauseInfo node with correct mutation of subnodes */
+		InClauseInfo   *ininfo;
+
+		ininfo = (InClauseInfo *) expression_tree_mutator(node,
+										  adjust_inherited_attrs_mutator,
+														  (void *) context);
+		/* now fix InClauseInfo's rtindex lists */
+		ininfo->lefthand = adjust_rtindex_list(ininfo->lefthand,
+											   context->old_rt_index,
+											   context->new_rt_index);
+		ininfo->righthand = adjust_rtindex_list(ininfo->righthand,
+												context->old_rt_index,
+												context->new_rt_index);
+		return (Node *) ininfo;
+	}
 
 	/*
 	 * We have to process RestrictInfo nodes specially.
@@ -856,26 +875,12 @@ adjust_inherited_attrs_mutator(Node *node,
 		/*
 		 * Adjust left/right relids lists too.
 		 */
-		if (intMember(context->old_rt_index, oldinfo->left_relids))
-		{
-			newinfo->left_relids = listCopy(oldinfo->left_relids);
-			newinfo->left_relids = lremovei(context->old_rt_index,
-											newinfo->left_relids);
-			newinfo->left_relids = lconsi(context->new_rt_index,
-										  newinfo->left_relids);
-		}
-		else
-			newinfo->left_relids = oldinfo->left_relids;
-		if (intMember(context->old_rt_index, oldinfo->right_relids))
-		{
-			newinfo->right_relids = listCopy(oldinfo->right_relids);
-			newinfo->right_relids = lremovei(context->old_rt_index,
-											 newinfo->right_relids);
-			newinfo->right_relids = lconsi(context->new_rt_index,
-										   newinfo->right_relids);
-		}
-		else
-			newinfo->right_relids = oldinfo->right_relids;
+		newinfo->left_relids = adjust_rtindex_list(oldinfo->left_relids,
+												   context->old_rt_index,
+												   context->new_rt_index);
+		newinfo->right_relids = adjust_rtindex_list(oldinfo->right_relids,
+													context->old_rt_index,
+													context->new_rt_index);
 
 		newinfo->eval_cost.startup = -1; /* reset these too */
 		newinfo->this_selec = -1;
@@ -922,6 +927,23 @@ adjust_inherited_attrs_mutator(Node *node,
 								   (void *) context);
 }
 
+/*
+ * Return relids with oldrelid replaced by newrelid.  The list is handed
+ * back as-is when oldrelid is absent; otherwise a copy is adjusted.
+ */
+static List *
+adjust_rtindex_list(List *relids, Index oldrelid, Index newrelid)
+{
+	List	   *adjusted;
+
+	if (!intMember(oldrelid, relids))
+		return relids;			/* nothing to substitute */
+
+	/* Work on a private copy: drop the old index, then prepend the new one */
+	adjusted = lremovei(oldrelid, listCopy(relids));
+	return lconsi(newrelid, adjusted);
+}
+
 /*
  * Adjust the targetlist entries of an inherited UPDATE operation
  *
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index acd17ba87d2..253c9e88138 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.124 2003/01/17 03:25:03 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.125 2003/01/20 18:54:54 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -2200,6 +2200,15 @@ expression_tree_walker(Node *node,
 					return true;
 			}
 			break;
+		case T_InClauseInfo:
+			{
+				InClauseInfo *ininfo = (InClauseInfo *) node;
+
+				if (expression_tree_walker((Node *) ininfo->sub_targetlist,
+										   walker, context))
+					return true;
+			}
+			break;
 		default:
 			elog(ERROR, "expression_tree_walker: Unexpected node type %d",
 				 nodeTag(node));
@@ -2241,6 +2250,8 @@ query_tree_walker(Query *query,
 		return true;
 	if (walker(query->havingQual, context))
 		return true;
+	if (walker(query->in_info_list, context))
+		return true;
 	foreach(rt, query->rtable)
 	{
 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
@@ -2610,6 +2621,16 @@ expression_tree_mutator(Node *node,
 				return (Node *) newnode;
 			}
 			break;
+		case T_InClauseInfo:
+			{
+				InClauseInfo *ininfo = (InClauseInfo *) node;
+				InClauseInfo *newnode;
+
+				FLATCOPY(newnode, ininfo, InClauseInfo);
+				MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *);
+				return (Node *) newnode;
+			}
+			break;
 		default:
 			elog(ERROR, "expression_tree_mutator: Unexpected node type %d",
 				 nodeTag(node));
@@ -2662,6 +2683,7 @@ query_tree_mutator(Query *query,
 	MUTATE(query->jointree, query->jointree, FromExpr *);
 	MUTATE(query->setOperations, query->setOperations, Node *);
 	MUTATE(query->havingQual, query->havingQual, Node *);
+	MUTATE(query->in_info_list, query->in_info_list, List *);
 	foreach(rt, query->rtable)
 	{
 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c
index 0f3cf201908..c202615b1f5 100644
--- a/src/backend/optimizer/util/joininfo.c
+++ b/src/backend/optimizer/util/joininfo.c
@@ -8,37 +8,29 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/joininfo.c,v 1.31 2002/06/20 20:29:31 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/joininfo.c,v 1.32 2003/01/20 18:54:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-
 #include "optimizer/joininfo.h"
 
-static JoinInfo *joininfo_member(List *join_relids, List *joininfo_list);
 
 /*
- * joininfo_member
- *	  Determines whether a node has already been created for a join
- *	  between a set of join relations and the relation described by
- *	  'joininfo_list'.
- *
- * 'join_relids' is a list of relids corresponding to the join relation
- * 'joininfo_list' is the list of joininfo nodes against which this is
- *				checked
- *
- * Returns the corresponding node in 'joininfo_list' if such a node
- * exists.
+ * find_joininfo_node
+ *	  Find the joininfo node within a relation entry corresponding
+ *	  to a join between 'this_rel' and the relations in 'join_relids'.
+ *	  If there is no such node, return NULL.
  *
+ * Returns a joininfo node, or NULL.
  */
-static JoinInfo *
-joininfo_member(List *join_relids, List *joininfo_list)
+JoinInfo *
+find_joininfo_node(RelOptInfo *this_rel, Relids join_relids)
 {
 	List	   *i;
 
-	foreach(i, joininfo_list)
+	foreach(i, this_rel->joininfo)
 	{
 		JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
 
@@ -48,22 +40,19 @@ joininfo_member(List *join_relids, List *joininfo_list)
 	return NULL;
 }
 
-
 /*
- * find_joininfo_node
+ * make_joininfo_node
  *	  Find the joininfo node within a relation entry corresponding
  *	  to a join between 'this_rel' and the relations in 'join_relids'.
  *	  A new node is created and added to the relation entry's joininfo
  *	  field if the desired one can't be found.
  *
  * Returns a joininfo node.
- *
  */
 JoinInfo *
-find_joininfo_node(RelOptInfo *this_rel, Relids join_relids)
+make_joininfo_node(RelOptInfo *this_rel, Relids join_relids)
 {
-	JoinInfo   *joininfo = joininfo_member(join_relids,
-										   this_rel->joininfo);
+	JoinInfo   *joininfo = find_joininfo_node(this_rel, join_relids);
 
 	if (joininfo == NULL)
 	{
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 615d9966973..a5cc94e831b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.83 2002/12/05 15:50:35 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,8 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
+#include "utils/memutils.h"
+#include "utils/selfuncs.h"
 
 
 /*****************************************************************************
@@ -149,6 +151,7 @@ set_cheapest(RelOptInfo *parent_rel)
 
 	parent_rel->cheapest_startup_path = cheapest_startup_path;
 	parent_rel->cheapest_total_path = cheapest_total_path;
+	parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
 }
 
 /*
@@ -489,6 +492,111 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 	return pathnode;
 }
 
+/*
+ * create_unique_path
+ *	  Creates a path representing elimination of duplicate rows from the
+ *	  input data.
+ *
+ * If used at all, this is likely to be called repeatedly on the same rel;
+ * and the input subpath should always be the same (the cheapest_total path
+ * for the rel).  So we cache the result.
+ */
+UniquePath *
+create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
+{
+	UniquePath *pathnode;
+	Path		sort_path;		/* dummy for result of cost_sort */
+	MemoryContext oldcontext;
+	List	   *sub_targetlist;
+	List	   *l;
+	int			numCols;
+
+	/* Caller made a mistake if subpath isn't cheapest_total */
+	Assert(subpath == rel->cheapest_total_path);
+
+	/* If result already cached, return it */
+	if (rel->cheapest_unique_path)
+		return (UniquePath *) rel->cheapest_unique_path;
+
+	/*
+	 * We must ensure path struct is allocated in same context as parent
+	 * rel; otherwise GEQO memory management causes trouble.  (Compare
+	 * best_inner_indexscan().)
+	 */
+	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+	pathnode = makeNode(UniquePath);
+
+	/* There is no substructure to allocate, so can switch back right away */
+	MemoryContextSwitchTo(oldcontext);
+
+	pathnode->path.pathtype = T_Unique;
+	pathnode->path.parent = rel;
+
+	/*
+	 * Treat the output as always unsorted, since we don't necessarily have
+	 * pathkeys to represent it.
+	 */
+	pathnode->path.pathkeys = NIL;
+
+	pathnode->subpath = subpath;
+
+	/*
+	 * Try to identify the targetlist that will actually be unique-ified.
+	 * In current usage, this routine is only used for sub-selects of IN
+	 * clauses, so we should be able to find the tlist in in_info_list.
+	 */
+	sub_targetlist = NIL;
+	foreach(l, root->in_info_list)
+	{
+		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+		if (sameseti(ininfo->righthand, rel->relids))
+		{
+			sub_targetlist = ininfo->sub_targetlist;
+			break;
+		}
+	}
+
+	/*
+	 * If we know the targetlist, try to estimate number of result rows;
+	 * otherwise punt.
+	 */
+	if (sub_targetlist)
+	{
+		pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
+		numCols = length(sub_targetlist);
+	}
+	else
+	{
+		pathnode->rows = rel->rows;
+		numCols = length(rel->targetlist); /* second-best estimate */
+	}
+
+	/*
+	 * Estimate cost for sort+unique implementation
+	 */
+	cost_sort(&sort_path, root, NIL,
+			  subpath->total_cost,
+			  rel->rows,
+			  rel->width);
+	/*
+	 * Charge one cpu_operator_cost per comparison per input tuple. We
+	 * assume all columns get compared for most of the tuples.  (XXX probably
+	 * this is an overestimate.)  This should agree with make_unique.
+	 */
+	sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+
+	pathnode->use_hash = false;	/* for now */
+
+	pathnode->path.startup_cost = sort_path.startup_cost;
+	pathnode->path.total_cost = sort_path.total_cost;
+
+	rel->cheapest_unique_path = (Path *) pathnode;
+
+	return pathnode;
+}
+
 /*
  * create_subqueryscan_path
  *	  Creates a path corresponding to a sequential scan of a subquery,
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 87207f617cc..144fac75501 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.43 2003/01/15 19:35:44 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.44 2003/01/20 18:54:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -140,6 +140,7 @@ make_base_rel(Query *root, int relid)
 	rel->pathlist = NIL;
 	rel->cheapest_startup_path = NULL;
 	rel->cheapest_total_path = NULL;
+	rel->cheapest_unique_path = NULL;
 	rel->pruneable = true;
 	rel->rtekind = rte->rtekind;
 	rel->indexlist = NIL;
@@ -244,6 +245,7 @@ find_join_rel(Query *root, Relids relids)
  *	  Returns relation entry corresponding to the union of two given rels,
  *	  creating a new relation entry if none already exists.
  *
+ * 'joinrelids' is the Relids list that uniquely identifies the join
  * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
  *		joined
  * 'jointype': type of join (inner/outer)
@@ -256,27 +258,20 @@ find_join_rel(Query *root, Relids relids)
  */
 RelOptInfo *
 build_join_rel(Query *root,
+			   List *joinrelids,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   JoinType jointype,
 			   List **restrictlist_ptr)
 {
-	List	   *joinrelids;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
 	List	   *new_outer_tlist;
 	List	   *new_inner_tlist;
 
-	/* We should never try to join two overlapping sets of rels. */
-	Assert(nonoverlap_setsi(outer_rel->relids, inner_rel->relids));
-
 	/*
 	 * See if we already have a joinrel for this set of base rels.
-	 *
-	 * nconc(listCopy(x), y) is an idiom for making a new list without
-	 * changing either input list.
 	 */
-	joinrelids = nconc(listCopy(outer_rel->relids), inner_rel->relids);
 	joinrel = find_join_rel(root, joinrelids);
 
 	if (joinrel)
@@ -299,13 +294,14 @@ build_join_rel(Query *root,
 	 */
 	joinrel = makeNode(RelOptInfo);
 	joinrel->reloptkind = RELOPT_JOINREL;
-	joinrel->relids = joinrelids;
+	joinrel->relids = listCopy(joinrelids);
 	joinrel->rows = 0;
 	joinrel->width = 0;
 	joinrel->targetlist = NIL;
 	joinrel->pathlist = NIL;
 	joinrel->cheapest_startup_path = NULL;
 	joinrel->cheapest_total_path = NULL;
+	joinrel->cheapest_unique_path = NULL;
 	joinrel->pruneable = true;
 	joinrel->rtekind = RTE_JOIN;
 	joinrel->indexlist = NIL;
@@ -557,7 +553,7 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
 			 */
 			JoinInfo   *new_joininfo;
 
-			new_joininfo = find_joininfo_node(joinrel, new_unjoined_relids);
+			new_joininfo = make_joininfo_node(joinrel, new_unjoined_relids);
 			new_joininfo->jinfo_restrictinfo =
 				set_union(new_joininfo->jinfo_restrictinfo,
 						  joininfo->jinfo_restrictinfo);
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 0d268b8e40c..6f90ea87568 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.53 2002/12/12 15:49:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.54 2003/01/20 18:54:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -255,3 +255,25 @@ get_sortgroupclause_expr(SortClause *sortClause, List *targetList)
 
 	return (Node *) tle->expr;
 }
+
+/*
+ * get_sortgrouplist_exprs
+ *		Given a list of SortClauses (or GroupClauses), build a list
+ *		of the referenced targetlist expressions.
+ */
+List *
+get_sortgrouplist_exprs(List *sortClauses, List *targetList)
+{
+	List   *result = NIL;
+	List   *l;
+
+	foreach(l, sortClauses)
+	{
+		SortClause *sortcl = (SortClause *) lfirst(l);
+		Node	   *sortexpr;
+
+		sortexpr = get_sortgroupclause_expr(sortcl, targetList);
+		result = lappend(result, sortexpr);
+	}
+	return result;
+}
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 1eb9d9774ed..729ded51323 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.46 2003/01/17 02:01:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.47 2003/01/20 18:54:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,6 +16,7 @@
 
 #include "nodes/plannodes.h"
 #include "optimizer/clauses.h"
+#include "optimizer/prep.h"
 #include "optimizer/var.h"
 #include "parser/parsetree.h"
 
@@ -41,7 +42,7 @@ typedef struct
 
 typedef struct
 {
-	List	   *rtable;
+	Query	   *root;
 	int			sublevels_up;
 } flatten_join_alias_vars_context;
 
@@ -50,10 +51,13 @@ static bool pull_varnos_walker(Node *node,
 static bool contain_var_reference_walker(Node *node,
 							 contain_var_reference_context *context);
 static bool contain_var_clause_walker(Node *node, void *context);
+static bool contain_vars_of_level_walker(Node *node, int *sublevels_up);
+static bool contain_vars_above_level_walker(Node *node, int *sublevels_up);
 static bool pull_var_clause_walker(Node *node,
 					   pull_var_clause_context *context);
 static Node *flatten_join_alias_vars_mutator(Node *node,
 								flatten_join_alias_vars_context *context);
+static List *alias_rtindex_list(Query *root, List *rtlist);
 
 
 /*
@@ -224,6 +228,103 @@ contain_var_clause_walker(Node *node, void *context)
 	return expression_tree_walker(node, contain_var_clause_walker, context);
 }
 
+/*
+ * contain_vars_of_level
+ *	  Recursively scan a clause to discover whether it contains any Var nodes
+ *	  of the specified query level.
+ *
+ *	  Returns true if any such Var found.
+ *
+ * Will recurse into sublinks.  Also, may be invoked directly on a Query.
+ */
+bool
+contain_vars_of_level(Node *node, int levelsup)
+{
+	int		sublevels_up = levelsup;
+
+	return query_or_expression_tree_walker(node,
+										   contain_vars_of_level_walker,
+										   (void *) &sublevels_up,
+										   0);
+}
+
+static bool
+contain_vars_of_level_walker(Node *node, int *sublevels_up)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var))
+	{
+		if (((Var *) node)->varlevelsup == *sublevels_up)
+			return true;		/* abort tree traversal and return true */
+	}
+	if (IsA(node, Query))
+	{
+		/* Recurse into subselects */
+		bool		result;
+
+		(*sublevels_up)++;
+		result = query_tree_walker((Query *) node,
+								   contain_vars_of_level_walker,
+								   (void *) sublevels_up,
+								   0);
+		(*sublevels_up)--;
+		return result;
+	}
+	return expression_tree_walker(node,
+								  contain_vars_of_level_walker,
+								  (void *) sublevels_up);
+}
+
+/*
+ * contain_vars_above_level
+ *	  Recursively scan a clause to discover whether it contains any Var nodes
+ *	  above the specified query level.  (For example, pass zero to detect
+ *	  all nonlocal Vars.)
+ *
+ *	  Returns true if any such Var found.
+ *
+ * Will recurse into sublinks.  Also, may be invoked directly on a Query.
+ */
+bool
+contain_vars_above_level(Node *node, int levelsup)
+{
+	int		sublevels_up = levelsup;
+
+	return query_or_expression_tree_walker(node,
+										   contain_vars_above_level_walker,
+										   (void *) &sublevels_up,
+										   0);
+}
+
+static bool
+contain_vars_above_level_walker(Node *node, int *sublevels_up)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var))
+	{
+		if (((Var *) node)->varlevelsup > *sublevels_up)
+			return true;		/* abort tree traversal and return true */
+	}
+	if (IsA(node, Query))
+	{
+		/* Recurse into subselects */
+		bool		result;
+
+		(*sublevels_up)++;
+		result = query_tree_walker((Query *) node,
+								   contain_vars_above_level_walker,
+								   (void *) sublevels_up,
+								   0);
+		(*sublevels_up)--;
+		return result;
+	}
+	return expression_tree_walker(node,
+								  contain_vars_above_level_walker,
+								  (void *) sublevels_up);
+}
+
 
 /*
  * pull_var_clause
@@ -277,11 +378,11 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context)
  * to be applied directly to a Query node.
  */
 Node *
-flatten_join_alias_vars(Node *node, List *rtable)
+flatten_join_alias_vars(Query *root, Node *node)
 {
 	flatten_join_alias_vars_context context;
 
-	context.rtable = rtable;
+	context.root = root;
 	context.sublevels_up = 0;
 
 	return flatten_join_alias_vars_mutator(node, &context);
@@ -301,7 +402,7 @@ flatten_join_alias_vars_mutator(Node *node,
 
 		if (var->varlevelsup != context->sublevels_up)
 			return node;		/* no need to copy, really */
-		rte = rt_fetch(var->varno, context->rtable);
+		rte = rt_fetch(var->varno, context->root->rtable);
 		if (rte->rtekind != RTE_JOIN)
 			return node;
 		Assert(var->varattno > 0);
@@ -309,6 +410,24 @@ flatten_join_alias_vars_mutator(Node *node,
 		/* expand it; recurse in case join input is itself a join */
 		return flatten_join_alias_vars_mutator(newvar, context);
 	}
+	if (IsA(node, InClauseInfo))
+	{
+		/* Copy the InClauseInfo node with correct mutation of subnodes */
+		InClauseInfo   *ininfo;
+
+		ininfo = (InClauseInfo *) expression_tree_mutator(node,
+														  flatten_join_alias_vars_mutator,
+														  (void *) context);
+		/* now fix InClauseInfo's rtindex lists */
+		if (context->sublevels_up == 0)
+		{
+			ininfo->lefthand = alias_rtindex_list(context->root,
+												  ininfo->lefthand);
+			ininfo->righthand = alias_rtindex_list(context->root,
+												   ininfo->righthand);
+		}
+		return (Node *) ininfo;
+	}
 
 	if (IsA(node, Query))
 	{
@@ -329,3 +448,27 @@ flatten_join_alias_vars_mutator(Node *node,
 	return expression_tree_mutator(node, flatten_join_alias_vars_mutator,
 								   (void *) context);
 }
+
+/*
+ * alias_rtindex_list: in a list of RT indexes, replace joins by their
+ * underlying base relids
+ */
+static List *
+alias_rtindex_list(Query *root, List *rtlist)
+{
+	List   *result = NIL;
+	List   *l;
+
+	foreach(l, rtlist)
+	{
+		int		rtindex = lfirsti(l);
+		RangeTblEntry *rte;
+
+		rte = rt_fetch(rtindex, root->rtable);
+		if (rte->rtekind == RTE_JOIN)
+			result = nconc(result, get_relids_for_join(root, rtindex));
+		else
+			result = lappendi(result, rtindex);
+	}
+	return result;
+}
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 4a4f6824b78..44604289663 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.69 2003/01/17 02:01:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteManip.c,v 1.70 2003/01/20 18:54:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -90,8 +90,8 @@ checkExprHasSubLink_walker(Node *node, void *context)
  *
  * Find all Var nodes in the given tree with varlevelsup == sublevels_up,
  * and increment their varno fields (rangetable indexes) by 'offset'.
- * The varnoold fields are adjusted similarly.	Also, RangeTblRef and
- * JoinExpr nodes in join trees and setOp trees are adjusted.
+ * The varnoold fields are adjusted similarly.	Also, adjust other nodes
+ * that contain rangetable indexes, such as RangeTblRef and JoinExpr.
  *
  * NOTE: although this has the form of a walker, we cheat and modify the
  * nodes in-place.	The given expression tree should have been copied
@@ -137,6 +137,25 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
 			j->rtindex += context->offset;
 		/* fall through to examine children */
 	}
+	if (IsA(node, InClauseInfo))
+	{
+		InClauseInfo   *ininfo = (InClauseInfo *) node;
+
+		if (context->sublevels_up == 0)
+		{
+			List	*rt;
+
+			foreach(rt, ininfo->lefthand)
+			{
+				lfirsti(rt) += context->offset;
+			}
+			foreach(rt, ininfo->righthand)
+			{
+				lfirsti(rt) += context->offset;
+			}
+		}
+		/* fall through to examine children */
+	}
 	if (IsA(node, Query))
 	{
 		/* Recurse into subselects */
@@ -196,8 +215,8 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up)
  *
  * Find all Var nodes in the given tree belonging to a specific relation
  * (identified by sublevels_up and rt_index), and change their varno fields
- * to 'new_index'.	The varnoold fields are changed too.  Also, RangeTblRef
- * and JoinExpr nodes in join trees and setOp trees are adjusted.
+ * to 'new_index'.	The varnoold fields are changed too.  Also, adjust other
+ * nodes that contain rangetable indexes, such as RangeTblRef and JoinExpr.
  *
  * NOTE: although this has the form of a walker, we cheat and modify the
  * nodes in-place.	The given expression tree should have been copied
@@ -247,6 +266,27 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
 			j->rtindex = context->new_index;
 		/* fall through to examine children */
 	}
+	if (IsA(node, InClauseInfo))
+	{
+		InClauseInfo   *ininfo = (InClauseInfo *) node;
+
+		if (context->sublevels_up == 0)
+		{
+			List	*rt;
+
+			foreach(rt, ininfo->lefthand)
+			{
+				if (lfirsti(rt) == context->rt_index)
+					lfirsti(rt) = context->new_index;
+			}
+			foreach(rt, ininfo->righthand)
+			{
+				if (lfirsti(rt) == context->rt_index)
+					lfirsti(rt) = context->new_index;
+			}
+		}
+		/* fall through to examine children */
+	}
 	if (IsA(node, Query))
 	{
 		/* Recurse into subselects */
@@ -423,6 +463,16 @@ rangeTableEntry_used_walker(Node *node,
 			return true;
 		/* fall through to examine children */
 	}
+	if (IsA(node, InClauseInfo))
+	{
+		InClauseInfo   *ininfo = (InClauseInfo *) node;
+
+		if (context->sublevels_up == 0 &&
+			(intMember(context->rt_index, ininfo->lefthand) ||
+			 intMember(context->rt_index, ininfo->righthand)))
+			return true;
+		/* fall through to examine children */
+	}
 	if (IsA(node, Query))
 	{
 		/* Recurse into subselects */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fe6f38eee85..42ad9f5f94b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.126 2003/01/15 19:35:44 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.127 2003/01/20 18:54:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1825,8 +1825,7 @@ mergejoinscansel(Query *root, Node *clause,
  *
  * Inputs:
  *	root - the query
- *	groupClauses - list of GroupClauses (or SortClauses for the DISTINCT
- *		case, but those are equivalent structs)
+ *	groupExprs - list of expressions being grouped by
  *	input_rows - number of rows estimated to arrive at the group/unique
  *		filter step
  *
@@ -1867,7 +1866,7 @@ mergejoinscansel(Query *root, Node *clause,
  * do better).
  */
 double
-estimate_num_groups(Query *root, List *groupClauses, double input_rows)
+estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 {
 	List	   *allvars = NIL;
 	List	   *varinfos = NIL;
@@ -1879,14 +1878,12 @@ estimate_num_groups(Query *root, List *groupClauses, double input_rows)
 	} MyVarInfo;
 
 	/* We should not be called unless query has GROUP BY (or DISTINCT) */
-	Assert(groupClauses != NIL);
+	Assert(groupExprs != NIL);
 
 	/* Step 1: get the unique Vars used */
-	foreach(l, groupClauses)
+	foreach(l, groupExprs)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(l);
-		Node	   *groupexpr = get_sortgroupclause_expr(grpcl,
-														 root->targetList);
+		Node	   *groupexpr = (Node *) lfirst(l);
 		List	   *varshere;
 
 		varshere = pull_var_clause(groupexpr, false);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f119b5111db..bf8bb1719ed 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodes.h,v 1.134 2002/12/16 16:22:46 tgl Exp $
+ * $Id: nodes.h,v 1.135 2003/01/20 18:55:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -152,10 +152,12 @@ typedef enum NodeTag
 	T_AppendPath,
 	T_ResultPath,
 	T_MaterialPath,
+	T_UniquePath,
 	T_PathKeyItem,
 	T_RestrictInfo,
 	T_JoinInfo,
 	T_InnerIndexscanInfo,
+	T_InClauseInfo,
 
 	/*
 	 * TAGS FOR MEMORY NODES (memnodes.h)
@@ -408,11 +410,20 @@ typedef enum JoinType
 	 * join in the executor.  (The planner must convert it to an Append
 	 * plan.)
 	 */
-	JOIN_UNION
+	JOIN_UNION,
 
 	/*
-	 * Eventually we will have some additional join types for efficient
-	 * support of queries like WHERE foo IN (SELECT bar FROM ...).
+	 * These are used for queries like WHERE foo IN (SELECT bar FROM ...).
+	 * Only JOIN_IN is actually implemented in the executor; the others
+	 * are defined for internal use in the planner.
+	 */
+	JOIN_IN,					/* at most one result per outer row */
+	JOIN_REVERSE_IN,			/* at most one result per inner row */
+	JOIN_UNIQUE_OUTER,			/* outer path must be made unique */
+	JOIN_UNIQUE_INNER			/* inner path must be made unique */
+
+	/*
+	 * We might need additional join types someday.
 	 */
 } JoinType;
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 6ca3894b0da..9bbee593bf4 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.225 2003/01/06 00:31:45 tgl Exp $
+ * $Id: parsenodes.h,v 1.226 2003/01/20 18:55:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -101,6 +101,7 @@ typedef struct Query
 	List	   *join_rel_list;	/* list of join-relation RelOptInfos */
 	List	   *equi_key_list;	/* list of lists of equijoined
 								 * PathKeyItems */
+	List	   *in_info_list;	/* list of InClauseInfos */
 	List	   *query_pathkeys; /* desired pathkeys for query_planner() */
 	bool		hasJoinRTEs;	/* true if any RTEs are RTE_JOIN kind */
 } Query;
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 9ef4fab957e..d3b01b7fed0 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_list.h,v 1.30 2002/11/24 21:52:15 tgl Exp $
+ * $Id: pg_list.h,v 1.31 2003/01/20 18:55:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -145,7 +145,8 @@ extern List *set_intersecti(List *list1, List *list2);
 
 extern bool equali(List *list1, List *list2);
 extern bool sameseti(List *list1, List *list2);
-extern bool nonoverlap_setsi(List *list1, List *list2);
+extern bool overlap_setsi(List *list1, List *list2);
+#define nonoverlap_setsi(list1, list2) (!overlap_setsi(list1, list2))
 extern bool is_subseti(List *list1, List *list2);
 
 extern void freeList(List *list);
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index a21debe02f9..c2d8970234f 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.76 2003/01/15 19:35:44 tgl Exp $
+ * $Id: relation.h,v 1.77 2003/01/20 18:55:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -97,6 +97,8 @@ typedef struct QualCost
  *								(regardless of its ordering)
  *		cheapest_total_path - the pathlist member with lowest total cost
  *							  (regardless of its ordering)
+ *		cheapest_unique_path - for caching cheapest path to produce unique
+ *							   (no duplicates) output from relation
  *		pruneable - flag to let the planner know whether it can prune the
  *					pathlist of this RelOptInfo or not.
  *
@@ -183,6 +185,7 @@ typedef struct RelOptInfo
 	List	   *pathlist;		/* Path structures */
 	struct Path *cheapest_startup_path;
 	struct Path *cheapest_total_path;
+	struct Path *cheapest_unique_path;
 	bool		pruneable;
 
 	/* information about a base rel (not set for join rels!) */
@@ -403,6 +406,23 @@ typedef struct MaterialPath
 	Path	   *subpath;
 } MaterialPath;
 
+/*
+ * UniquePath represents elimination of duplicate rows from the output of
+ * its subpath.
+ *
+ * This is unlike the other Path nodes in that it can actually generate
+ * two different plans: either hash-based or sort-based implementation.
+ * The decision is sufficiently localized that it's not worth having two
+ * separate Path node types.
+ */
+typedef struct UniquePath
+{
+	Path		path;
+	Path	   *subpath;
+	bool		use_hash;
+	double		rows;			/* estimated number of result tuples */
+} UniquePath;
+
 /*
  * All join-type paths share these fields.
  */
@@ -649,4 +669,25 @@ typedef struct InnerIndexscanInfo
 	Path	   *best_innerpath;	/* best inner indexscan, or NULL if none */
 } InnerIndexscanInfo;
 
+/*
+ * IN clause info.
+ *
+ * When we convert top-level IN quals into join operations, we must restrict
+ * the order of joining and use special join methods at some join points.
+ * We record information about each such IN clause in an InClauseInfo struct.
+ * These structs are kept in the Query node's in_info_list.
+ */
+
+typedef struct InClauseInfo
+{
+	NodeTag		type;
+	List	   *lefthand;		/* base relids in lefthand expressions */
+	List	   *righthand;		/* base relids coming from the subselect */
+	List	   *sub_targetlist;	/* targetlist of original RHS subquery */
+	/*
+	 * Note: sub_targetlist is just a list of Vars or expressions;
+	 * it does not contain TargetEntry nodes.
+	 */
+} InClauseInfo;
+
 #endif   /* RELATION_H */
diff --git a/src/include/optimizer/joininfo.h b/src/include/optimizer/joininfo.h
index f17e278238c..37131b722d2 100644
--- a/src/include/optimizer/joininfo.h
+++ b/src/include/optimizer/joininfo.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: joininfo.h,v 1.21 2002/06/20 20:29:51 momjian Exp $
+ * $Id: joininfo.h,v 1.22 2003/01/20 18:55:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,5 +17,6 @@
 #include "nodes/relation.h"
 
 extern JoinInfo *find_joininfo_node(RelOptInfo *this_rel, List *join_relids);
+extern JoinInfo *make_joininfo_node(RelOptInfo *this_rel, List *join_relids);
 
 #endif   /* JOININFO_H */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 77ed27e7e55..759b18c2499 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.47 2003/01/15 19:35:47 tgl Exp $
+ * $Id: pathnode.h,v 1.48 2003/01/20 18:55:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,8 @@ extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
 extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath,
 									  List *constantqual);
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
+extern UniquePath *create_unique_path(Query *root, RelOptInfo *rel,
+									  Path *subpath);
 extern Path *create_subqueryscan_path(RelOptInfo *rel);
 extern Path *create_functionscan_path(Query *root, RelOptInfo *rel);
 
@@ -75,6 +77,7 @@ extern void build_base_rel(Query *root, int relid);
 extern RelOptInfo *build_other_rel(Query *root, int relid);
 extern RelOptInfo *find_base_rel(Query *root, int relid);
 extern RelOptInfo *build_join_rel(Query *root,
+			   List *joinrelids,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   JoinType jointype,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 66925931609..cf9c2ddeb64 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: planmain.h,v 1.66 2003/01/15 23:10:32 tgl Exp $
+ * $Id: planmain.h,v 1.67 2003/01/20 18:55:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,6 +32,8 @@ extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
 extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
 extern Sort *make_sort(Query *root, List *tlist,
 		  Plan *lefttree, int keycount);
+extern Sort *make_sort_from_sortclauses(Query *root, List *tlist,
+										Plan *lefttree, List *sortcls);
 extern Agg *make_agg(Query *root, List *tlist, List *qual,
 					 AggStrategy aggstrategy,
 					 int numGroupCols, AttrNumber *grpColIdx,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index f49583a7ef3..16885b2f138 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: planner.h,v 1.24 2002/06/20 20:29:51 momjian Exp $
+ * $Id: planner.h,v 1.25 2003/01/20 18:55:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,7 +21,4 @@
 extern Plan *planner(Query *parse);
 extern Plan *subquery_planner(Query *parse, double tuple_fraction);
 
-extern Plan *make_sortplan(Query *parse, List *tlist,
-			  Plan *plannode, List *sortcls);
-
 #endif   /* PLANNER_H */
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index 1bb64af3ae5..17ecb4d593f 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: prep.h,v 1.33 2002/08/29 16:03:49 tgl Exp $
+ * $Id: prep.h,v 1.34 2003/01/20 18:55:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,16 @@
 #include "nodes/parsenodes.h"
 #include "nodes/plannodes.h"
 
+/*
+ * prototypes for prepjointree.c
+ */
+extern Node *pull_up_IN_clauses(Query *parse, Node *node);
+extern Node *pull_up_subqueries(Query *parse, Node *jtnode,
+				   bool below_outer_join);
+extern Node *preprocess_jointree(Query *parse, Node *jtnode);
+extern List *get_relids_in_jointree(Node *jtnode);
+extern List *get_relids_for_join(Query *parse, int joinrelid);
+
 /*
  * prototypes for prepqual.c
  */
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 8fead9929f6..2e6a4640684 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -2,6 +2,11 @@
  *
  * subselect.h
  *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: subselect.h,v 1.17 2003/01/20 18:55:05 tgl Exp $
+ *
  *-------------------------------------------------------------------------
  */
 #ifndef SUBSELECT_H
@@ -14,8 +19,9 @@ extern List *PlannerInitPlan;	/* init subplans for current query */
 extern List *PlannerParamVar;	/* to get Var from Param->paramid */
 extern int	PlannerPlanId;		/* to assign unique ID to subquery plans */
 
-extern List *SS_finalize_plan(Plan *plan, List *rtable);
+extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
 extern Node *SS_replace_correlation_vars(Node *expr);
 extern Node *SS_process_sublinks(Node *expr, bool isQual);
+extern List *SS_finalize_plan(Plan *plan, List *rtable);
 
 #endif   /* SUBSELECT_H */
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 7b82b5ae291..b38f4016f98 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tlist.h,v 1.32 2002/06/20 20:29:51 momjian Exp $
+ * $Id: tlist.h,v 1.33 2003/01/20 18:55:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,5 +32,7 @@ extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause,
 						List *targetList);
 extern Node *get_sortgroupclause_expr(SortClause *sortClause,
 						 List *targetList);
+extern List *get_sortgrouplist_exprs(List *sortClauses,
+						 List *targetList);
 
 #endif   /* TLIST_H */
diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h
index 07b8b311d07..b207acac593 100644
--- a/src/include/optimizer/var.h
+++ b/src/include/optimizer/var.h
@@ -7,14 +7,14 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: var.h,v 1.24 2003/01/15 19:35:47 tgl Exp $
+ * $Id: var.h,v 1.25 2003/01/20 18:55:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VAR_H
 #define VAR_H
 
-#include "nodes/primnodes.h"
+#include "nodes/parsenodes.h"
 
 
 extern List *pull_varnos(Node *node);
@@ -22,7 +22,9 @@ extern bool contain_var_reference(Node *node, int varno, int varattno,
 					  int levelsup);
 extern bool contain_whole_tuple_var(Node *node, int varno, int levelsup);
 extern bool contain_var_clause(Node *node);
+extern bool contain_vars_of_level(Node *node, int levelsup);
+extern bool contain_vars_above_level(Node *node, int levelsup);
 extern List *pull_var_clause(Node *node, bool includeUpperVars);
-extern Node *flatten_join_alias_vars(Node *node, List *rtable);
+extern Node *flatten_join_alias_vars(Query *root, Node *node);
 
 #endif   /* VAR_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 49f3bc7e005..037c2b2f5e3 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.10 2002/11/19 23:22:00 tgl Exp $
+ * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ extern void mergejoinscansel(Query *root, Node *clause,
 				 Selectivity *leftscan,
 				 Selectivity *rightscan);
 
-extern double estimate_num_groups(Query *root, List *groupClauses,
+extern double estimate_num_groups(Query *root, List *groupExprs,
 								  double input_rows);
 
 extern Datum btcostestimate(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 5a2ef11c21b..8debffe00ba 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -58,10 +58,10 @@ SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
  six | Uncorrelated Field 
 -----+--------------------
      |                  1
-     |                  2
-     |                  3
      |                  1
      |                  2
+     |                  2
+     |                  3
      |                  3
 (6 rows)
 
@@ -71,10 +71,10 @@ SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
  six | Uncorrelated Field 
 -----+--------------------
      |                  1
-     |                  2
-     |                  3
      |                  1
      |                  2
+     |                  2
+     |                  3
      |                  3
 (6 rows)
 
@@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field"
                      WHERE f3 IS NOT NULL);
  five | Correlated Field 
 ------+------------------
-      |                2
-      |                3
       |                1
       |                2
+      |                2
+      |                3
       |                3
 (5 rows)
 
-- 
GitLab