From e3b9852728902bc816bf02574a87eda9a0ca91a1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 20 Dec 2005 02:30:36 +0000
Subject: [PATCH] Teach planner how to rearrange join order for some classes of
 OUTER JOIN. Per my recent proposal.  I ended up basing the implementation on
 the existing mechanism for enforcing valid join orders of IN joins --- the
 rules for valid outer-join orders are somewhat similar.

---
 doc/src/sgml/config.sgml                  |  43 +--
 doc/src/sgml/perform.sgml                 |  41 +-
 src/backend/nodes/copyfuncs.c             |  21 +-
 src/backend/nodes/equalfuncs.c            |  16 +-
 src/backend/nodes/outfuncs.c              |  18 +-
 src/backend/optimizer/README              | 119 ++++--
 src/backend/optimizer/geqo/geqo_eval.c    |  25 +-
 src/backend/optimizer/path/allpaths.c     |  62 ++-
 src/backend/optimizer/path/joinrels.c     | 348 +++++++++--------
 src/backend/optimizer/plan/initsplan.c    | 442 +++++++++++++++-------
 src/backend/optimizer/plan/planmain.c     |  11 +-
 src/backend/optimizer/plan/planner.c      |  15 +-
 src/backend/optimizer/prep/prepjointree.c | 273 +------------
 src/backend/optimizer/util/clauses.c      | 134 ++++++-
 src/backend/optimizer/util/relnode.c      |   4 +-
 src/backend/utils/misc/guc.c              |   6 +-
 src/include/nodes/nodes.h                 |   3 +-
 src/include/nodes/primnodes.h             |   9 +-
 src/include/nodes/relation.h              |  47 ++-
 src/include/optimizer/clauses.h           |   3 +-
 src/include/optimizer/paths.h             |   9 +-
 src/include/optimizer/planmain.h          |   8 +-
 src/include/optimizer/prep.h              |   6 +-
 23 files changed, 955 insertions(+), 708 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2d1e5081834..8322463cea7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.38 2005/12/09 15:51:13 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.39 2005/12/20 02:30:35 tgl Exp $
 -->
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -2028,6 +2028,7 @@ SELECT * FROM parent WHERE key = 2400;
         this many items.  Smaller values reduce planning time but may
         yield inferior query plans.  The default is 8.  It is usually
         wise to keep this less than <xref linkend="guc-geqo-threshold">.
+        For more information see <xref linkend="explicit-joins">.
        </para>
       </listitem>
      </varlistentry>
@@ -2039,48 +2040,24 @@ SELECT * FROM parent WHERE key = 2400;
       </indexterm>
       <listitem>
        <para>
-        The planner will rewrite explicit inner <literal>JOIN</>
-        constructs into lists of <literal>FROM</> items whenever a
-        list of no more than this many items in total would
-        result. Prior to <productname>PostgreSQL</> 7.4, joins
-        specified via the <literal>JOIN</literal> construct would
-        never be reordered by the query planner. The query planner has
-        subsequently been improved so that inner joins written in this
-        form can be reordered; this configuration parameter controls
-        the extent to which this reordering is performed.
-        <note>
-         <para>
-          At present, the order of outer joins specified via the
-          <literal>JOIN</> construct is never adjusted by the query
-          planner; therefore, <varname>join_collapse_limit</> has no
-          effect on this behavior. The planner may be improved to
-          reorder some classes of outer joins in a future release of
-          <productname>PostgreSQL</productname>.
-         </para>
-        </note>
+        The planner will rewrite explicit <literal>JOIN</>
+        constructs (except <literal>FULL JOIN</>s) into lists of
+        <literal>FROM</> items whenever a list of no more than this many items
+        would result.  Smaller values reduce planning time but may
+        yield inferior query plans.
        </para>
 
        <para>
         By default, this variable is set the same as
         <varname>from_collapse_limit</varname>, which is appropriate
         for most uses. Setting it to 1 prevents any reordering of
-        inner <literal>JOIN</>s. Thus, the explicit join order
+        explicit <literal>JOIN</>s. Thus, the explicit join order
         specified in the query will be the actual order in which the
         relations are joined. The query planner does not always choose
         the optimal join order; advanced users may elect to
         temporarily set this variable to 1, and then specify the join
-        order they desire explicitly. Another consequence of setting
-        this variable to 1 is that the query planner will behave more
-        like the <productname>PostgreSQL</productname> 7.3 query
-        planner, which some users might find useful for backward
-        compatibility reasons.
-       </para>
-
-       <para>
-        Setting this variable to a value between 1 and
-        <varname>from_collapse_limit</varname> might be useful to
-        trade off planning time against the quality of the chosen plan
-        (higher values produce better plans).
+        order they desire explicitly.
+        For more information see <xref linkend="explicit-joins">.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 53fa8210f83..9632fc9a496 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.54 2005/11/04 23:14:00 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.55 2005/12/20 02:30:35 tgl Exp $
 -->
 
  <chapter id="performance-tips">
@@ -627,7 +627,7 @@ SELECT * FROM a, b, c WHERE a.id = b.id AND b.ref = c.id;
   </para>
 
   <para>
-   When the query involves outer joins, the planner has much less freedom
+   When the query involves outer joins, the planner has less freedom
    than it does for plain (inner) joins. For example, consider
 <programlisting>
 SELECT * FROM a LEFT JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id);
@@ -637,16 +637,30 @@ SELECT * FROM a LEFT JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id);
    emitted for each row of A that has no matching row in the join of B and C.
    Therefore the planner has no choice of join order here: it must join
    B to C and then join A to that result.  Accordingly, this query takes
-   less time to plan than the previous query.
+   less time to plan than the previous query.  In other cases, the planner
+   may be able to determine that more than one join order is safe.
+   For example, given
+<programlisting>
+SELECT * FROM a LEFT JOIN b ON (a.bid = b.id) LEFT JOIN c ON (a.cid = c.id);
+</programlisting>
+   it is valid to join A to either B or C first.  Currently, only
+   <literal>FULL JOIN</> completely constrains the join order.  Most
+   practical cases involving <literal>LEFT JOIN</> or <literal>RIGHT JOIN</>
+   can be rearranged to some extent.
   </para>
 
   <para>
    Explicit inner join syntax (<literal>INNER JOIN</>, <literal>CROSS
    JOIN</>, or unadorned <literal>JOIN</>) is semantically the same as
-   listing the input relations in <literal>FROM</>, so it does not need to
-   constrain the join order.  But it is possible to instruct the
-   <productname>PostgreSQL</productname> query planner to treat
-   explicit inner <literal>JOIN</>s as constraining the join order anyway.
+   listing the input relations in <literal>FROM</>, so it does not
+   constrain the join order.
+  </para>
+
+  <para>
+   Even though most kinds of <literal>JOIN</> don't completely constrain
+   the join order, it is possible to instruct the
+   <productname>PostgreSQL</productname> query planner to treat all
+   <literal>JOIN</> clauses as constraining the join order anyway.
    For example, these three queries are logically equivalent:
 <programlisting>
 SELECT * FROM a, b, c WHERE a.id = b.id AND b.ref = c.id;
@@ -660,7 +674,8 @@ SELECT * FROM a JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id);
   </para>
 
   <para>
-   To force the planner to follow the <literal>JOIN</> order for inner joins,
+   To force the planner to follow the join order laid out by explicit
+   <literal>JOIN</>s,
    set the <xref linkend="guc-join-collapse-limit"> run-time parameter to 1.
    (Other possible values are discussed below.)
   </para>
@@ -697,9 +712,9 @@ FROM x, y,
 WHERE somethingelse;
 </programlisting>
    This situation might arise from use of a view that contains a join;
-   the view's <literal>SELECT</> rule will be inserted in place of the view reference,
-   yielding a query much like the above.  Normally, the planner will try
-   to collapse the subquery into the parent, yielding
+   the view's <literal>SELECT</> rule will be inserted in place of the view
+   reference, yielding a query much like the above.  Normally, the planner
+   will try to collapse the subquery into the parent, yielding
 <programlisting>
 SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
 </programlisting>
@@ -722,12 +737,12 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
    linkend="guc-join-collapse-limit">
    are similarly named because they do almost the same thing: one controls
    when the planner will <quote>flatten out</> subselects, and the
-   other controls when it will flatten out explicit inner joins.  Typically
+   other controls when it will flatten out explicit joins.  Typically
    you would either set <varname>join_collapse_limit</> equal to
    <varname>from_collapse_limit</> (so that explicit joins and subselects
    act similarly) or set <varname>join_collapse_limit</> to 1 (if you want
    to control join order with explicit joins).  But you might set them
-   differently if you are trying to fine-tune the trade off between planning
+   differently if you are trying to fine-tune the trade-off between planning
    time and run time.
   </para>
  </sect1>
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 7d708e3fb1d..1d816ead3a2 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.322 2005/11/26 22:14:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.323 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1277,6 +1277,22 @@ _copyRestrictInfo(RestrictInfo *from)
 	return newnode;
 }
 
+/*
+ * _copyOuterJoinInfo: build a new OuterJoinInfo holding the fields of 'from'
+ */
+static OuterJoinInfo *
+_copyOuterJoinInfo(OuterJoinInfo *from)
+{
+	OuterJoinInfo *newnode = makeNode(OuterJoinInfo);
+
+	COPY_BITMAPSET_FIELD(min_lefthand);
+	COPY_BITMAPSET_FIELD(min_righthand);
+	COPY_SCALAR_FIELD(is_full_join);
+	COPY_SCALAR_FIELD(lhs_strict);
+
+	return newnode;
+}
+
 /*
  * _copyInClauseInfo
  */
@@ -2906,6 +2922,9 @@ copyObject(void *from)
 		case T_RestrictInfo:
 			retval = _copyRestrictInfo(from);
 			break;
+		case T_OuterJoinInfo:
+			retval = _copyOuterJoinInfo(from);
+			break;
 		case T_InClauseInfo:
 			retval = _copyInClauseInfo(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 91d54b462c4..824a7ff82c3 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.258 2005/11/22 18:17:11 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.259 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -613,6 +613,17 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 	return true;
 }
 
+static bool
+_equalOuterJoinInfo(OuterJoinInfo *a, OuterJoinInfo *b)
+{
+	COMPARE_BITMAPSET_FIELD(min_lefthand);	/* minimum relids on the OJ's LHS */
+	COMPARE_BITMAPSET_FIELD(min_righthand);	/* minimum relids on the OJ's RHS */
+	COMPARE_SCALAR_FIELD(is_full_join);
+	COMPARE_SCALAR_FIELD(lhs_strict);
+
+	return true;				/* presumably reached only if no COMPARE_* bailed out */
+}
+
 static bool
 _equalInClauseInfo(InClauseInfo *a, InClauseInfo *b)
 {
@@ -1954,6 +1965,9 @@ equal(void *a, void *b)
 		case T_RestrictInfo:
 			retval = _equalRestrictInfo(a, b);
 			break;
+		case T_OuterJoinInfo:
+			retval = _equalOuterJoinInfo(a, b);
+			break;
 		case T_InClauseInfo:
 			retval = _equalInClauseInfo(a, b);
 			break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 646ac6daabf..aa5fd99db86 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.264 2005/11/28 04:35:30 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.265 2005/12/20 02:30:35 tgl Exp $
  *
  * NOTES
  *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -1167,6 +1167,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
 	WRITE_NODE_FIELD(left_join_clauses);
 	WRITE_NODE_FIELD(right_join_clauses);
 	WRITE_NODE_FIELD(full_join_clauses);
+	WRITE_NODE_FIELD(oj_info_list);
 	WRITE_NODE_FIELD(in_info_list);
 	WRITE_NODE_FIELD(query_pathkeys);
 	WRITE_NODE_FIELD(group_pathkeys);
@@ -1201,7 +1202,6 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
 	WRITE_FLOAT_FIELD(tuples, "%.0f");
 	WRITE_NODE_FIELD(subplan);
 	WRITE_NODE_FIELD(baserestrictinfo);
-	WRITE_BITMAPSET_FIELD(outerjoinset);
 	WRITE_NODE_FIELD(joininfo);
 	WRITE_BITMAPSET_FIELD(index_outer_relids);
 	WRITE_NODE_FIELD(index_inner_paths);
@@ -1265,6 +1265,17 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
 	WRITE_NODE_FIELD(best_innerpath);
 }
 
+static void
+_outOuterJoinInfo(StringInfo str, OuterJoinInfo *node)
+{
+	WRITE_NODE_TYPE("OUTERJOININFO");	/* presumably emits the node's type label */
+
+	WRITE_BITMAPSET_FIELD(min_lefthand);	/* minimum relids on the OJ's LHS */
+	WRITE_BITMAPSET_FIELD(min_righthand);	/* minimum relids on the OJ's RHS */
+	WRITE_BOOL_FIELD(is_full_join);
+	WRITE_BOOL_FIELD(lhs_strict);
+}
+
 static void
 _outInClauseInfo(StringInfo str, InClauseInfo *node)
 {
@@ -2019,6 +2030,9 @@ _outNode(StringInfo str, void *obj)
 			case T_InnerIndexscanInfo:
 				_outInnerIndexscanInfo(str, obj);
 				break;
+			case T_OuterJoinInfo:
+				_outOuterJoinInfo(str, obj);
+				break;
 			case T_InClauseInfo:
 				_outInClauseInfo(str, obj);
 				break;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index b19f6118ff1..df9828b2150 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -40,10 +40,11 @@ is derived from the cheapest Path for the RelOptInfo that includes all the
 base rels of the query.
 
 Possible Paths for a primitive table relation include plain old sequential
-scan, plus index scans for any indexes that exist on the table.  A subquery
-base relation just has one Path, a "SubqueryScan" path (which links to the
-subplan that was built by a recursive invocation of the planner).  Likewise
-a function-RTE base relation has only one possible Path.
+scan, plus index scans for any indexes that exist on the table, plus bitmap
+index scans using one or more indexes.  A subquery base relation just has
+one Path, a "SubqueryScan" path (which links to the subplan that was built
+by a recursive invocation of the planner).  Likewise a function-RTE base
+relation has only one possible Path.
 
 Joins always occur using two RelOptInfos.  One is outer, the other inner.
 Outers drive lookups of values in the inner.  In a nested loop, lookups of
@@ -84,20 +85,26 @@ If we have only a single base relation in the query, we are done.
 Otherwise we have to figure out how to join the base relations into a
 single join relation.
 
-2) If the query's FROM clause contains explicit JOIN clauses, we join
-those pairs of relations in exactly the tree structure indicated by the
-JOIN clauses.  (This is absolutely necessary when dealing with outer JOINs.
-For inner JOINs we have more flexibility in theory, but don't currently
-exploit it in practice.)  For each such join pair, we generate a Path
-for each feasible join method, and select the cheapest Path.  Note that
-the JOIN clause structure determines the join Path structure, but it
-doesn't constrain the join implementation method at each join (nestloop,
-merge, hash), nor does it say which rel is considered outer or inner at
-each join.  We consider all these possibilities in building Paths.
+2) Normally, any explicit JOIN clauses are "flattened" so that we just
+have a list of relations to join.  However, FULL OUTER JOIN clauses are
+never flattened, and other kinds of JOIN might not be either, if the
+flattening process is stopped by join_collapse_limit or from_collapse_limit
+restrictions.  Therefore, we end up with a planning problem that contains
+both lists of relations to be joined in any order, and JOIN nodes that
+force a particular join order.  For each un-flattened JOIN node, we join
+exactly that pair of relations (after recursively planning their inputs,
+if the inputs aren't single base relations).  We generate a Path for each
+feasible join method, and select the cheapest Path.  Note that the JOIN
+clause structure determines the join Path structure, but it doesn't
+constrain the join implementation method at each join (nestloop, merge,
+hash), nor does it say which rel is considered outer or inner at each
+join.  We consider all these possibilities in building Paths.
 
 3) At the top level of the FROM clause we will have a list of relations
-that are either base rels or joinrels constructed per JOIN directives.
-We can join these rels together in any order the planner sees fit.
+that are either base rels or joinrels constructed per un-flattened JOIN
+directives.  (This is also the situation, recursively, when we can flatten
+sub-joins underneath an un-flattenable JOIN into a list of relations to
+join.)  We can join these rels together in any order the planner sees fit.
 The standard (non-GEQO) planner does this as follows:
 
 Consider joining each RelOptInfo to each other RelOptInfo specified in its
@@ -156,12 +163,76 @@ joining {1 2 3} to {4} (left-handed), {4} to {1 2 3} (right-handed), and
 scanning code produces these potential join combinations one at a time,
 all the ways to produce the same set of joined base rels will share the
 same RelOptInfo, so the paths produced from different join combinations
-that produce equivalent joinrels will compete in add_path.
+that produce equivalent joinrels will compete in add_path().
 
 Once we have built the final join rel, we use either the cheapest path
 for it or the cheapest path with the desired ordering (if that's cheaper
 than applying a sort to the cheapest other path).
 
+If the query contains one-sided outer joins (LEFT or RIGHT joins), or
+"IN (sub-select)" WHERE clauses that were converted to joins, then some of
+the possible join orders may be illegal.  These are excluded by having
+make_join_rel consult side lists of outer joins and IN joins to see
+whether a proposed join is illegal.  (The same consultation allows it
+to see which join style should be applied for a valid join, i.e.,
+JOIN_INNER, JOIN_LEFT, etc.)
+
+
+Valid OUTER JOIN optimizations
+------------------------------
+
+The planner's treatment of outer join reordering is based on the following
+identities:
+
+1.	(A leftjoin B on (Pab)) innerjoin C on (Pac)
+	= (A innerjoin C on (Pac)) leftjoin B on (Pab)
+
+where Pac is a predicate referencing A and C, etc (in this case, clearly
+Pac cannot reference B, or the transformation is nonsensical).
+
+2.	(A leftjoin B on (Pab)) leftjoin C on (Pac)
+	= (A leftjoin C on (Pac)) leftjoin B on (Pab)
+
+3.	(A leftjoin B on (Pab)) leftjoin C on (Pbc)
+	= A leftjoin (B leftjoin C on (Pbc)) on (Pab)
+
+Identity 3 only holds if predicate Pbc must fail for all-null B rows
+(that is, Pbc is strict for at least one column of B).  If Pbc is not
+strict, the first form might produce some rows with nonnull C columns
+where the second form would make those entries null.
+
+RIGHT JOIN is equivalent to LEFT JOIN after switching the two input
+tables, so the same identities work for right joins.  Only FULL JOIN
+cannot be re-ordered at all.
+
+An example of a case that does *not* work is moving an innerjoin into or
+out of the nullable side of an outer join:
+
+	A leftjoin (B join C on (Pbc)) on (Pab)
+	!= (A leftjoin B on (Pab)) join C on (Pbc)
+
+FULL JOIN ordering is enforced by not collapsing FULL JOIN nodes when
+translating the jointree to "joinlist" representation.  LEFT and RIGHT
+JOIN nodes are normally collapsed so that they participate fully in the
+join order search.  To avoid generating illegal join orders, the planner
+creates an OuterJoinInfo node for each outer join, and make_join_rel
+checks this list to decide if a proposed join is legal.
+
+What we store in OuterJoinInfo nodes are the minimum sets of Relids
+required on each side of the join to form the outer join.  Note that
+these are minimums; there's no explicit maximum, since joining other
+rels to the OJ's syntactic rels may be legal.  Per identities 1 and 2,
+non-FULL joins can be freely associated into the lefthand side of an
+OJ, but in general they can't be associated into the righthand side.
+So the restriction enforced by make_join_rel is that a proposed join
+can't join across a RHS boundary (i.e., join anything inside the RHS
+to anything else) unless the join validly implements some outer join.
+(To support use of identity 3, we have to allow cases where an apparent
+violation of a lower OJ's RHS is committed while forming an upper OJ.
+If this wouldn't in fact be legal, the upper OJ's minimum LHS or RHS
+set must be expanded to include the whole of the lower OJ, thereby
+preventing it from being formed before the lower OJ is.)
+
 
 Pulling up subqueries
 ---------------------
@@ -180,13 +251,13 @@ of the join tree.  Each FROM-list is planned using the dynamic-programming
 search method described above.
 
 If pulling up a subquery produces a FROM-list as a direct child of another
-FROM-list (with no explicit JOIN directives between), then we can merge the
-two FROM-lists together.  Once that's done, the subquery is an absolutely
-integral part of the outer query and will not constrain the join tree
-search space at all.  However, that could result in unpleasant growth of
-planning time, since the dynamic-programming search has runtime exponential
-in the number of FROM-items considered.  Therefore, we don't merge
-FROM-lists if the result would have too many FROM-items in one list.
+FROM-list, then we can merge the two FROM-lists together.  Once that's
+done, the subquery is an absolutely integral part of the outer query and
+will not constrain the join tree search space at all.  However, that could
+result in unpleasant growth of planning time, since the dynamic-programming
+search has runtime exponential in the number of FROM-items considered.
+Therefore, we don't merge FROM-lists if the result would have too many
+FROM-items in one list.
 
 
 Optimizer Functions
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index b6c859b7675..29a4390a28c 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.78 2005/11/22 18:17:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.79 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -216,12 +216,11 @@ gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
 
 			/*
 			 * Construct a RelOptInfo representing the join of these two input
-			 * relations.  These are always inner joins. Note that we expect
-			 * the joinrel not to exist in root->join_rel_list yet, and so the
-			 * paths constructed for it will only include the ones we want.
+			 * relations.  Note that we expect the joinrel not to exist in
+			 * root->join_rel_list yet, and so the paths constructed for it
+			 * will only include the ones we want.
 			 */
-			joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel,
-									JOIN_INNER);
+			joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel);
 
 			/* Can't pop stack here if join order is not valid */
 			if (!joinrel)
@@ -262,6 +261,20 @@ desirable_join(PlannerInfo *root,
 	if (have_relevant_joinclause(outer_rel, inner_rel))
 		return true;
 
+	/*
+	 * Join if the rels are members of the same outer-join RHS. This is needed
+	 * to improve the odds that we will find a valid solution in a case where
+	 * an OJ RHS has a clauseless join.
+	 */
+	foreach(l, root->oj_info_list)
+	{
+		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+
+		if (bms_is_subset(outer_rel->relids, ojinfo->min_righthand) &&
+			bms_is_subset(inner_rel->relids, ojinfo->min_righthand))
+			return true;
+	}
+
 	/*
 	 * Join if the rels are members of the same IN sub-select.	This is needed
 	 * to improve the odds that we will find a valid solution in a case where
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 1a0ff1ac209..19b1cfcaad4 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.138 2005/11/22 18:17:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.139 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,6 +51,7 @@ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					  Index rti, RangeTblEntry *rte);
 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					  RangeTblEntry *rte);
+static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
 static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
 					  List *initial_rels);
 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
@@ -73,7 +74,7 @@ static void recurse_push_qual(Node *setOp, Query *topquery,
  *	  single rel that represents the join of all base rels in the query.
  */
 RelOptInfo *
-make_one_rel(PlannerInfo *root)
+make_one_rel(PlannerInfo *root, List *joinlist)
 {
 	RelOptInfo *rel;
 
@@ -85,10 +86,7 @@ make_one_rel(PlannerInfo *root)
 	/*
 	 * Generate access paths for the entire join tree.
 	 */
-	Assert(root->parse->jointree != NULL &&
-		   IsA(root->parse->jointree, FromExpr));
-
-	rel = make_fromexpr_rel(root, root->parse->jointree);
+	rel = make_rel_from_joinlist(root, joinlist);
 
 	/*
 	 * The result should join all and only the query's base rels.
@@ -528,43 +526,65 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 }
 
 /*
- * make_fromexpr_rel
- *	  Build access paths for a FromExpr jointree node.
+ * make_rel_from_joinlist
+ *	  Build access paths using a "joinlist" to guide the join path search.
+ *
+ * See comments for deconstruct_jointree() for definition of the joinlist
+ * data structure.
  */
-RelOptInfo *
-make_fromexpr_rel(PlannerInfo *root, FromExpr *from)
+static RelOptInfo *
+make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
 {
 	int			levels_needed;
-	List	   *initial_rels = NIL;
-	ListCell   *jt;
+	List	   *initial_rels;
+	ListCell   *jl;
 
 	/*
-	 * Count the number of child jointree nodes.  This is the depth of the
+	 * Count the number of child joinlist nodes.  This is the depth of the
 	 * dynamic-programming algorithm we must employ to consider all ways of
 	 * joining the child nodes.
 	 */
-	levels_needed = list_length(from->fromlist);
+	levels_needed = list_length(joinlist);
 
 	if (levels_needed <= 0)
 		return NULL;			/* nothing to do? */
 
 	/*
-	 * Construct a list of rels corresponding to the child jointree nodes.
+	 * Construct a list of rels corresponding to the child joinlist nodes.
 	 * This may contain both base rels and rels constructed according to
-	 * explicit JOIN directives.
+	 * sub-joinlists.
 	 */
-	foreach(jt, from->fromlist)
+	initial_rels = NIL;
+	foreach(jl, joinlist)
 	{
-		Node	   *jtnode = (Node *) lfirst(jt);
+		Node	   *jlnode = (Node *) lfirst(jl);
+		RelOptInfo *thisrel;
+
+		if (IsA(jlnode, RangeTblRef))
+		{
+			int			varno = ((RangeTblRef *) jlnode)->rtindex;
+
+			thisrel = find_base_rel(root, varno);
+		}
+		else if (IsA(jlnode, List))
+		{
+			/* Recurse to handle subproblem */
+			thisrel = make_rel_from_joinlist(root, (List *) jlnode);
+		}
+		else
+		{
+			elog(ERROR, "unrecognized joinlist node type: %d",
+				 (int) nodeTag(jlnode));
+			thisrel = NULL;		/* keep compiler quiet */
+		}
 
-		initial_rels = lappend(initial_rels,
-							   make_jointree_rel(root, jtnode));
+		initial_rels = lappend(initial_rels, thisrel);
 	}
 
 	if (levels_needed == 1)
 	{
 		/*
-		 * Single jointree node, so we're done.
+		 * Single joinlist node, so we're done.
 		 */
 		return (RelOptInfo *) linitial(initial_rels);
 	}
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 778b1676180..b5762c97ba9 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.77 2005/11/22 18:17:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.78 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -25,7 +25,7 @@ static List *make_rels_by_clause_joins(PlannerInfo *root,
 static List *make_rels_by_clauseless_joins(PlannerInfo *root,
 							  RelOptInfo *old_rel,
 							  ListCell *other_rels);
-static bool is_inside_IN(PlannerInfo *root, RelOptInfo *rel);
+static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
 
 
 /*
@@ -86,15 +86,16 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
 												 other_rels);
 
 			/*
-			 * An exception occurs when there is a clauseless join inside an
-			 * IN (sub-SELECT) construct.  Here, the members of the subselect
-			 * all have join clauses (against the stuff outside the IN), but
-			 * they *must* be joined to each other before we can make use of
-			 * those join clauses.	So do the clauseless join bit.
+			 * An exception occurs when there is a clauseless join inside a
+			 * construct that restricts join order, i.e., an outer join RHS
+			 * or an IN (sub-SELECT) construct.  Here, the rel may well have
+			 * join clauses against stuff outside the OJ RHS or IN sub-SELECT,
+			 * but the clauseless join *must* be done before we can make use
+			 * of those join clauses.  So do the clauseless join bit.
 			 *
 			 * See also the last-ditch case below.
 			 */
-			if (new_rels == NIL && is_inside_IN(root, old_rel))
+			if (new_rels == NIL && has_join_restriction(root, old_rel))
 				new_rels = make_rels_by_clauseless_joins(root,
 														 old_rel,
 														 other_rels);
@@ -169,8 +170,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
 					{
 						RelOptInfo *jrel;
 
-						jrel = make_join_rel(root, old_rel, new_rel,
-											 JOIN_INNER);
+						jrel = make_join_rel(root, old_rel, new_rel);
 						/* Avoid making duplicate entries ... */
 						if (jrel)
 							result_rels = list_append_unique_ptr(result_rels,
@@ -219,8 +219,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
 		}
 
 		/*----------
-		 * When IN clauses are involved, there may be no legal way to make
-		 * an N-way join for some values of N.	For example consider
+		 * When OJs or IN clauses are involved, there may be no legal way
+		 * to make an N-way join for some values of N.	For example consider
 		 *
 		 * SELECT ... FROM t1 WHERE
 		 *	 x IN (SELECT ... FROM t2,t3 WHERE ...) AND
@@ -231,11 +231,12 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
 		 * to accept failure at level 4 and go on to discover a workable
 		 * bushy plan at level 5.
 		 *
-		 * However, if there are no IN clauses then make_join_rel() should
+		 * However, if there are no such clauses then make_join_rel() should
 		 * never fail, and so the following sanity check is useful.
 		 *----------
 		 */
-		if (result_rels == NIL && root->in_info_list == NIL)
+		if (result_rels == NIL &&
+			root->oj_info_list == NIL && root->in_info_list == NIL)
 			elog(ERROR, "failed to build any %d-way joins", level);
 	}
 
@@ -273,7 +274,7 @@ make_rels_by_clause_joins(PlannerInfo *root,
 		{
 			RelOptInfo *jrel;
 
-			jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
+			jrel = make_join_rel(root, old_rel, other_rel);
 			if (jrel)
 				result = lcons(jrel, result);
 		}
@@ -312,7 +313,7 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
 		{
 			RelOptInfo *jrel;
 
-			jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
+			jrel = make_join_rel(root, old_rel, other_rel);
 
 			/*
 			 * As long as given other_rels are distinct, don't need to test to
@@ -328,85 +329,31 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
 
 
 /*
- * is_inside_IN
- *		Detect whether the specified relation is inside an IN (sub-SELECT).
- *
- * Note that we are actually only interested in rels that have been pulled up
- * out of an IN, so the routine name is a slight misnomer.
+ * has_join_restriction
+ *		Detect whether the specified relation has join-order restrictions
+ *		due to being inside an OJ RHS or an IN (sub-SELECT).
  */
 static bool
-is_inside_IN(PlannerInfo *root, RelOptInfo *rel)
+has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
 	ListCell   *l;
 
-	foreach(l, root->in_info_list)
+	foreach(l, root->oj_info_list)
 	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
 
-		if (bms_is_subset(rel->relids, ininfo->righthand))
+		if (bms_is_subset(rel->relids, ojinfo->min_righthand))
 			return true;
 	}
-	return false;
-}
-
 
-/*
- * make_jointree_rel
- *		Find or build a RelOptInfo join rel representing a specific
- *		jointree item.	For JoinExprs, we only consider the construction
- *		path that corresponds exactly to what the user wrote.
- */
-RelOptInfo *
-make_jointree_rel(PlannerInfo *root, Node *jtnode)
-{
-	if (IsA(jtnode, RangeTblRef))
-	{
-		int			varno = ((RangeTblRef *) jtnode)->rtindex;
-
-		return find_base_rel(root, varno);
-	}
-	else if (IsA(jtnode, FromExpr))
-	{
-		FromExpr   *f = (FromExpr *) jtnode;
-
-		/* Recurse back to multi-way-join planner */
-		return make_fromexpr_rel(root, f);
-	}
-	else if (IsA(jtnode, JoinExpr))
+	foreach(l, root->in_info_list)
 	{
-		JoinExpr   *j = (JoinExpr *) jtnode;
-		RelOptInfo *rel,
-				   *lrel,
-				   *rrel;
-
-		/* Recurse */
-		lrel = make_jointree_rel(root, j->larg);
-		rrel = make_jointree_rel(root, j->rarg);
-
-		/* Make this join rel */
-		rel = make_join_rel(root, lrel, rrel, j->jointype);
-
-		if (rel == NULL)		/* oops */
-			elog(ERROR, "invalid join order");
-
-		/*
-		 * Since we are only going to consider this one way to do it, we're
-		 * done generating Paths for this joinrel and can now select the
-		 * cheapest.  In fact we *must* do so now, since next level up will
-		 * need it!
-		 */
-		set_cheapest(rel);
-
-#ifdef OPTIMIZER_DEBUG
-		debug_print_rel(root, rel);
-#endif
+		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
 
-		return rel;
+		if (bms_is_subset(rel->relids, ininfo->righthand))
+			return true;
 	}
-	else
-		elog(ERROR, "unrecognized node type: %d",
-			 (int) nodeTag(jtnode));
-	return NULL;				/* keep compiler quiet */
+	return false;
 }
 
 
@@ -418,16 +365,19 @@ make_jointree_rel(PlannerInfo *root, Node *jtnode)
  *	   (The join rel may already contain paths generated from other
  *	   pairs of rels that add up to the same set of base rels.)
  *
- * NB: will return NULL if attempted join is not valid.  This can only
- * happen when working with IN clauses that have been turned into joins.
+ * NB: will return NULL if attempted join is not valid.  This can happen
+ * when working with outer joins, or with IN clauses that have been turned
+ * into joins.
  */
 RelOptInfo *
-make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
-			  JoinType jointype)
+make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 {
 	Relids		joinrelids;
+	JoinType	jointype;
+	bool		is_valid_inner;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
+	ListCell   *l;
 
 	/* We should never try to join two overlapping sets of rels. */
 	Assert(!bms_overlap(rel1->relids, rel2->relids));
@@ -436,94 +386,176 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 	joinrelids = bms_union(rel1->relids, rel2->relids);
 
 	/*
-	 * If we are implementing IN clauses as joins, there are some joins that
-	 * are illegal.  Check to see if the proposed join is trouble. We can skip
-	 * the work if looking at an outer join, however, because only top-level
-	 * joins might be affected.
+	 * If we have any outer joins, the proposed join might be illegal; and
+	 * in any case we have to determine its join type.  Scan the OJ list
+	 * for conflicts.
 	 */
-	if (jointype == JOIN_INNER)
-	{
-		ListCell   *l;
+	jointype = JOIN_INNER;		/* default if no match to an OJ */
+	is_valid_inner = true;
 
-		foreach(l, root->in_info_list)
-		{
-			InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-			/*
-			 * This IN clause is not relevant unless its RHS overlaps the
-			 * proposed join.  (Check this first as a fast path for dismissing
-			 * most irrelevant INs quickly.)
-			 */
-			if (!bms_overlap(ininfo->righthand, joinrelids))
-				continue;
+	foreach(l, root->oj_info_list)
+	{
+		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
 
-			/*
-			 * If we are still building the IN clause's RHS, then this IN
-			 * clause isn't relevant yet.
-			 */
-			if (bms_is_subset(joinrelids, ininfo->righthand))
-				continue;
+		/*
+		 * This OJ is not relevant unless its RHS overlaps the proposed join.
+		 * (Check this first as a fast path for dismissing most irrelevant OJs
+		 * quickly.)
+		 */
+		if (!bms_overlap(ojinfo->min_righthand, joinrelids))
+			continue;
 
-			/*
-			 * Cannot join if proposed join contains rels not in the RHS *and*
-			 * contains only part of the RHS.  We must build the complete RHS
-			 * (subselect's join) before it can be joined to rels outside the
-			 * subselect.
-			 */
-			if (!bms_is_subset(ininfo->righthand, joinrelids))
-			{
-				bms_free(joinrelids);
-				return NULL;
-			}
+		/*
+		 * Also, not relevant if proposed join is fully contained within RHS
+		 * (ie, we're still building up the RHS).
+		 */
+		if (bms_is_subset(joinrelids, ojinfo->min_righthand))
+			continue;
 
-			/*
-			 * At this point we are considering a join of the IN's RHS to some
-			 * other rel(s).
-			 *
-			 * If we already joined IN's RHS to any other rels in either input
-			 * path, then this join is not constrained (the necessary work was
-			 * done at the lower level where that join occurred).
-			 */
-			if (bms_is_subset(ininfo->righthand, rel1->relids) &&
-				!bms_equal(ininfo->righthand, rel1->relids))
-				continue;
-			if (bms_is_subset(ininfo->righthand, rel2->relids) &&
-				!bms_equal(ininfo->righthand, rel2->relids))
-				continue;
+		/*
+		 * Also, not relevant if OJ is already done within either input.
+		 */
+		if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
+			bms_is_subset(ojinfo->min_righthand, rel1->relids))
+			continue;
+		if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
+			bms_is_subset(ojinfo->min_righthand, rel2->relids))
+			continue;
 
-			/*
-			 * JOIN_IN technique will work if outerrel includes LHS and
-			 * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
-			 * RHS/LHS.
-			 *
-			 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
-			 * conversely JOIN_UNIQUE_INNER will work if innerrel is exactly
-			 * RHS.
-			 *
-			 * But none of these will work if we already found another IN that
-			 * needs to trigger here.
-			 */
+		/*
+		 * If one input contains min_lefthand and the other contains
+		 * min_righthand, then we can perform the OJ at this join.
+		 *
+		 * Reject the join if it matches more than one OJ (is that possible?)
+		 */
+		if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
+			bms_is_subset(ojinfo->min_righthand, rel2->relids))
+		{
 			if (jointype != JOIN_INNER)
 			{
+				/* invalid join path */
 				bms_free(joinrelids);
 				return NULL;
 			}
-			if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
-				bms_equal(ininfo->righthand, rel2->relids))
-				jointype = JOIN_IN;
-			else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
-					 bms_equal(ininfo->righthand, rel1->relids))
-				jointype = JOIN_REVERSE_IN;
-			else if (bms_equal(ininfo->righthand, rel1->relids))
-				jointype = JOIN_UNIQUE_OUTER;
-			else if (bms_equal(ininfo->righthand, rel2->relids))
-				jointype = JOIN_UNIQUE_INNER;
-			else
+			jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
+		}
+		else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
+				 bms_is_subset(ojinfo->min_righthand, rel1->relids))
+		{
+			if (jointype != JOIN_INNER)
 			{
 				/* invalid join path */
 				bms_free(joinrelids);
 				return NULL;
 			}
+			jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
+		}
+		else
+		{
+			/*----------
+			 * Otherwise, the proposed join overlaps the RHS but isn't
+			 * a valid implementation of this OJ.  It might still be
+			 * a valid implementation of some other OJ, however.  We have
+			 * to allow this to support the associative identity
+			 *	(a LJ b ON Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) ON Pab
+			 * since joining B directly to C violates the lower OJ's RHS.
+			 * We assume that make_outerjoininfo() set things up correctly
+			 * so that we'll only match to the upper OJ if the transformation
+			 * is valid.  Set flag here; it is checked after the loop ends.
+			 *----------
+			 */
+			is_valid_inner = false;
+		}
+	}
+
+	/* Fail if violated some OJ's RHS and didn't match to another OJ */
+	if (jointype == JOIN_INNER && !is_valid_inner)
+	{
+		/* invalid join path */
+		bms_free(joinrelids);
+		return NULL;
+	}
+
+	/*
+	 * Similarly, if we are implementing IN clauses as joins, check for
+	 * illegal join path and detect whether we need a non-default join type.
+	 */
+	foreach(l, root->in_info_list)
+	{
+		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+		/*
+		 * This IN clause is not relevant unless its RHS overlaps the
+		 * proposed join.  (Check this first as a fast path for dismissing
+		 * most irrelevant INs quickly.)
+		 */
+		if (!bms_overlap(ininfo->righthand, joinrelids))
+			continue;
+
+		/*
+		 * If we are still building the IN clause's RHS, then this IN
+		 * clause isn't relevant yet.
+		 */
+		if (bms_is_subset(joinrelids, ininfo->righthand))
+			continue;
+
+		/*
+		 * Cannot join if proposed join contains rels not in the RHS *and*
+		 * contains only part of the RHS.  We must build the complete RHS
+		 * (subselect's join) before it can be joined to rels outside the
+		 * subselect.
+		 */
+		if (!bms_is_subset(ininfo->righthand, joinrelids))
+		{
+			bms_free(joinrelids);
+			return NULL;
+		}
+
+		/*
+		 * At this point we are considering a join of the IN's RHS to some
+		 * other rel(s).
+		 *
+		 * If we already joined IN's RHS to any other rels in either input
+		 * path, then this join is not constrained (the necessary work was
+		 * done at the lower level where that join occurred).
+		 */
+		if (bms_is_subset(ininfo->righthand, rel1->relids) &&
+			!bms_equal(ininfo->righthand, rel1->relids))
+			continue;
+		if (bms_is_subset(ininfo->righthand, rel2->relids) &&
+			!bms_equal(ininfo->righthand, rel2->relids))
+			continue;
+
+		/*
+		 * JOIN_IN technique will work if outerrel includes LHS and innerrel
+		 * is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
+		 *
+		 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
+		 * JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
+		 *
+		 * But none of these will work if we already found an OJ or another IN
+		 * that needs to trigger here.
+		 */
+		if (jointype != JOIN_INNER)
+		{
+			bms_free(joinrelids);
+			return NULL;
+		}
+		if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
+			bms_equal(ininfo->righthand, rel2->relids))
+			jointype = JOIN_IN;
+		else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
+				 bms_equal(ininfo->righthand, rel1->relids))
+			jointype = JOIN_REVERSE_IN;
+		else if (bms_equal(ininfo->righthand, rel1->relids))
+			jointype = JOIN_UNIQUE_OUTER;
+		else if (bms_equal(ininfo->righthand, rel2->relids))
+			jointype = JOIN_UNIQUE_INNER;
+		else
+		{
+			/* invalid join path */
+			bms_free(joinrelids);
+			return NULL;
 		}
 	}
 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 72d9c7402eb..4b132d65611 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.112 2005/11/22 18:17:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.113 2005/12/20 02:30:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,16 +34,25 @@
 #include "utils/syscache.h"
 
 
-static void mark_baserels_for_outer_join(PlannerInfo *root, Relids rels,
-							 Relids outerrels);
+/* These parameters are set by GUC */
+int			from_collapse_limit;
+int			join_collapse_limit;
+
+
+static void add_vars_to_targetlist(PlannerInfo *root, List *vars,
+					   Relids where_needed);
+static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
+								 bool below_outer_join, Relids *qualscope);
+static OuterJoinInfo *make_outerjoininfo(PlannerInfo *root,
+										 Relids left_rels, Relids right_rels,
+										 bool is_full_join, Node *clause);
 static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 						bool is_pushed_down,
 						bool is_deduced,
 						bool below_outer_join,
-						Relids outerjoin_nonnullable,
-						Relids qualscope);
-static void add_vars_to_targetlist(PlannerInfo *root, List *vars,
-					   Relids where_needed);
+						Relids qualscope,
+						Relids ojscope,
+						Relids outerjoin_nonnullable);
 static bool qual_is_redundant(PlannerInfo *root, RestrictInfo *restrictinfo,
 				  List *restrictlist);
 static void check_mergejoinable(RestrictInfo *restrictinfo);
@@ -162,66 +171,117 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed)
 
 /*****************************************************************************
  *
- *	  QUALIFICATIONS
+ *	  JOIN TREE PROCESSING
  *
  *****************************************************************************/
 
-
 /*
- * distribute_quals_to_rels
+ * deconstruct_jointree
  *	  Recursively scan the query's join tree for WHERE and JOIN/ON qual
  *	  clauses, and add these to the appropriate restrictinfo and joininfo
- *	  lists belonging to base RelOptInfos.	Also, base RelOptInfos are marked
- *	  with outerjoinset information, to aid in proper positioning of qual
- *	  clauses that appear above outer joins.
+ *	  lists belonging to base RelOptInfos.  Also, add OuterJoinInfo nodes
+ *	  to root->oj_info_list for any outer joins appearing in the query tree.
+ *	  Return a "joinlist" data structure showing the join order decisions
+ *	  that need to be made by make_one_rel().
  *
- * jtnode is the jointree node currently being examined.  below_outer_join
- * is TRUE if this node is within the nullable side of a higher-level outer
- * join.
+ * The "joinlist" result is a list of items that are either RangeTblRef
+ * jointree nodes or sub-joinlists.  All the items at the same level of
+ * joinlist must be joined in an order to be determined by make_one_rel()
+ * (note that legal orders may be constrained by OuterJoinInfo nodes).
+ * A sub-joinlist represents a subproblem to be planned separately. Currently
+ * sub-joinlists arise only from FULL OUTER JOIN or when collapsing of
+ * subproblems is stopped by join_collapse_limit or from_collapse_limit.
  *
  * NOTE: when dealing with inner joins, it is appropriate to let a qual clause
  * be evaluated at the lowest level where all the variables it mentions are
  * available.  However, we cannot push a qual down into the nullable side(s)
  * of an outer join since the qual might eliminate matching rows and cause a
- * NULL row to be incorrectly emitted by the join.	Therefore, rels appearing
- * within the nullable side(s) of an outer join are marked with
- *		outerjoinset = set of Relids used at the outer join node.
- * This set will be added to the set of rels referenced by quals using such
- * a rel, thereby forcing them up the join tree to the right level.
+ * NULL row to be incorrectly emitted by the join.  Therefore, we artificially
+ * OR the minimum-relids of such an outer join into the required_relids of
+ * clauses appearing above it.  This forces those clauses to be delayed until
+ * application of the outer join (or maybe even higher in the join tree).
+ */
+List *
+deconstruct_jointree(PlannerInfo *root)
+{
+	Relids		qualscope;
+
+	/* Start recursion at top of jointree */
+	Assert(root->parse->jointree != NULL &&
+		   IsA(root->parse->jointree, FromExpr));
+
+	return deconstruct_recurse(root, (Node *) root->parse->jointree, false,
+							   &qualscope);
+}
+
+/*
+ * deconstruct_recurse
+ *	  One recursion level of deconstruct_jointree processing.
  *
- * To ease the calculation of these values, distribute_quals_to_rels() returns
- * the set of base Relids involved in its own level of join.  This is just an
- * internal convenience; no outside callers pay attention to the result.
+ * Inputs:
+ *	jtnode is the jointree node to examine
+ *	below_outer_join is TRUE if this node is within the nullable side of a
+ *		higher-level outer join
+ * Outputs:
+ *	*qualscope gets the set of base Relids syntactically included in this
+ *		jointree node (do not modify or free this, as it may also be pointed
+ *		to by RestrictInfo nodes)
+ *	Return value is the appropriate joinlist for this jointree node
+ *
+ * In addition, entries will be added to root->oj_info_list for outer joins.
  */
-Relids
-distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
-						 bool below_outer_join)
+static List *
+deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
+					Relids *qualscope)
 {
-	Relids		result = NULL;
+	List	   *joinlist;
 
 	if (jtnode == NULL)
-		return result;
+	{
+		*qualscope = NULL;
+		return NIL;
+	}
 	if (IsA(jtnode, RangeTblRef))
 	{
 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
 
 		/* No quals to deal with, just return correct result */
-		result = bms_make_singleton(varno);
+		*qualscope = bms_make_singleton(varno);
+		joinlist = list_make1(jtnode);
 	}
 	else if (IsA(jtnode, FromExpr))
 	{
 		FromExpr   *f = (FromExpr *) jtnode;
+		int			remaining;
 		ListCell   *l;
 
 		/*
-		 * First, recurse to handle child joins.
+		 * First, recurse to handle child joins.  We collapse subproblems
+		 * into a single joinlist whenever the resulting joinlist wouldn't
+		 * exceed from_collapse_limit members.  Also, always collapse
+		 * one-element subproblems, since that won't lengthen the joinlist
+		 * anyway.
 		 */
+		*qualscope = NULL;
+		joinlist = NIL;
+		remaining = list_length(f->fromlist);
 		foreach(l, f->fromlist)
 		{
-			result = bms_add_members(result,
-									 distribute_quals_to_rels(root,
-															  lfirst(l),
-														  below_outer_join));
+			Relids	sub_qualscope;
+			List   *sub_joinlist;
+			int		sub_members;
+
+			sub_joinlist = deconstruct_recurse(root, lfirst(l),
+											   below_outer_join,
+											   &sub_qualscope);
+			*qualscope = bms_add_members(*qualscope, sub_qualscope);
+			sub_members = list_length(sub_joinlist);
+			remaining--;
+			if (sub_members <= 1 ||
+				list_length(joinlist) + sub_members + remaining <= from_collapse_limit)
+				joinlist = list_concat(joinlist, sub_joinlist);
+			else
+				joinlist = lappend(joinlist, sub_joinlist);
 		}
 
 		/*
@@ -231,7 +291,7 @@ distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
 		foreach(l, (List *) f->quals)
 			distribute_qual_to_rels(root, (Node *) lfirst(l),
 									true, false, below_outer_join,
-									NULL, result);
+									*qualscope, NULL, NULL);
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
@@ -239,7 +299,10 @@ distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
 		Relids		leftids,
 					rightids,
 					nonnullable_rels,
-					nullable_rels;
+					ojscope;
+		List	   *leftjoinlist,
+				   *rightjoinlist;
+		OuterJoinInfo *ojinfo;
 		ListCell   *qual;
 
 		/*
@@ -249,55 +312,55 @@ distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
 		 * Then we place our own join quals, which are restricted by lower
 		 * outer joins in any case, and are forced to this level if this is an
 		 * outer join and they mention the outer side.	Finally, if this is an
-		 * outer join, we mark baserels contained within the inner side(s)
-		 * with our own rel set; this will prevent quals above us in the join
-		 * tree that use those rels from being pushed down below this level.
-		 * (It's okay for upper quals to be pushed down to the outer side,
-		 * however.)
+		 * outer join, we create an oj_info_list entry for the join.  This
+		 * will prevent quals above us in the join tree that use those rels
+		 * from being pushed down below this level.  (It's okay for upper
+		 * quals to be pushed down to the outer side, however.)
 		 */
 		switch (j->jointype)
 		{
 			case JOIN_INNER:
-				leftids = distribute_quals_to_rels(root, j->larg,
-												   below_outer_join);
-				rightids = distribute_quals_to_rels(root, j->rarg,
-													below_outer_join);
-
-				result = bms_union(leftids, rightids);
+				leftjoinlist = deconstruct_recurse(root, j->larg,
+												   below_outer_join,
+												   &leftids);
+				rightjoinlist = deconstruct_recurse(root, j->rarg,
+													below_outer_join,
+													&rightids);
+				*qualscope = bms_union(leftids, rightids);
 				/* Inner join adds no restrictions for quals */
 				nonnullable_rels = NULL;
-				nullable_rels = NULL;
 				break;
 			case JOIN_LEFT:
-				leftids = distribute_quals_to_rels(root, j->larg,
-												   below_outer_join);
-				rightids = distribute_quals_to_rels(root, j->rarg,
-													true);
-
-				result = bms_union(leftids, rightids);
+				leftjoinlist = deconstruct_recurse(root, j->larg,
+												   below_outer_join,
+												   &leftids);
+				rightjoinlist = deconstruct_recurse(root, j->rarg,
+													true,
+													&rightids);
+				*qualscope = bms_union(leftids, rightids);
 				nonnullable_rels = leftids;
-				nullable_rels = rightids;
 				break;
 			case JOIN_FULL:
-				leftids = distribute_quals_to_rels(root, j->larg,
-												   true);
-				rightids = distribute_quals_to_rels(root, j->rarg,
-													true);
-
-				result = bms_union(leftids, rightids);
+				leftjoinlist = deconstruct_recurse(root, j->larg,
+												   true,
+												   &leftids);
+				rightjoinlist = deconstruct_recurse(root, j->rarg,
+													true,
+													&rightids);
+				*qualscope = bms_union(leftids, rightids);
 				/* each side is both outer and inner */
-				nonnullable_rels = result;
-				nullable_rels = result;
+				nonnullable_rels = *qualscope;
 				break;
 			case JOIN_RIGHT:
-				leftids = distribute_quals_to_rels(root, j->larg,
-												   true);
-				rightids = distribute_quals_to_rels(root, j->rarg,
-													below_outer_join);
-
-				result = bms_union(leftids, rightids);
-				nonnullable_rels = rightids;
-				nullable_rels = leftids;
+				/* notice we switch leftids and rightids */
+				leftjoinlist = deconstruct_recurse(root, j->larg,
+												   true,
+												   &rightids);
+				rightjoinlist = deconstruct_recurse(root, j->rarg,
+													below_outer_join,
+													&leftids);
+				*qualscope = bms_union(leftids, rightids);
+				nonnullable_rels = leftids;
 				break;
 			case JOIN_UNION:
 
@@ -309,73 +372,184 @@ distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("UNION JOIN is not implemented")));
 				nonnullable_rels = NULL;		/* keep compiler quiet */
-				nullable_rels = NULL;
+				leftjoinlist = rightjoinlist = NIL;
 				break;
 			default:
 				elog(ERROR, "unrecognized join type: %d",
 					 (int) j->jointype);
 				nonnullable_rels = NULL;		/* keep compiler quiet */
-				nullable_rels = NULL;
+				leftjoinlist = rightjoinlist = NIL;
 				break;
 		}
 
+		/*
+		 * For an OJ, form the OuterJoinInfo now, because we need the OJ's
+		 * semantic scope (ojscope) to pass to distribute_qual_to_rels.
+		 */
+		if (j->jointype != JOIN_INNER)
+		{
+			ojinfo = make_outerjoininfo(root, leftids, rightids,
+										(j->jointype == JOIN_FULL), j->quals);
+			ojscope = bms_union(ojinfo->min_lefthand, ojinfo->min_righthand);
+		}
+		else
+		{
+			ojinfo = NULL;
+			ojscope = NULL;
+		}
+
+		/* Process the qual clauses */
 		foreach(qual, (List *) j->quals)
 			distribute_qual_to_rels(root, (Node *) lfirst(qual),
 									false, false, below_outer_join,
-									nonnullable_rels, result);
+									*qualscope, ojscope, nonnullable_rels);
+
+		/* Now we can add the OuterJoinInfo to oj_info_list */
+		if (ojinfo)
+			root->oj_info_list = lappend(root->oj_info_list, ojinfo);
 
-		if (nullable_rels != NULL)
-			mark_baserels_for_outer_join(root, nullable_rels, result);
+		/*
+		 * Finally, compute the output joinlist.  We fold subproblems together
+		 * except at a FULL JOIN or where join_collapse_limit would be
+		 * exceeded.
+		 */
+		if (j->jointype != JOIN_FULL &&
+			(list_length(leftjoinlist) + list_length(rightjoinlist) <=
+			 join_collapse_limit))
+			joinlist = list_concat(leftjoinlist, rightjoinlist);
+		else					/* force the join order at this node */
+			joinlist = list_make1(list_make2(leftjoinlist, rightjoinlist));
 	}
 	else
+	{
 		elog(ERROR, "unrecognized node type: %d",
 			 (int) nodeTag(jtnode));
-	return result;
+		joinlist = NIL;			/* keep compiler quiet */
+	}
+	return joinlist;
 }
 
 /*
- * mark_baserels_for_outer_join
- *	  Mark all base rels listed in 'rels' as having the given outerjoinset.
+ * make_outerjoininfo
+ *	  Build an OuterJoinInfo for the current outer join
+ *
+ * Inputs:
+ *	left_rels: the base Relids syntactically on outer side of join
+ *	right_rels: the base Relids syntactically on inner side of join
+ *	is_full_join: what it says
+ *	clause: the outer join's join condition
+ *
+ * If the join is a RIGHT JOIN, left_rels and right_rels are switched by
+ * the caller, so that left_rels is always the nonnullable side.  Hence
+ * we need only distinguish the LEFT and FULL cases.
+ *
+ * The node should eventually be put into root->oj_info_list, but we
+ * do not do that here.
  */
-static void
-mark_baserels_for_outer_join(PlannerInfo *root, Relids rels, Relids outerrels)
+static OuterJoinInfo *
+make_outerjoininfo(PlannerInfo *root,
+				   Relids left_rels, Relids right_rels,
+				   bool is_full_join, Node *clause)
 {
-	Relids		tmprelids;
-	int			relno;
+	OuterJoinInfo *ojinfo = makeNode(OuterJoinInfo);
+	Relids		clause_relids;
+	Relids		strict_relids;
+	ListCell   *l;
+
+	/* If it's a full join, no need to be very smart */
+	ojinfo->is_full_join = is_full_join;
+	if (is_full_join)
+	{
+		ojinfo->min_lefthand = left_rels;
+		ojinfo->min_righthand = right_rels;
+		ojinfo->lhs_strict = false;			/* don't care about this */
+		return ojinfo;
+	}
+
+	/*
+	 * Retrieve all relids mentioned within the join clause.
+	 */
+	clause_relids = pull_varnos(clause);
+
+	/*
+	 * For which relids is the clause strict, ie, it cannot succeed if the
+	 * rel's columns are all NULL?
+	 */
+	strict_relids = find_nonnullable_rels(clause);
 
-	tmprelids = bms_copy(rels);
-	while ((relno = bms_first_member(tmprelids)) >= 0)
+	/* Remember whether the clause is strict for any LHS relations */
+	ojinfo->lhs_strict = bms_overlap(strict_relids, left_rels);
+
+	/*
+	 * Required LHS is basically the LHS rels mentioned in the clause...
+	 * but if there aren't any, punt and make it the full LHS, to avoid
+	 * having an empty min_lefthand which will confuse later processing.
+	 * (We don't try to be smart about such cases, just correct.)
+	 * We may have to add more rels based on lower outer joins; see below.
+	 */
+	ojinfo->min_lefthand = bms_intersect(clause_relids, left_rels);
+	if (bms_is_empty(ojinfo->min_lefthand))
+		ojinfo->min_lefthand = bms_copy(left_rels);
+
+	/*
+	 * Required RHS is normally the full set of RHS rels.  Sometimes we
+	 * can exclude some, see below.
+	 */
+	ojinfo->min_righthand = bms_copy(right_rels);
+
+	foreach(l, root->oj_info_list)
 	{
-		RelOptInfo *rel = find_base_rel(root, relno);
+		OuterJoinInfo *otherinfo = (OuterJoinInfo *) lfirst(l);
+
+		/* ignore full joins --- other mechanisms preserve their ordering */
+		if (otherinfo->is_full_join)
+			continue;
 
 		/*
-		 * Since we do this bottom-up, any outer-rels previously marked should
-		 * be within the new outer join set.
+		 * For a lower OJ in our LHS, if our join condition uses the lower
+		 * join's RHS and is not strict for that rel, we must preserve the
+		 * ordering of the two OJs, so add lower OJ's full required relset to
+		 * min_lefthand.
 		 */
-		Assert(bms_is_subset(rel->outerjoinset, outerrels));
-
+		if (bms_overlap(ojinfo->min_lefthand, otherinfo->min_righthand) &&
+			!bms_overlap(strict_relids, otherinfo->min_righthand))
+		{
+			ojinfo->min_lefthand = bms_add_members(ojinfo->min_lefthand,
+												   otherinfo->min_lefthand);
+			ojinfo->min_lefthand = bms_add_members(ojinfo->min_lefthand,
+												   otherinfo->min_righthand);
+		}
 		/*
-		 * Presently the executor cannot support FOR UPDATE/SHARE marking of
-		 * rels appearing on the nullable side of an outer join. (It's
-		 * somewhat unclear what that would mean, anyway: what should we mark
-		 * when a result row is generated from no element of the nullable
-		 * relation?)  So, complain if target rel is FOR UPDATE/SHARE. It's
-		 * sufficient to make this check once per rel, so do it only if rel
-		 * wasn't already known nullable.
+		 * For a lower OJ in our RHS, if our join condition does not use the
+		 * lower join's RHS and the lower OJ's join condition is strict, we
+		 * can interchange the ordering of the two OJs, so exclude the lower
+		 * RHS from our min_righthand.
 		 */
-		if (rel->outerjoinset == NULL)
+		if (bms_overlap(ojinfo->min_righthand, otherinfo->min_righthand) &&
+			!bms_overlap(clause_relids, otherinfo->min_righthand) &&
+			otherinfo->lhs_strict)
 		{
-			if (list_member_int(root->parse->rowMarks, relno))
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join")));
+			ojinfo->min_righthand = bms_del_members(ojinfo->min_righthand,
+													otherinfo->min_righthand);
 		}
-
-		rel->outerjoinset = outerrels;
 	}
-	bms_free(tmprelids);
+
+	/* Neither set should be empty, else we might get confused later */
+	Assert(!bms_is_empty(ojinfo->min_lefthand));
+	Assert(!bms_is_empty(ojinfo->min_righthand));
+	/* Shouldn't overlap either */
+	Assert(!bms_overlap(ojinfo->min_lefthand, ojinfo->min_righthand));
+
+	return ojinfo;
 }
 
+
+/*****************************************************************************
+ *
+ *	  QUALIFICATIONS
+ *
+ *****************************************************************************/
+
 /*
  * distribute_qual_to_rels
  *	  Add clause information to either the baserestrictinfo or joininfo list
@@ -392,21 +566,26 @@ mark_baserels_for_outer_join(PlannerInfo *root, Relids rels, Relids outerrels)
  * 'is_deduced': TRUE if the qual came from implied-equality deduction
  * 'below_outer_join': TRUE if the qual is from a JOIN/ON that is below the
  *		nullable side of a higher-level outer join.
+ * 'qualscope': set of baserels the qual's syntactic scope covers
+ * 'ojscope': NULL if not an outer-join qual, else the minimum set of baserels
+ *		needed to form this join
  * 'outerjoin_nonnullable': NULL if not an outer-join qual, else the set of
  *		baserels appearing on the outer (nonnullable) side of the join
- * 'qualscope': set of baserels the qual's syntactic scope covers
+ *		(for FULL JOIN this includes both sides of the join, and must in fact
+ *		equal qualscope)
  *
- * 'qualscope' identifies what level of JOIN the qual came from.  For a top
- * level qual (WHERE qual), qualscope lists all baserel ids and in addition
- * 'is_pushed_down' will be TRUE.
+ * 'qualscope' identifies what level of JOIN the qual came from syntactically.
+ * 'ojscope' is needed if we decide to force the qual up to the outer-join
+ * level, which is ojscope and not necessarily the same as qualscope.
  */
 static void
 distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 						bool is_pushed_down,
 						bool is_deduced,
 						bool below_outer_join,
-						Relids outerjoin_nonnullable,
-						Relids qualscope)
+						Relids qualscope,
+						Relids ojscope,
+						Relids outerjoin_nonnullable)
 {
 	Relids		relids;
 	bool		outerjoin_delayed;
@@ -427,16 +606,20 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 	 */
 	if (!bms_is_subset(relids, qualscope))
 		elog(ERROR, "JOIN qualification may not refer to other relations");
+	if (ojscope && !bms_is_subset(relids, ojscope))
+		elog(ERROR, "JOIN qualification may not refer to other relations");
 
 	/*
 	 * If the clause is variable-free, we force it to be evaluated at its
 	 * original syntactic level.  Note that this should not happen for
 	 * top-level clauses, because query_planner() special-cases them.  But it
 	 * will happen for variable-free JOIN/ON clauses.  We don't have to be
-	 * real smart about such a case, we just have to be correct.
+	 * real smart about such a case, we just have to be correct.  Also note
+	 * that for an outer-join clause, we must force it to the OJ's semantic
+	 * level, not the syntactic scope.
 	 */
 	if (bms_is_empty(relids))
-		relids = qualscope;
+		relids = ojscope ? ojscope : qualscope;
 
 	/*
 	 * Check to see if clause application must be delayed by outer-join
@@ -451,6 +634,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 		 * be delayed by outer-join rules.
 		 */
 		Assert(bms_equal(relids, qualscope));
+		Assert(!ojscope);
 		/* Needn't feed it back for more deductions */
 		outerjoin_delayed = false;
 		maybe_equijoin = false;
@@ -471,7 +655,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 		 * result, so we treat it the same as an ordinary inner-join qual,
 		 * except for not setting maybe_equijoin (see below).
 		 */
-		relids = qualscope;
+		Assert(ojscope);
+		relids = ojscope;
 		outerjoin_delayed = true;
 
 		/*
@@ -493,28 +678,27 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 		 * we have all the rels it mentions, and (2) we are at or above any
 		 * outer joins that can null any of these rels and are below the
 		 * syntactic location of the given qual. To enforce the latter, scan
-		 * the base rels listed in relids, and merge their outer-join sets
+		 * the oj_info_list and merge the required-relid sets of any such OJs
 		 * into the clause's own reference list.  At the time we are called,
-		 * the outerjoinset of each baserel will show exactly those outer
-		 * joins that are below the qual in the join tree.
+		 * the oj_info_list contains only outer joins below this qual.
 		 */
 		Relids		addrelids = NULL;
-		Relids		tmprelids;
-		int			relno;
+		ListCell   *l;
 
 		outerjoin_delayed = false;
-		tmprelids = bms_copy(relids);
-		while ((relno = bms_first_member(tmprelids)) >= 0)
+		foreach(l, root->oj_info_list)
 		{
-			RelOptInfo *rel = find_base_rel(root, relno);
+			OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
 
-			if (rel->outerjoinset != NULL)
+			if (bms_overlap(relids, ojinfo->min_righthand) ||
+				(ojinfo->is_full_join &&
+				 bms_overlap(relids, ojinfo->min_lefthand)))
 			{
-				addrelids = bms_add_members(addrelids, rel->outerjoinset);
+				addrelids = bms_add_members(addrelids, ojinfo->min_lefthand);
+				addrelids = bms_add_members(addrelids, ojinfo->min_righthand);
 				outerjoin_delayed = true;
 			}
 		}
-		bms_free(tmprelids);
 
 		if (bms_is_subset(addrelids, relids))
 		{
@@ -553,9 +737,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 	 * its original syntactic level.  This allows us to distinguish original
 	 * JOIN/ON quals from higher-level quals pushed down to the same joinrel.
 	 * A qual originating from WHERE is always considered "pushed down".
+	 * Note that for an outer-join qual, we have to compare to ojscope not
+	 * qualscope.
 	 */
 	if (!is_pushed_down)
-		is_pushed_down = !bms_equal(relids, qualscope);
+		is_pushed_down = !bms_equal(relids, ojscope ? ojscope : qualscope);
 
 	/*
 	 * Build the RestrictInfo node itself.
@@ -864,7 +1050,7 @@ process_implied_equality(PlannerInfo *root,
 	 * taken for an original JOIN/ON clause.
 	 */
 	distribute_qual_to_rels(root, (Node *) clause,
-							true, true, false, NULL, relids);
+							true, true, false, relids, NULL, NULL);
 }
 
 /*
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 06d351bf59d..3729fd2b199 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.90 2005/11/22 18:17:13 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.91 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -83,6 +83,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 {
 	Query	   *parse = root->parse;
 	List	   *constant_quals;
+	List	   *joinlist;
 	RelOptInfo *final_rel;
 	Path	   *cheapestpath;
 	Path	   *sortedpath;
@@ -134,6 +135,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 	root->left_join_clauses = NIL;
 	root->right_join_clauses = NIL;
 	root->full_join_clauses = NIL;
+	root->oj_info_list = NIL;
 
 	/*
 	 * Construct RelOptInfo nodes for all base relations in query.
@@ -144,7 +146,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 	 * Examine the targetlist and qualifications, adding entries to baserel
 	 * targetlists for all referenced Vars.  Restrict and join clauses are
 	 * added to appropriate lists belonging to the mentioned relations.  We
-	 * also build lists of equijoined keys for pathkey construction.
+	 * also build lists of equijoined keys for pathkey construction, and
+	 * form a target joinlist for make_one_rel() to work from.
 	 *
 	 * Note: all subplan nodes will have "flat" (var-only) tlists. This
 	 * implies that all expression evaluations are done at the root of the
@@ -154,7 +157,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 	 */
 	build_base_rel_tlists(root, tlist);
 
-	(void) distribute_quals_to_rels(root, (Node *) parse->jointree, false);
+	joinlist = deconstruct_jointree(root);
 
 	/*
 	 * Use the completed lists of equijoined keys to deduce any implied but
@@ -175,7 +178,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
 	/*
 	 * Ready to do the primary planning.
 	 */
-	final_rel = make_one_rel(root);
+	final_rel = make_one_rel(root, joinlist);
 
 	if (!final_rel || !final_rel->cheapest_total_path)
 		elog(ERROR, "failed to construct the join relation");
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 9f6d0957b1e..0dd9e1e8d2a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.195 2005/11/22 18:17:13 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.196 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -352,17 +352,6 @@ subquery_planner(Query *parse, double tuple_fraction,
 	if (root->hasOuterJoins)
 		reduce_outer_joins(root);
 
-	/*
-	 * See if we can simplify the jointree; opportunities for this may come
-	 * from having pulled up subqueries, or from flattening explicit JOIN
-	 * syntax.	We must do this after flattening JOIN alias variables, since
-	 * eliminating explicit JOIN nodes from the jointree will cause
-	 * get_relids_for_join() to fail.  But it should happen after
-	 * reduce_outer_joins, anyway.
-	 */
-	parse->jointree = (FromExpr *)
-		simplify_jointree(root, (Node *) parse->jointree);
-
 	/*
 	 * Do the main planning.  If we have an inherited target relation, that
 	 * needs special processing, else go straight to grouping_planner.
@@ -567,6 +556,8 @@ inheritance_planner(PlannerInfo *root, List *inheritlist)
 			adjust_inherited_attrs((Node *) root->in_info_list,
 								   parentRTindex, parentOID,
 								   childRTindex, childOID);
+		/* There shouldn't be any OJ info to translate, though */
+		Assert(subroot.oj_info_list == NIL);
 
 		/* Generate plan */
 		subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index cc3d904eca5..a25787685b1 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -8,7 +8,6 @@
  *		pull_up_subqueries
  *		do expression preprocessing (including flattening JOIN alias vars)
  *		reduce_outer_joins
- *		simplify_jointree
  *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -16,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.32 2005/11/22 18:17:14 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.33 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,11 +30,6 @@
 #include "utils/lsyscache.h"
 
 
-/* These parameters are set by GUC */
-int			from_collapse_limit;
-int			join_collapse_limit;
-
-
 typedef struct reduce_outer_joins_state
 {
 	Relids		relids;			/* base relids within this subtree */
@@ -52,7 +46,6 @@ static void reduce_outer_joins_pass2(Node *jtnode,
 						 reduce_outer_joins_state *state,
 						 PlannerInfo *root,
 						 Relids nonnullable_rels);
-static Relids find_nonnullable_rels(Node *node, bool top_level);
 static void fix_in_clause_relids(List *in_info_list, int varno,
 					 Relids subrelids);
 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
@@ -334,6 +327,13 @@ pull_up_subqueries(PlannerInfo *root, Node *jtnode, bool below_outer_join)
 			root->in_info_list = list_concat(root->in_info_list,
 											 subroot->in_info_list);
 
+			/*
+			 * We don't have to do the equivalent bookkeeping for outer-join
+			 * info, because that hasn't been set up yet.
+			 */
+			Assert(root->oj_info_list == NIL);
+			Assert(subroot->oj_info_list == NIL);
+
 			/*
 			 * Miscellaneous housekeeping.
 			 */
@@ -695,7 +695,7 @@ reduce_outer_joins_pass2(Node *jtnode,
 		Relids		pass_nonnullable;
 
 		/* Scan quals to see if we can add any nonnullability constraints */
-		pass_nonnullable = find_nonnullable_rels(f->quals, true);
+		pass_nonnullable = find_nonnullable_rels(f->quals);
 		pass_nonnullable = bms_add_members(pass_nonnullable,
 										   nonnullable_rels);
 		/* And recurse --- but only into interesting subtrees */
@@ -772,7 +772,7 @@ reduce_outer_joins_pass2(Node *jtnode,
 			 */
 			if (jointype != JOIN_FULL)
 			{
-				local_nonnullable = find_nonnullable_rels(j->quals, true);
+				local_nonnullable = find_nonnullable_rels(j->quals);
 				local_nonnullable = bms_add_members(local_nonnullable,
 													nonnullable_rels);
 			}
@@ -805,256 +805,6 @@ reduce_outer_joins_pass2(Node *jtnode,
 			 (int) nodeTag(jtnode));
 }
 
-/*
- * find_nonnullable_rels
- *		Determine which base rels are forced nonnullable by given quals
- *
- * We don't use expression_tree_walker here because we don't want to
- * descend through very many kinds of nodes; only the ones we can be sure
- * are strict.	We can descend through the top level of implicit AND'ing,
- * but not through any explicit ANDs (or ORs) below that, since those are not
- * strict constructs.  The List case handles the top-level implicit AND list
- * as well as lists of arguments to strict operators/functions.
- */
-static Relids
-find_nonnullable_rels(Node *node, bool top_level)
-{
-	Relids		result = NULL;
-
-	if (node == NULL)
-		return NULL;
-	if (IsA(node, Var))
-	{
-		Var		   *var = (Var *) node;
-
-		if (var->varlevelsup == 0)
-			result = bms_make_singleton(var->varno);
-	}
-	else if (IsA(node, List))
-	{
-		ListCell   *l;
-
-		foreach(l, (List *) node)
-		{
-			result = bms_join(result, find_nonnullable_rels(lfirst(l),
-															top_level));
-		}
-	}
-	else if (IsA(node, FuncExpr))
-	{
-		FuncExpr   *expr = (FuncExpr *) node;
-
-		if (func_strict(expr->funcid))
-			result = find_nonnullable_rels((Node *) expr->args, false);
-	}
-	else if (IsA(node, OpExpr))
-	{
-		OpExpr	   *expr = (OpExpr *) node;
-
-		if (op_strict(expr->opno))
-			result = find_nonnullable_rels((Node *) expr->args, false);
-	}
-	else if (IsA(node, BoolExpr))
-	{
-		BoolExpr   *expr = (BoolExpr *) node;
-
-		/* NOT is strict, others are not */
-		if (expr->boolop == NOT_EXPR)
-			result = find_nonnullable_rels((Node *) expr->args, false);
-	}
-	else if (IsA(node, RelabelType))
-	{
-		RelabelType *expr = (RelabelType *) node;
-
-		result = find_nonnullable_rels((Node *) expr->arg, top_level);
-	}
-	else if (IsA(node, ConvertRowtypeExpr))
-	{
-		/* not clear this is useful, but it can't hurt */
-		ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node;
-
-		result = find_nonnullable_rels((Node *) expr->arg, top_level);
-	}
-	else if (IsA(node, NullTest))
-	{
-		NullTest   *expr = (NullTest *) node;
-
-		/*
-		 * IS NOT NULL can be considered strict, but only at top level; else
-		 * we might have something like NOT (x IS NOT NULL).
-		 */
-		if (top_level && expr->nulltesttype == IS_NOT_NULL)
-			result = find_nonnullable_rels((Node *) expr->arg, false);
-	}
-	else if (IsA(node, BooleanTest))
-	{
-		BooleanTest *expr = (BooleanTest *) node;
-
-		/*
-		 * Appropriate boolean tests are strict at top level.
-		 */
-		if (top_level &&
-			(expr->booltesttype == IS_TRUE ||
-			 expr->booltesttype == IS_FALSE ||
-			 expr->booltesttype == IS_NOT_UNKNOWN))
-			result = find_nonnullable_rels((Node *) expr->arg, false);
-	}
-	return result;
-}
-
-/*
- * simplify_jointree
- *		Attempt to simplify a query's jointree.
- *
- * If we succeed in pulling up a subquery then we might form a jointree
- * in which a FromExpr is a direct child of another FromExpr.  In that
- * case we can consider collapsing the two FromExprs into one.	This is
- * an optional conversion, since the planner will work correctly either
- * way.  But we may find a better plan (at the cost of more planning time)
- * if we merge the two nodes, creating a single join search space out of
- * two.  To allow the user to trade off planning time against plan quality,
- * we provide a control parameter from_collapse_limit that limits the size
- * of the join search space that can be created this way.
- *
- * We also consider flattening explicit inner JOINs into FromExprs (which
- * will in turn allow them to be merged into parent FromExprs).  The tradeoffs
- * here are the same as for flattening FromExprs, but we use a different
- * control parameter so that the user can use explicit JOINs to control the
- * join order even when they are inner JOINs.
- *
- * NOTE: don't try to do this in the same jointree scan that does subquery
- * pullup!	Since we're changing the jointree structure here, that wouldn't
- * work reliably --- see comments for pull_up_subqueries().
- */
-Node *
-simplify_jointree(PlannerInfo *root, Node *jtnode)
-{
-	if (jtnode == NULL)
-		return NULL;
-	if (IsA(jtnode, RangeTblRef))
-	{
-		/* nothing to do here... */
-	}
-	else if (IsA(jtnode, FromExpr))
-	{
-		FromExpr   *f = (FromExpr *) jtnode;
-		List	   *newlist = NIL;
-		int			children_remaining;
-		ListCell   *l;
-
-		children_remaining = list_length(f->fromlist);
-		foreach(l, f->fromlist)
-		{
-			Node	   *child = (Node *) lfirst(l);
-
-			children_remaining--;
-			/* Recursively simplify this child... */
-			child = simplify_jointree(root, child);
-			/* Now, is it a FromExpr? */
-			if (child && IsA(child, FromExpr))
-			{
-				/*
-				 * Yes, so do we want to merge it into parent?	Always do so
-				 * if child has just one element (since that doesn't make the
-				 * parent's list any longer).  Otherwise merge if the
-				 * resulting join list would be no longer than
-				 * from_collapse_limit.
-				 */
-				FromExpr   *subf = (FromExpr *) child;
-				int			childlen = list_length(subf->fromlist);
-				int			myothers = list_length(newlist) + children_remaining;
-
-				if (childlen <= 1 ||
-					(childlen + myothers) <= from_collapse_limit)
-				{
-					newlist = list_concat(newlist, subf->fromlist);
-
-					/*
-					 * By now, the quals have been converted to implicit-AND
-					 * lists, so we just need to join the lists.  NOTE: we put
-					 * the pulled-up quals first.
-					 */
-					f->quals = (Node *) list_concat((List *) subf->quals,
-													(List *) f->quals);
-				}
-				else
-					newlist = lappend(newlist, child);
-			}
-			else
-				newlist = lappend(newlist, child);
-		}
-		f->fromlist = newlist;
-	}
-	else if (IsA(jtnode, JoinExpr))
-	{
-		JoinExpr   *j = (JoinExpr *) jtnode;
-
-		/* Recursively simplify the children... */
-		j->larg = simplify_jointree(root, j->larg);
-		j->rarg = simplify_jointree(root, j->rarg);
-
-		/*
-		 * If it is an outer join, we must not flatten it.	An inner join is
-		 * semantically equivalent to a FromExpr; we convert it to one,
-		 * allowing it to be flattened into its parent, if the resulting
-		 * FromExpr would have no more than join_collapse_limit members.
-		 */
-		if (j->jointype == JOIN_INNER && join_collapse_limit > 1)
-		{
-			int			leftlen,
-						rightlen;
-
-			if (j->larg && IsA(j->larg, FromExpr))
-				leftlen = list_length(((FromExpr *) j->larg)->fromlist);
-			else
-				leftlen = 1;
-			if (j->rarg && IsA(j->rarg, FromExpr))
-				rightlen = list_length(((FromExpr *) j->rarg)->fromlist);
-			else
-				rightlen = 1;
-			if ((leftlen + rightlen) <= join_collapse_limit)
-			{
-				FromExpr   *f = makeNode(FromExpr);
-
-				f->fromlist = NIL;
-				f->quals = NULL;
-
-				if (j->larg && IsA(j->larg, FromExpr))
-				{
-					FromExpr   *subf = (FromExpr *) j->larg;
-
-					f->fromlist = subf->fromlist;
-					f->quals = subf->quals;
-				}
-				else
-					f->fromlist = list_make1(j->larg);
-
-				if (j->rarg && IsA(j->rarg, FromExpr))
-				{
-					FromExpr   *subf = (FromExpr *) j->rarg;
-
-					f->fromlist = list_concat(f->fromlist,
-											  subf->fromlist);
-					f->quals = (Node *) list_concat((List *) f->quals,
-													(List *) subf->quals);
-				}
-				else
-					f->fromlist = lappend(f->fromlist, j->rarg);
-
-				/* pulled-up quals first */
-				f->quals = (Node *) list_concat((List *) f->quals,
-												(List *) j->quals);
-
-				return (Node *) f;
-			}
-		}
-	}
-	else
-		elog(ERROR, "unrecognized node type: %d",
-			 (int) nodeTag(jtnode));
-	return jtnode;
-}
-
 /*
  * fix_in_clause_relids: update RT-index sets of InClauseInfo nodes
  *
@@ -1128,9 +878,6 @@ get_relids_in_jointree(Node *jtnode)
 
 /*
  * get_relids_for_join: get set of base RT indexes making up a join
- *
- * NB: this will not work reliably after simplify_jointree() is run,
- * since that may eliminate join nodes from the jointree.
  */
 Relids
 get_relids_for_join(PlannerInfo *root, int joinrelid)
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 23b229815a4..2cdb3b35739 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.203 2005/11/22 18:17:14 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.204 2005/12/20 02:30:36 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -69,6 +69,7 @@ static bool contain_subplans_walker(Node *node, void *context);
 static bool contain_mutable_functions_walker(Node *node, void *context);
 static bool contain_volatile_functions_walker(Node *node, void *context);
 static bool contain_nonstrict_functions_walker(Node *node, void *context);
+static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
 static bool set_coercionform_dontcare_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node,
 							   eval_const_expressions_context *context);
@@ -861,6 +862,131 @@ contain_nonstrict_functions_walker(Node *node, void *context)
 }
 
 
+/*
+ * find_nonnullable_rels
+ *		Determine which base rels are forced nonnullable by given clause.
+ *
+ * Returns the set of all base relids that are referenced in the clause in
+ * such a way that the clause cannot possibly return TRUE if any one of these
+ * rels yields an all-NULL row.  (It is OK to err on the side of conservatism;
+ * hence the analysis here is simplistic.)
+ *
+ * The semantics here are subtly different from contain_nonstrict_functions:
+ * that function is concerned with NULL results from arbitrary expressions,
+ * but here we assume that the input is a Boolean expression, and wish to
+ * see if NULL inputs will provably cause a FALSE-or-NULL result.  We expect
+ * the expression to have been AND/OR flattened and converted to implicit-AND
+ * format.
+ *
+ * We don't use expression_tree_walker here because we don't want to
+ * descend through very many kinds of nodes; only the ones we can be sure
+ * are strict.  We can descend through the top level of implicit AND'ing,
+ * but not through any explicit ANDs (or ORs) below that, since those are not
+ * strict constructs.  The List case handles the top-level implicit AND list
+ * as well as lists of arguments to strict operators/functions.
+ */
+Relids
+find_nonnullable_rels(Node *clause)
+{
+	return find_nonnullable_rels_walker(clause, true);
+}
+
+static Relids
+find_nonnullable_rels_walker(Node *node, bool top_level)
+{
+	Relids		result = NULL;
+
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		if (var->varlevelsup == 0)
+			result = bms_make_singleton(var->varno);
+	}
+	else if (IsA(node, List))
+	{
+		ListCell   *l;
+
+		foreach(l, (List *) node)
+		{
+			result = bms_join(result,
+							  find_nonnullable_rels_walker(lfirst(l),
+														   top_level));
+		}
+	}
+	else if (IsA(node, FuncExpr))
+	{
+		FuncExpr   *expr = (FuncExpr *) node;
+
+		if (func_strict(expr->funcid))
+			result = find_nonnullable_rels_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, OpExpr))
+	{
+		OpExpr	   *expr = (OpExpr *) node;
+
+		if (op_strict(expr->opno))
+			result = find_nonnullable_rels_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, ScalarArrayOpExpr))
+	{
+		/* Strict if it's "foo op ANY array" and op is strict */
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
+
+		if (expr->useOr && op_strict(expr->opno))
+			result = find_nonnullable_rels_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, BoolExpr))
+	{
+		BoolExpr   *expr = (BoolExpr *) node;
+
+		/* NOT is strict, others are not */
+		if (expr->boolop == NOT_EXPR)
+			result = find_nonnullable_rels_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, RelabelType))
+	{
+		RelabelType *expr = (RelabelType *) node;
+
+		result = find_nonnullable_rels_walker((Node *) expr->arg, top_level);
+	}
+	else if (IsA(node, ConvertRowtypeExpr))
+	{
+		/* not clear this is useful, but it can't hurt */
+		ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node;
+
+		result = find_nonnullable_rels_walker((Node *) expr->arg, top_level);
+	}
+	else if (IsA(node, NullTest))
+	{
+		NullTest   *expr = (NullTest *) node;
+
+		/*
+		 * IS NOT NULL can be considered strict, but only at top level; else
+		 * we might have something like NOT (x IS NOT NULL).
+		 */
+		if (top_level && expr->nulltesttype == IS_NOT_NULL)
+			result = find_nonnullable_rels_walker((Node *) expr->arg, false);
+	}
+	else if (IsA(node, BooleanTest))
+	{
+		BooleanTest *expr = (BooleanTest *) node;
+
+		/*
+		 * Appropriate boolean tests are strict at top level.
+		 */
+		if (top_level &&
+			(expr->booltesttype == IS_TRUE ||
+			 expr->booltesttype == IS_FALSE ||
+			 expr->booltesttype == IS_NOT_UNKNOWN))
+			result = find_nonnullable_rels_walker((Node *) expr->arg, false);
+	}
+	return result;
+}
+
+
 /*****************************************************************************
  *		Check for "pseudo-constant" clauses
  *****************************************************************************/
@@ -2794,7 +2920,8 @@ expression_tree_walker(Node *node,
 		case T_CaseTestExpr:
 		case T_SetToDefault:
 		case T_RangeTblRef:
-			/* primitive node types with no subnodes */
+		case T_OuterJoinInfo:
+			/* primitive node types with no expression subnodes */
 			break;
 		case T_Aggref:
 			return walker(((Aggref *) node)->target, context);
@@ -3191,7 +3318,8 @@ expression_tree_mutator(Node *node,
 		case T_CaseTestExpr:
 		case T_SetToDefault:
 		case T_RangeTblRef:
-			/* primitive node types with no subnodes */
+		case T_OuterJoinInfo:
+			/* primitive node types with no expression subnodes */
 			return (Node *) copyObject(node);
 		case T_Aggref:
 			{
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ed8f4148e33..cef0c63a66f 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.73 2005/11/22 18:17:15 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.74 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -133,7 +133,6 @@ make_reloptinfo(PlannerInfo *root, int relid, RelOptKind reloptkind)
 	rel->baserestrictinfo = NIL;
 	rel->baserestrictcost.startup = 0;
 	rel->baserestrictcost.per_tuple = 0;
-	rel->outerjoinset = NULL;
 	rel->joininfo = NIL;
 	rel->index_outer_relids = NULL;
 	rel->index_inner_paths = NIL;
@@ -369,7 +368,6 @@ build_join_rel(PlannerInfo *root,
 	joinrel->baserestrictinfo = NIL;
 	joinrel->baserestrictcost.startup = 0;
 	joinrel->baserestrictcost.per_tuple = 0;
-	joinrel->outerjoinset = NULL;
 	joinrel->joininfo = NIL;
 	joinrel->index_outer_relids = NULL;
 	joinrel->index_inner_paths = NIL;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 01e66f69762..b7c4599a030 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.301 2005/11/22 18:17:26 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.302 2005/12/20 02:30:36 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -45,7 +45,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/geqo.h"
 #include "optimizer/paths.h"
-#include "optimizer/prep.h"
+#include "optimizer/planmain.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
 #include "postmaster/autovacuum.h"
@@ -1010,7 +1010,7 @@ static struct config_int ConfigureNamesInt[] =
 		{"join_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER,
 			gettext_noop("Sets the FROM-list size beyond which JOIN constructs are not "
 						 "flattened."),
-			gettext_noop("The planner will flatten explicit inner JOIN "
+			gettext_noop("The planner will flatten explicit JOIN "
 			"constructs into lists of FROM items whenever a list of no more "
 						 "than this many items would result.")
 		},
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index e9ec4b8ad65..0d6a4871ac2 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.178 2005/11/22 18:17:30 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.179 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -186,6 +186,7 @@ typedef enum NodeTag
 	T_PathKeyItem,
 	T_RestrictInfo,
 	T_InnerIndexscanInfo,
+	T_OuterJoinInfo,
 	T_InClauseInfo,
 
 	/*
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 1cdd64b26eb..40fda441b97 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.109 2005/10/15 02:49:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.110 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -845,12 +845,7 @@ typedef struct TargetEntry
  * or qualified join.  Also, FromExpr nodes can appear to denote an
  * ordinary cross-product join ("FROM foo, bar, baz WHERE ...").
  * FromExpr is like a JoinExpr of jointype JOIN_INNER, except that it
- * may have any number of child nodes, not just two.  Also, there is an
- * implementation-defined difference: the planner is allowed to join the
- * children of a FromExpr using whatever join order seems good to it.
- * At present, JoinExpr nodes are always joined in exactly the order
- * implied by the jointree structure (except the planner may choose to
- * swap inner and outer members of a join pair).
+ * may have any number of child nodes, not just two.
  *
  * NOTE: the top level of a Query's jointree is always a FromExpr.
  * Even if the jointree contains no rels, there will be a FromExpr.
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index aa6217d0313..1d490fc17bf 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.121 2005/11/26 22:14:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.122 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -97,6 +97,8 @@ typedef struct PlannerInfo
 	List	   *full_join_clauses;		/* list of RestrictInfos for full
 										 * outer join clauses */
 
+	List	   *oj_info_list;	/* list of OuterJoinInfos */
+
 	List	   *in_info_list;	/* list of InClauseInfos */
 
 	List	   *query_pathkeys; /* desired pathkeys for query_planner(), and
@@ -201,10 +203,6 @@ typedef struct PlannerInfo
  *					participates (only used for base rels)
  *		baserestrictcost - Estimated cost of evaluating the baserestrictinfo
  *					clauses at a single tuple (only used for base rels)
- *		outerjoinset - For a base rel: if the rel appears within the nullable
- *					side of an outer join, the set of all relids
- *					participating in the highest such outer join; else NULL.
- *					Otherwise, unused.
  *		joininfo  - List of RestrictInfo nodes, containing info about each
  *					join clause in which this relation participates
  *		index_outer_relids - only used for base rels; set of outer relids
@@ -228,10 +226,6 @@ typedef struct PlannerInfo
  * We store baserestrictcost in the RelOptInfo (for base relations) because
  * we know we will need it at least once (to price the sequential scan)
  * and may need it multiple times to price index scans.
- *
- * outerjoinset is used to ensure correct placement of WHERE clauses that
- * apply to outer-joined relations; we must not apply such WHERE clauses
- * until after the outer join is performed.
  *----------
  */
 typedef enum RelOptKind
@@ -277,7 +271,6 @@ typedef struct RelOptInfo
 	List	   *baserestrictinfo;		/* RestrictInfo structures (if base
 										 * rel) */
 	QualCost	baserestrictcost;		/* cost of evaluating the above */
-	Relids		outerjoinset;	/* set of base relids */
 	List	   *joininfo;		/* RestrictInfo structures for join clauses
 								 * involving this rel */
 
@@ -830,6 +823,40 @@ typedef struct InnerIndexscanInfo
 	Path	   *best_innerpath; /* best inner indexscan, or NULL if none */
 } InnerIndexscanInfo;
 
+/*
+ * Outer join info.
+ *
+ * One-sided outer joins constrain the order of joining partially but not
+ * completely.  We flatten such joins into the planner's top-level list of
+ * relations to join, but record information about each outer join in an
+ * OuterJoinInfo struct.  These structs are kept in the PlannerInfo node's
+ * oj_info_list.
+ *
+ * min_lefthand and min_righthand are the sets of base relids that must be
+ * available on each side when performing the outer join.  lhs_strict is
+ * true if the outer join's condition cannot succeed when the LHS variables
+ * are all NULL (this means that the outer join can commute with upper-level
+ * outer joins even if it appears in their RHS).  We don't bother to set
+ * lhs_strict for FULL JOINs, however.
+ *
+ * It is not valid for either min_lefthand or min_righthand to be an empty
+ * set; if either were, this would break the logic that enforces join order.
+ *
+ * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN;
+ * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN.
+ * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility
+ * of planning for them, because this simplifies make_join_rel()'s API.
+ */
+
+typedef struct OuterJoinInfo
+{
+	NodeTag		type;
+	Relids		min_lefthand;	/* min base relids needed on LHS; not empty */
+	Relids		min_righthand;	/* min base relids needed on RHS; not empty */
+	bool		is_full_join;	/* true for FULL JOIN, false for LEFT JOIN */
+	bool		lhs_strict;		/* joinclause is strict for some LHS rel */
+} OuterJoinInfo;
+
 /*
  * IN clause info.
  *
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index bf00f2cc97e..0d3770dc5c4 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.80 2005/10/15 02:49:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.81 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,6 +57,7 @@ extern bool contain_subplans(Node *clause);
 extern bool contain_mutable_functions(Node *clause);
 extern bool contain_volatile_functions(Node *clause);
 extern bool contain_nonstrict_functions(Node *clause);
+extern Relids find_nonnullable_rels(Node *clause);
 
 extern bool is_pseudo_constant_clause(Node *clause);
 extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index eba65c699c0..afe3a70d71b 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.89 2005/11/25 19:47:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.90 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,8 +23,7 @@
 extern bool enable_geqo;
 extern int	geqo_threshold;
 
-extern RelOptInfo *make_one_rel(PlannerInfo *root);
-extern RelOptInfo *make_fromexpr_rel(PlannerInfo *root, FromExpr *from);
+extern RelOptInfo *make_one_rel(PlannerInfo *root, List *joinlist);
 
 #ifdef OPTIMIZER_DEBUG
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
@@ -88,10 +87,8 @@ extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
  *	  routines to determine which relations to join
  */
 extern List *make_rels_by_joins(PlannerInfo *root, int level, List **joinrels);
-extern RelOptInfo *make_jointree_rel(PlannerInfo *root, Node *jtnode);
 extern RelOptInfo *make_join_rel(PlannerInfo *root,
-			  RelOptInfo *rel1, RelOptInfo *rel2,
-			  JoinType jointype);
+			  RelOptInfo *rel1, RelOptInfo *rel2);
 
 /*
  * pathkeys.c
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 0e78933f792..97d2287b5c8 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.90 2005/10/15 02:49:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.91 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,10 +65,12 @@ extern bool is_projection_capable_plan(Plan *plan);
 /*
  * prototypes for plan/initsplan.c
  */
+extern int	from_collapse_limit;
+extern int	join_collapse_limit;
+
 extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
 extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
-extern Relids distribute_quals_to_rels(PlannerInfo *root, Node *jtnode,
-						 bool below_outer_join);
+extern List *deconstruct_jointree(PlannerInfo *root);
 extern void process_implied_equality(PlannerInfo *root,
 						 Node *item1, Node *item2,
 						 Oid sortop1, Oid sortop2,
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index c26e6491f34..ce89771b179 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.52 2005/10/15 02:49:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.53 2005/12/20 02:30:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,14 +21,10 @@
 /*
  * prototypes for prepjointree.c
  */
-extern int	from_collapse_limit;
-extern int	join_collapse_limit;
-
 extern Node *pull_up_IN_clauses(PlannerInfo *root, Node *node);
 extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode,
 				   bool below_outer_join);
 extern void reduce_outer_joins(PlannerInfo *root);
-extern Node *simplify_jointree(PlannerInfo *root, Node *jtnode);
 extern Relids get_relids_in_jointree(Node *jtnode);
 extern Relids get_relids_for_join(PlannerInfo *root, int joinrelid);
 
-- 
GitLab