From 2e46b762eb1c15de2bcda785469a753a753747fb Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 28 Jan 2003 22:13:41 +0000
Subject: [PATCH] Extend join-selectivity API (oprjoin interface) so that join
 type is passed to join selectivity estimators.  Make use of this in eqjoinsel
 to derive non-bogus selectivity for IN clauses.  Further tweaking of cost
 estimation for IN.  initdb forced because of pg_proc.h changes.

---
 doc/src/sgml/indexcost.sgml              |   5 +-
 src/backend/catalog/pg_operator.c        |   7 +-
 src/backend/optimizer/path/clausesel.c   |  38 ++++---
 src/backend/optimizer/path/costsize.c    | 125 +++++++++++++----------
 src/backend/optimizer/path/indxpath.c    |   8 +-
 src/backend/optimizer/plan/subselect.c   |   4 +-
 src/backend/optimizer/util/plancat.c     |  13 +--
 src/backend/utils/adt/selfuncs.c         |  75 ++++++++++----
 src/include/catalog/catversion.h         |   4 +-
 src/include/catalog/pg_proc.h            |  32 +++---
 src/include/optimizer/cost.h             |  17 +--
 src/include/optimizer/plancat.h          |   7 +-
 src/include/utils/selfuncs.h             |   4 +-
 src/test/regress/expected/opr_sanity.out |   7 +-
 src/test/regress/expected/subselect.out  |   4 +-
 src/test/regress/sql/opr_sanity.sql      |   7 +-
 16 files changed, 221 insertions(+), 136 deletions(-)

diff --git a/doc/src/sgml/indexcost.sgml b/doc/src/sgml/indexcost.sgml
index 1211653edd2..6c8c940c100 100644
--- a/doc/src/sgml/indexcost.sgml
+++ b/doc/src/sgml/indexcost.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $
 -->
 
  <chapter id="indexcost">
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
 
      <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           lfirsti(rel->relids));
+                                           lfirsti(rel->relids),
+                                           JOIN_INNER);
      </programlisting>
     </para>
    </step>
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 941212a649f..4c09a40b1d7 100644
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
  *
  * NOTES
  *	  these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
 		typeId[0] = INTERNALOID;	/* Query */
 		typeId[1] = OIDOID;		/* operator OID */
 		typeId[2] = INTERNALOID;	/* args list */
+		typeId[3] = INT2OID;	/* jointype */
 
-		joinOid = LookupFuncName(joinName, 3, typeId);
+		joinOid = LookupFuncName(joinName, 4, typeId);
 		if (!OidIsValid(joinOid))
-			func_error("OperatorDef", joinName, 3, typeId, NULL);
+			func_error("OperatorDef", joinName, 4, typeId, NULL);
 	}
 	else
 		joinOid = InvalidOid;
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 84041a566d1..9df0a794782 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 Selectivity
 restrictlist_selectivity(Query *root,
 						 List *restrictinfo_list,
-						 int varRelid)
+						 int varRelid,
+						 JoinType jointype)
 {
 	List	   *clauselist = get_actual_clauses(restrictinfo_list);
 	Selectivity result;
 
-	result = clauselist_selectivity(root, clauselist, varRelid);
+	result = clauselist_selectivity(root, clauselist, varRelid, jointype);
 	freeList(clauselist);
 	return result;
 }
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
  *	  expression clauses.  The list can be empty, in which case 1.0
  *	  must be returned.
  *
- * See clause_selectivity() for the meaning of the varRelid parameter.
+ * See clause_selectivity() for the meaning of the additional parameters.
  *
  * Our basic approach is to take the product of the selectivities of the
  * subclauses.	However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
 Selectivity
 clauselist_selectivity(Query *root,
 					   List *clauses,
-					   int varRelid)
+					   int varRelid,
+					   JoinType jointype)
 {
 	Selectivity s1 = 1.0;
 	RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
 			}
 		}
 		/* Not the right form, so treat it generically. */
-		s2 = clause_selectivity(root, clause, varRelid);
+		s2 = clause_selectivity(root, clause, varRelid, jointype);
 		s1 = s1 * s2;
 	}
 
@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
  *
  * When varRelid is 0, all variables are treated as variables.	This
  * is appropriate for ordinary join clauses and restriction clauses.
+ *
+ * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
+ * if the clause isn't a join clause or the context is uncertain.
  */
 Selectivity
 clause_selectivity(Query *root,
 				   Node *clause,
-				   int varRelid)
+				   int varRelid,
+				   JoinType jointype)
 {
 	Selectivity s1 = 1.0;		/* default for any unhandled clause type */
 
@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
 		/* inverse of the selectivity of the underlying clause */
 		s1 = 1.0 - clause_selectivity(root,
 							  (Node *) get_notclausearg((Expr *) clause),
-									  varRelid);
+									  varRelid,
+									  jointype);
 	}
 	else if (and_clause(clause))
 	{
 		/* share code with clauselist_selectivity() */
 		s1 = clauselist_selectivity(root,
 									((BoolExpr *) clause)->args,
-									varRelid);
+									varRelid,
+									jointype);
 	}
 	else if (or_clause(clause))
 	{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
 		{
 			Selectivity s2 = clause_selectivity(root,
 												(Node *) lfirst(arg),
-												varRelid);
+												varRelid,
+												jointype);
 
 			s1 = s1 + s2 - s1 * s2;
 		}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
 		{
 			/* Estimate selectivity for a join clause. */
 			s1 = join_selectivity(root, opno,
-								  ((OpExpr *) clause)->args);
+								  ((OpExpr *) clause)->args,
+								  jointype);
 		}
 		else
 		{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
 		s1 = booltestsel(root,
 						 ((BooleanTest *) clause)->booltesttype,
 						 (Node *) ((BooleanTest *) clause)->arg,
-						 varRelid);
+						 varRelid,
+						 jointype);
 	}
 	else if (IsA(clause, RelabelType))
 	{
 		/* Not sure this case is needed, but it can't hurt */
 		s1 = clause_selectivity(root,
 								(Node *) ((RelabelType *) clause)->arg,
-								varRelid);
+								varRelid,
+								jointype);
 	}
 
 #ifdef SELECTIVITY_DEBUG
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d18e29ad6f4..56282406129 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,7 +104,8 @@ bool		enable_hashjoin = true;
 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
 											int nbuckets);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
-static Selectivity approx_selectivity(Query *root, List *quals);
+static Selectivity approx_selectivity(Query *root, List *quals,
+									  JoinType jointype);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
 	 */
 	if (path->jointype == JOIN_IN)
 	{
-		Selectivity	qual_selec = approx_selectivity(root, restrictlist);
+		Selectivity	qual_selec = approx_selectivity(root, restrictlist,
+													path->jointype);
 		double	qptuples;
 
 		qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	merge_selec = approx_selectivity(root, mergeclauses);
+	merge_selec = approx_selectivity(root, mergeclauses,
+									 path->jpath.jointype);
 	cost_qual_eval(&merge_qual_cost, mergeclauses);
 	qpquals = set_ptrDifference(restrictlist, mergeclauses);
-	qp_selec = approx_selectivity(root, qpquals);
+	qp_selec = approx_selectivity(root, qpquals,
+								  path->jpath.jointype);
 	cost_qual_eval(&qp_qual_cost, qpquals);
 	freeList(qpquals);
 
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	hash_selec = approx_selectivity(root, hashclauses);
+	hash_selec = approx_selectivity(root, hashclauses,
+									path->jpath.jointype);
 	cost_qual_eval(&hash_qual_cost, hashclauses);
 	qpquals = set_ptrDifference(restrictlist, hashclauses);
-	qp_selec = approx_selectivity(root, qpquals);
+	qp_selec = approx_selectivity(root, qpquals,
+								  path->jpath.jointype);
 	cost_qual_eval(&qp_qual_cost, qpquals);
 	freeList(qpquals);
 
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
 	 * Determine bucketsize fraction for inner relation.  We use the
 	 * smallest bucketsize estimated for any individual hashclause;
 	 * this is undoubtedly conservative.
+	 *
+	 * BUT: if inner relation has been unique-ified, we can assume it's
+	 * good for hashing.  This is important both because it's the right
+	 * answer, and because we avoid contaminating the cache with a value
+	 * that's wrong for non-unique-ified paths.
 	 */
-	innerbucketsize = 1.0;
-	foreach(hcl, hashclauses)
+	if (IsA(inner_path, UniquePath))
+		innerbucketsize = 1.0 / virtualbuckets;
+	else
 	{
-		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
-		Selectivity thisbucketsize;
+		innerbucketsize = 1.0;
+		foreach(hcl, hashclauses)
+		{
+			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+			Selectivity thisbucketsize;
 
-		Assert(IsA(restrictinfo, RestrictInfo));
+			Assert(IsA(restrictinfo, RestrictInfo));
 
-		/*
-		 * First we have to figure out which side of the hashjoin clause is the
-		 * inner side.
-		 *
-		 * Since we tend to visit the same clauses over and over when planning
-		 * a large query, we cache the bucketsize estimate in the RestrictInfo
-		 * node to avoid repeated lookups of statistics.
-		 */
-		if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
-		{
-			/* righthand side is inner */
-			thisbucketsize = restrictinfo->right_bucketsize;
-			if (thisbucketsize < 0)
+			/*
+			 * First we have to figure out which side of the hashjoin clause
+			 * is the inner side.
+			 *
+			 * Since we tend to visit the same clauses over and over when
+			 * planning a large query, we cache the bucketsize estimate in the
+			 * RestrictInfo node to avoid repeated lookups of statistics.
+			 */
+			if (is_subseti(restrictinfo->right_relids,
+						   inner_path->parent->relids))
 			{
-				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root,
+				/* righthand side is inner */
+				thisbucketsize = restrictinfo->right_bucketsize;
+				if (thisbucketsize < 0)
+				{
+					/* not cached yet */
+					thisbucketsize =
+						estimate_hash_bucketsize(root,
 									(Var *) get_rightop(restrictinfo->clause),
-														  virtualbuckets);
-				restrictinfo->right_bucketsize = thisbucketsize;
+												 virtualbuckets);
+					restrictinfo->right_bucketsize = thisbucketsize;
+				}
 			}
-		}
-		else
-		{
-			Assert(is_subseti(restrictinfo->left_relids,
-							  inner_path->parent->relids));
-			/* lefthand side is inner */
-			thisbucketsize = restrictinfo->left_bucketsize;
-			if (thisbucketsize < 0)
+			else
 			{
-				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root,
+				Assert(is_subseti(restrictinfo->left_relids,
+								  inner_path->parent->relids));
+				/* lefthand side is inner */
+				thisbucketsize = restrictinfo->left_bucketsize;
+				if (thisbucketsize < 0)
+				{
+					/* not cached yet */
+					thisbucketsize =
+						estimate_hash_bucketsize(root,
 									(Var *) get_leftop(restrictinfo->clause),
-														  virtualbuckets);
-				restrictinfo->left_bucketsize = thisbucketsize;
+												 virtualbuckets);
+					restrictinfo->left_bucketsize = thisbucketsize;
+				}
 			}
-		}
 
-		if (innerbucketsize > thisbucketsize)
-			innerbucketsize = thisbucketsize;
+			if (innerbucketsize > thisbucketsize)
+				innerbucketsize = thisbucketsize;
+		}
 	}
 
 	/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
  * seems OK to live with the approximation.
  */
 static Selectivity
-approx_selectivity(Query *root, List *quals)
+approx_selectivity(Query *root, List *quals, JoinType jointype)
 {
 	Selectivity total = 1.0;
 	List	   *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
 				restrictinfo->this_selec =
 					clause_selectivity(root,
 									   (Node *) restrictinfo->clause,
-									   0);
+									   0,
+									   jointype);
 			selec = restrictinfo->this_selec;
 		}
 		else
 		{
 			/* If it's a bare expression, must always do it the hard way */
-			selec = clause_selectivity(root, qual, 0);
+			selec = clause_selectivity(root, qual, 0, jointype);
 		}
 		total *= selec;
 	}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
 	temp = rel->tuples *
 		restrictlist_selectivity(root,
 								 rel->baserestrictinfo,
-								 lfirsti(rel->relids));
+								 lfirsti(rel->relids),
+								 JOIN_INNER);
 
 	/*
 	 * Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	 */
 	selec = restrictlist_selectivity(root,
 									 restrictlist,
-									 0);
+									 0,
+									 jointype);
 
 	/*
 	 * Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	 * For JOIN_IN and variants, the Cartesian product is figured with
 	 * respect to a unique-ified input, and then we can clamp to the size
 	 * of the other input.
-	 * XXX it's not at all clear that the ordinary selectivity calculation
-	 * is appropriate in this case.
 	 */
 	switch (jointype)
 	{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
 	temp = rel->tuples *
 		restrictlist_selectivity(root,
 								 rel->baserestrictinfo,
-								 lfirsti(rel->relids));
+								 lfirsti(rel->relids),
+								 JOIN_INNER);
 
 	/*
 	 * Force estimate to be at least one row, to make explain output look
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 443d54c6473..98e4d59f2df 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root,
 	 * selectivity.  However, since RestrictInfo nodes aren't copied when
 	 * linking them into different lists, it should be sufficient to use
 	 * pointer comparison to remove duplicates.)
+	 *
+	 * Always assume the join type is JOIN_INNER; even if some of the
+	 * join clauses come from other contexts, that's not our problem.
 	 */
 	pathnode->rows = rel->tuples *
 		restrictlist_selectivity(root,
 								 set_ptrUnion(rel->baserestrictinfo,
 											  clausegroup),
-								 lfirsti(rel->relids));
+								 lfirsti(rel->relids),
+								 JOIN_INNER);
 	/* Like costsize.c, force estimate to be at least one row */
 	if (pathnode->rows < 1.0)
 		pathnode->rows = 1.0;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 5f420f37250..9f56a9f38d5 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 
 						qualsel = clauselist_selectivity(subquery,
 														 plan->qual,
-														 0);
+														 0, JOIN_INNER);
 						/* Is 10% selectivity a good threshold?? */
 						use_material = qualsel < 0.10;
 					}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 15120fafcd8..4a9f63312c3 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId)
  * This code executes registered procedures stored in the
  * operator relation, by calling the function manager.
  *
- * varRelid is either 0 or a rangetable index.	See clause_selectivity()
- * for details about its meaning.
+ * See clause_selectivity() for the meaning of the additional parameters.
  */
 Selectivity
 restriction_selectivity(Query *root,
@@ -237,7 +236,8 @@ restriction_selectivity(Query *root,
 Selectivity
 join_selectivity(Query *root,
 				 Oid operator,
-				 List *args)
+				 List *args,
+				 JoinType jointype)
 {
 	RegProcedure oprjoin = get_oprjoin(operator);
 	float8		result;
@@ -249,10 +249,11 @@ join_selectivity(Query *root,
 	if (!oprjoin)
 		return (Selectivity) 0.5;
 
-	result = DatumGetFloat8(OidFunctionCall3(oprjoin,
+	result = DatumGetFloat8(OidFunctionCall4(oprjoin,
 											 PointerGetDatum(root),
 											 ObjectIdGetDatum(operator),
-											 PointerGetDatum(args)));
+											 PointerGetDatum(args),
+											 Int16GetDatum(jointype)));
 
 	if (result < 0.0 || result > 1.0)
 		elog(ERROR, "join_selectivity: bad value %f", result);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 8fb4e84ad77..d099262c46f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.130 2003/01/27 20:51:54 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.131 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,13 +56,18 @@
  *		float8 oprrest (internal, oid, internal, int4);
  *
  * The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed:
+ * except that varRelid is not needed, and instead the join type is
+ * supplied:
  *
  *		Selectivity oprjoin (Query *root,
  *							 Oid operator,
- *							 List *args);
+ *							 List *args,
+ *							 JoinType jointype);
+ *
+ *		float8 oprjoin (internal, oid, internal, int2);
  *
- *		float8 oprjoin (internal, oid, internal);
+ * (We deliberately make the SQL signature different to facilitate
+ * catching errors.)
  *----------
  */
 
@@ -1009,7 +1014,8 @@ icnlikesel(PG_FUNCTION_ARGS)
  *		booltestsel		- Selectivity of BooleanTest Node.
  */
 Selectivity
-booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
+booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
+			int varRelid, JoinType jointype)
 {
 	Var		   *var;
 	Oid			relid;
@@ -1047,11 +1053,13 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
 				break;
 			case IS_TRUE:
 			case IS_NOT_FALSE:
-				selec = (double) clause_selectivity(root, arg, varRelid);
+				selec = (double) clause_selectivity(root, arg,
+													varRelid, jointype);
 				break;
 			case IS_FALSE:
 			case IS_NOT_TRUE:
-				selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
+				selec = 1.0 - (double) clause_selectivity(root, arg,
+														  varRelid, jointype);
 				break;
 			default:
 				elog(ERROR, "booltestsel: unexpected booltesttype %d",
@@ -1321,6 +1329,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
 	Oid			operator = PG_GETARG_OID(1);
 	List	   *args = (List *) PG_GETARG_POINTER(2);
+	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
 	Var		   *var1;
 	Var		   *var2;
 	double		selec;
@@ -1421,6 +1430,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
 			FmgrInfo	eqproc;
 			bool	   *hasmatch1;
 			bool	   *hasmatch2;
+			double		nullfrac1 = stats1->stanullfrac;
+			double		nullfrac2 = stats2->stanullfrac;
 			double		matchprodfreq,
 						matchfreq1,
 						matchfreq2,
@@ -1434,10 +1445,36 @@ eqjoinsel(PG_FUNCTION_ARGS)
 						nmatches;
 
 			fmgr_info(get_opcode(operator), &eqproc);
-			hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool));
-			memset(hasmatch1, 0, nvalues1 * sizeof(bool));
-			hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool));
-			memset(hasmatch2, 0, nvalues2 * sizeof(bool));
+			hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+			hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+			/*
+			 * If we are doing any variant of JOIN_IN, pretend all the values
+			 * of the righthand relation are unique (ie, act as if it's been
+			 * DISTINCT'd).
+			 *
+			 * NOTE: it might seem that we should unique-ify the lefthand
+			 * input when considering JOIN_REVERSE_IN.  But this is not so,
+			 * because the join clause we've been handed has not been
+			 * commuted from the way the parser originally wrote it.  We know
+			 * that the unique side of the IN clause is *always* on the right.
+			 *
+			 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT
+			 * or JOIN_RIGHT here, because we do not have enough information
+			 * to determine which var is really on which side of the join.
+			 * Perhaps someday we should pass in more information.
+			 */
+			if (jointype == JOIN_IN ||
+				jointype == JOIN_REVERSE_IN ||
+				jointype == JOIN_UNIQUE_INNER ||
+				jointype == JOIN_UNIQUE_OUTER)
+			{
+				float4	oneovern = 1.0 / nd2;
+
+				for (i = 0; i < nvalues2; i++)
+					numbers2[i] = oneovern;
+				nullfrac2 = oneovern;
+			}
 
 			/*
 			 * Note we assume that each MCV will match at most one member
@@ -1496,8 +1533,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
 			 * Compute total frequency of non-null values that are not in
 			 * the MCV lists.
 			 */
-			otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1;
-			otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2;
+			otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
+			otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
 			CLAMP_PROBABILITY(otherfreq1);
 			CLAMP_PROBABILITY(otherfreq2);
 
@@ -1585,6 +1622,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
 	Oid			operator = PG_GETARG_OID(1);
 	List	   *args = (List *) PG_GETARG_POINTER(2);
+	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
 	Oid			eqop;
 	float8		result;
 
@@ -1595,11 +1633,11 @@ neqjoinsel(PG_FUNCTION_ARGS)
 	eqop = get_negator(operator);
 	if (eqop)
 	{
-		result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel,
+		result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
 													PointerGetDatum(root),
-												  ObjectIdGetDatum(eqop),
-												 PointerGetDatum(args)));
-
+													ObjectIdGetDatum(eqop),
+													PointerGetDatum(args),
+													Int16GetDatum(jointype)));
 	}
 	else
 	{
@@ -3784,7 +3822,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
 
 	/* Estimate the fraction of main-table tuples that will be visited */
 	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
-											   lfirsti(rel->relids));
+											   lfirsti(rel->relids),
+											   JOIN_INNER);
 
 	/*
 	 * Estimate the number of tuples that will be visited.	We do it in
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b679fdb5ddc..d234eb32895 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.173 2003/01/23 23:39:04 petere Exp $
+ * $Id: catversion.h,v 1.174 2003/01/28 22:13:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200301241
+#define CATALOG_VERSION_NO	200301281
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index db907f745fa..d7b13a762eb 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.281 2003/01/09 00:58:41 tgl Exp $
+ * $Id: pg_proc.h,v 1.282 2003/01/28 22:13:36 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -218,13 +218,13 @@ DATA(insert OID = 103 (  scalarltsel	   PGNSP PGUID 12 f f t f s 4 701 "2281 26
 DESCR("restriction selectivity of < and related operators on scalar datatypes");
 DATA(insert OID = 104 (  scalargtsel	   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  scalargtsel - _null_ ));
 DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 (  eqjoinsel		   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  eqjoinsel - _null_ ));
+DATA(insert OID = 105 (  eqjoinsel		   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  eqjoinsel - _null_ ));
 DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 (  neqjoinsel		   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  neqjoinsel - _null_ ));
+DATA(insert OID = 106 (  neqjoinsel		   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  neqjoinsel - _null_ ));
 DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  scalarltjoinsel - _null_ ));
+DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  scalarltjoinsel - _null_ ));
 DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  scalargtjoinsel - _null_ ));
+DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  scalargtjoinsel - _null_ ));
 DESCR("join selectivity of > and related operators on scalar datatypes");
 
 DATA(insert OID =  109 (  unknownin		   PGNSP PGUID 12 f f t f i 1 705 "2275"	unknownin - _null_ ));
@@ -290,7 +290,7 @@ DATA(insert OID = 138 (  box_center		   PGNSP PGUID 12 f f t f i 1 600 "603"  bo
 DESCR("center of");
 DATA(insert OID = 139 (  areasel		   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  areasel - _null_ ));
 DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 (  areajoinsel	   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  areajoinsel - _null_ ));
+DATA(insert OID = 140 (  areajoinsel	   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  areajoinsel - _null_ ));
 DESCR("join selectivity for area-comparison operators");
 DATA(insert OID = 141 (  int4mul		   PGNSP PGUID 12 f f t f i 2 23 "23 23"	int4mul - _null_ ));
 DESCR("multiply");
@@ -1590,11 +1590,11 @@ DESCR("current transaction time");
 
 DATA(insert OID = 1300 (  positionsel		   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  positionsel - _null_ ));
 DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 (  positionjoinsel	   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  positionjoinsel - _null_ ));
+DATA(insert OID = 1301 (  positionjoinsel	   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  positionjoinsel - _null_ ));
 DESCR("join selectivity for position-comparison operators");
 DATA(insert OID = 1302 (  contsel		   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  contsel - _null_ ));
 DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 (  contjoinsel	   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  contjoinsel - _null_ ));
+DATA(insert OID = 1303 (  contjoinsel	   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  contjoinsel - _null_ ));
 DESCR("join selectivity for containment comparison operators");
 
 DATA(insert OID = 1304 ( overlaps			 PGNSP PGUID 12 f f f f i 4 16 "1184 1184 1184 1184"	overlaps_timestamp - _null_ ));
@@ -2545,9 +2545,9 @@ DATA(insert OID = 1814 ( iclikesel			PGNSP PGUID 12 f f t f s 4 701 "2281 26 228
 DESCR("restriction selectivity of ILIKE");
 DATA(insert OID = 1815 ( icnlikesel			PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  icnlikesel - _null_ ));
 DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  iclikejoinsel - _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  iclikejoinsel - _null_ ));
 DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icnlikejoinsel - _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icnlikejoinsel - _null_ ));
 DESCR("join selectivity of NOT ILIKE");
 DATA(insert OID = 1818 ( regexeqsel			PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  regexeqsel - _null_ ));
 DESCR("restriction selectivity of regex match");
@@ -2561,17 +2561,17 @@ DATA(insert OID = 1822 ( nlikesel			PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281
 DESCR("restriction selectivity of NOT LIKE");
 DATA(insert OID = 1823 ( icregexnesel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  icregexnesel - _null_ ));
 DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  regexeqjoinsel - _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  regexeqjoinsel - _null_ ));
 DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  likejoinsel - _null_ ));
+DATA(insert OID = 1825 ( likejoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  likejoinsel - _null_ ));
 DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel	PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icregexeqjoinsel - _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel	PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icregexeqjoinsel - _null_ ));
 DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  regexnejoinsel - _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  regexnejoinsel - _null_ ));
 DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  nlikejoinsel - _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  nlikejoinsel - _null_ ));
 DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icregexnejoinsel - _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icregexnejoinsel - _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
 
 /* Aggregate-related functions */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index aca6097bc1c..0feb56dd7c9 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.51 2003/01/27 20:51:54 tgl Exp $
+ * $Id: cost.h,v 1.52 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -88,13 +88,16 @@ extern void set_function_size_estimates(Query *root, RelOptInfo *rel);
  *	  routines to compute clause selectivities
  */
 extern Selectivity restrictlist_selectivity(Query *root,
-						 List *restrictinfo_list,
-						 int varRelid);
+											List *restrictinfo_list,
+											int varRelid,
+											JoinType jointype);
 extern Selectivity clauselist_selectivity(Query *root,
-					   List *clauses,
-					   int varRelid);
+										  List *clauses,
+										  int varRelid,
+										  JoinType jointype);
 extern Selectivity clause_selectivity(Query *root,
-				   Node *clause,
-				   int varRelid);
+									  Node *clause,
+									  int varRelid,
+									  JoinType jointype);
 
 #endif   /* COST_H */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index abd09871feb..255d196d7d7 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: plancat.h,v 1.27 2002/06/20 20:29:51 momjian Exp $
+ * $Id: plancat.h,v 1.28 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,7 +34,8 @@ extern Selectivity restriction_selectivity(Query *root,
 						int varRelid);
 
 extern Selectivity join_selectivity(Query *root,
-				 Oid operator,
-				 List *args);
+									Oid operator,
+									List *args,
+									JoinType jointype);
 
 #endif   /* PLANCAT_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 037c2b2f5e3..757c0e1e1ac 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $
+ * $Id: selfuncs.h,v 1.12 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,7 +67,7 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
 
 extern Selectivity booltestsel(Query *root, BoolTestType booltesttype,
-							   Node *arg, int varRelid);
+							   Node *arg, int varRelid, JoinType jointype);
 extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype,
 							   Node *arg, int varRelid);
 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7ef807a95db..dcf295919c9 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -530,16 +530,17 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 3 OR
+     p2.pronargs != 4 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
-     p2.proargtypes[2] != 'internal'::regtype);
+     p2.proargtypes[2] != 'internal'::regtype OR
+     p2.proargtypes[3] != 'int2'::regtype);
  oid | oprname | oid | proname 
 -----+---------+-----+---------
 (0 rows)
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 8d7597863fc..5a2ef11c21b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field"
                      WHERE f3 IS NOT NULL);
  five | Correlated Field 
 ------+------------------
-      |                1
-      |                2
       |                2
       |                3
+      |                1
+      |                2
       |                3
 (5 rows)
 
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 650073cccc1..8d543932a7c 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -444,17 +444,18 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
 
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 3 OR
+     p2.pronargs != 4 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
-     p2.proargtypes[2] != 'internal'::regtype);
+     p2.proargtypes[2] != 'internal'::regtype OR
+     p2.proargtypes[3] != 'int2'::regtype);
 
 -- **************** pg_aggregate ****************
 
-- 
GitLab