From c4d0ff32e9bfc5f9876854c093486e2a9428a617 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 22 Jan 2003 20:16:42 +0000
Subject: [PATCH] =?UTF-8?q?Make=20estimation=20of=20mergejoin=20scan=20sel?=
 =?UTF-8?q?ectivities=20more=20robust,=20per=20recent=20example=20from=20R?=
 =?UTF-8?q?a=C3=BAl=20Guti=C3=A9rrez.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/backend/optimizer/path/costsize.c | 16 +++++++++++-
 src/backend/utils/adt/selfuncs.c      | 36 +++++++++++++++++++--------
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 5146517132f..d0df5cab113 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.102 2003/01/22 20:16:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -791,8 +791,22 @@ cost_mergejoin(Path *path, Query *root,
 		innerscansel = firstclause->left_mergescansel;
 	}
 
+	/* convert selectivity to row count; must scan at least one row */
+
 	outer_rows = ceil(outer_path->parent->rows * outerscansel);
+	if (outer_rows < 1)
+		outer_rows = 1;
 	inner_rows = ceil(inner_path->parent->rows * innerscansel);
+	if (inner_rows < 1)
+		inner_rows = 1;
+
+	/*
+	 * Readjust scan selectivities to account for above rounding.  This is
+	 * normally an insignificant effect, but when there are only a few rows
+	 * in the inputs, failing to do this makes for a large percentage error.
+	 */
+	outerscansel = outer_rows / outer_path->parent->rows;
+	innerscansel = inner_rows / inner_path->parent->rows;
 
 	/* cost of source data */
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 42ad9f5f94b..20d353a0a50 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.127 2003/01/20 18:54:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.128 2003/01/22 20:16:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1742,7 +1742,9 @@ mergejoinscansel(Query *root, Node *clause,
 				rsortop,
 				ltop,
 				gtop,
-				revltop;
+				leop,
+				revgtop,
+				revleop;
 	Datum		leftmax,
 				rightmax;
 	double		selec;
@@ -1780,35 +1782,49 @@ mergejoinscansel(Query *root, Node *clause,
 	/* Look up the "left < right" and "left > right" operators */
 	op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);
 
-	/* Look up the "right < left" operator */
-	revltop = get_commutator(gtop);
-	if (!OidIsValid(revltop))
-		return;					/* shouldn't happen */
+	/* Look up the "left <= right" operator */
+	leop = get_negator(gtop);
+	if (!OidIsValid(leop))
+		return;					/* insufficient info in catalogs */
+
+	/* Look up the "right > left" operator */
+	revgtop = get_commutator(ltop);
+	if (!OidIsValid(revgtop))
+		return;					/* insufficient info in catalogs */
+
+	/* Look up the "right <= left" operator */
+	revleop = get_negator(revgtop);
+	if (!OidIsValid(revleop))
+		return;					/* insufficient info in catalogs */
 
 	/*
 	 * Now, the fraction of the left variable that will be scanned is the
 	 * fraction that's <= the right-side maximum value.  But only believe
 	 * non-default estimates, else stick with our 1.0.
 	 */
-	selec = scalarineqsel(root, ltop, false, left,
+	selec = scalarineqsel(root, leop, false, left,
 						  rightmax, right->vartype);
 	if (selec != DEFAULT_INEQ_SEL)
 		*leftscan = selec;
 
 	/* And similarly for the right variable. */
-	selec = scalarineqsel(root, revltop, false, right,
+	selec = scalarineqsel(root, revleop, false, right,
 						  leftmax, left->vartype);
 	if (selec != DEFAULT_INEQ_SEL)
 		*rightscan = selec;
 
 	/*
 	 * Only one of the two fractions can really be less than 1.0; believe
-	 * the smaller estimate and reset the other one to exactly 1.0.
+	 * the smaller estimate and reset the other one to exactly 1.0.  If we
+	 * get exactly equal estimates (as can easily happen with self-joins),
+	 * believe neither.
 	 */
 	if (*leftscan > *rightscan)
 		*leftscan = 1.0;
-	else
+	else if (*leftscan < *rightscan)
 		*rightscan = 1.0;
+	else
+		*leftscan = *rightscan = 1.0;
 }
 
 /*
-- 
GitLab