From 7f3eba30c9d622d1981b1368f2d79ba0999cdff2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 23 Oct 2008 00:24:50 +0000
Subject: [PATCH] When estimating without benefit of MCV lists (suggesting that
 one or both inputs is unique or nearly so), make eqjoinsel() clamp the
 ndistinct estimates to be not more than the estimated number of rows coming
 from the input relations.  This allows the estimate to change in response to
 the selectivity of restriction conditions on the inputs.

This is a pretty narrow patch and maybe we should be more aggressive about
similarly clamping ndistinct in other cases; but I'm worried about
double-counting the effects of the restriction conditions.  However, it seems
to help for the case exhibited by Grzegorz Jaskiewicz (antijoin against a
small subset of a relation), so let's try this for awhile.
---
 src/backend/utils/adt/selfuncs.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index b659013bdec..d5a60256d12 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.256 2008/10/21 20:42:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.257 2008/10/23 00:24:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2143,10 +2143,22 @@ eqjoinsel_inner(Oid operator,
 		 * XXX Can we be smarter if we have an MCV list for just one side? It
 		 * seems that if we assume equal distribution for the other side, we
 		 * end up with the same answer anyway.
+		 *
+		 * An additional hack we use here is to clamp the nd1 and nd2 values
+		 * to not more than what we are estimating the input relation sizes
+		 * to be, providing a crude correction for the selectivity of
+		 * restriction clauses on those relations.  (We don't do that in the
+		 * other path since there we are comparing the nd values to stats for
+		 * the whole relations.)
 		 */
 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
 
+		if (vardata1->rel)
+			nd1 = Min(nd1, vardata1->rel->rows);
+		if (vardata2->rel)
+			nd2 = Min(nd2, vardata2->rel->rows);
+
 		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
 		if (nd1 > nd2)
 			selec /= nd1;
@@ -2305,6 +2317,11 @@ eqjoinsel_semi(Oid operator,
 		 */
 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 
+		if (vardata1->rel)
+			nd1 = Min(nd1, vardata1->rel->rows);
+		if (vardata2->rel)
+			nd2 = Min(nd2, vardata2->rel->rows);
+
 		if (nd1 <= nd2 || nd2 <= 0)
 			selec = 1.0 - nullfrac1;
 		else
-- 
GitLab