From 5ab15591d9bb5274f2937d5c5524e1b90b5734ed Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Tue, 15 Apr 2003 05:18:12 +0000 Subject: [PATCH] eqjoinsel's logic for case where MCV lists are not present should account for NULLs; in hindsight this is obvious since the code for the MCV-lists case would reduce to this when there are zero entries in both lists. Per example from Alec Mitchell. --- src/backend/utils/adt/selfuncs.c | 36 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 2a5ceb767f4..ca502aa448b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.134 2003/03/23 05:14:36 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.135 2003/04/15 05:18:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1591,27 +1591,33 @@ eqjoinsel(PG_FUNCTION_ARGS) { /* * We do not have MCV lists for both sides. Estimate the join - * selectivity as MIN(1/nd1, 1/nd2). This is plausible if we - * assume that the values are about equally distributed: a - * given tuple of rel1 will join to either 0 or N2/nd2 rows of - * rel2, so total join rows are at most N1*N2/nd2 giving a - * join selectivity of not more than 1/nd2. By the same logic - * it is not more than 1/nd1, so MIN(1/nd1, 1/nd2) is an upper - * bound. Using the MIN() means we estimate from the point of - * view of the relation with smaller nd (since the larger nd - * is determining the MIN). It is reasonable to assume that - * most tuples in this rel will have join partners, so the - * bound is probably reasonably tight and should be taken - * as-is. + * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). + * This is plausible if we assume that the join operator is + * strict and the non-null values are about equally distributed: + * a given non-null tuple of rel1 will join to either zero or + * N2*(1-nullfrac2)/nd2 rows of rel2, so total join rows are at + * most N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join + * selectivity of not more than (1-nullfrac1)*(1-nullfrac2)/nd2. + * By the same logic it is not more than + * (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression with MIN() + * is an upper bound. Using the MIN() means we estimate from the + * point of view of the relation with smaller nd (since the larger + * nd is determining the MIN). It is reasonable to assume that + * most tuples in this rel will have join partners, so the bound + * is probably reasonably tight and should be taken as-is. * * XXX Can we be smarter if we have an MCV list for just one * side? It seems that if we assume equal distribution for the * other side, we end up with the same answer anyway. */ + double nullfrac1 = stats1->stanullfrac; + double nullfrac2 = stats2->stanullfrac; + + selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); if (nd1 > nd2) - selec = 1.0 / nd1; + selec /= nd1; else - selec = 1.0 / nd2; + selec /= nd2; } if (have_mcvs1) -- GitLab