From 954228712310e78e12f48070df066f984dc977f5 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 7 Nov 2007 21:00:37 +0000
Subject: [PATCH] Fix patternsel() and callers to do the right thing for NOT
 LIKE and the other negated-match operators.  patternsel had been using the
 supplied operator as though it were a positive-match operator, and thus
 obtaining a wrong result, which was even more wrong after the caller
 subtracted it from 1.  Seems cleanest to give patternsel an explicit "negate"
 argument so that it knows what's going on.  Also install the same
 factorization scheme for pattern join selectivity estimators; even though
 they are just stubs at the moment, this may keep someone from making the same
 type of mistake when they get filled out.  Per report from Greg Mullane.

Backpatch to 8.2 --- previous releases do not show the problem because
patternsel() doesn't actually use the operator directly.
---
 src/backend/utils/adt/selfuncs.c | 109 +++++++++++++++----------------
 1 file changed, 52 insertions(+), 57 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index ad9f662b68d..5bf04d052a6 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.236 2007/08/31 23:35:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.237 2007/11/07 21:00:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -901,7 +901,7 @@ scalargtsel(PG_FUNCTION_ARGS)
  * patternsel			- Generic code for pattern-match selectivity.
  */
 static double
-patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
+patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 {
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	Oid			operator = PG_GETARG_OID(1);
@@ -921,23 +921,40 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 	Const	   *rest = NULL;
 	double		result;
 
+	/*
+	 * If this is for a NOT LIKE or similar operator, get the corresponding
+	 * positive-match operator and work with that.  Set result to the
+	 * correct default estimate, too.
+	 */
+	if (negate)
+	{
+		operator = get_negator(operator);
+		if (!OidIsValid(operator))
+			elog(ERROR, "patternsel called for operator without a negator");
+		result = 1.0 - DEFAULT_MATCH_SEL;
+	}
+	else
+	{
+		result = DEFAULT_MATCH_SEL;
+	}
+
 	/*
 	 * If expression is not variable op constant, then punt and return a
 	 * default estimate.
 	 */
 	if (!get_restriction_variable(root, args, varRelid,
 								  &vardata, &other, &varonleft))
-		return DEFAULT_MATCH_SEL;
+		return result;
 	if (!varonleft || !IsA(other, Const))
 	{
 		ReleaseVariableStats(vardata);
-		return DEFAULT_MATCH_SEL;
+		return result;
 	}
 	variable = (Node *) linitial(args);
 
 	/*
 	 * If the constant is NULL, assume operator is strict and return zero, ie,
-	 * operator will never return TRUE.
+	 * operator will never return TRUE.  (It's zero even for a negator op.)
 	 */
 	if (((Const *) other)->constisnull)
 	{
@@ -956,7 +973,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 	if (consttype != TEXTOID && consttype != BYTEAOID)
 	{
 		ReleaseVariableStats(vardata);
-		return DEFAULT_MATCH_SEL;
+		return result;
 	}
 
 	/*
@@ -988,7 +1005,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 			break;
 		default:
 			ReleaseVariableStats(vardata);
-			return DEFAULT_MATCH_SEL;
+			return result;
 	}
 
 	/* divide pattern into fixed prefix and remainder */
@@ -1017,7 +1034,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 				elog(ERROR, "unrecognized consttype: %u",
 					 prefix->consttype);
 				ReleaseVariableStats(vardata);
-				return DEFAULT_MATCH_SEL;
+				return result;
 		}
 		prefix = string_to_const(prefixstr, vartype);
 		pfree(prefixstr);
@@ -1125,7 +1142,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 
 	ReleaseVariableStats(vardata);
 
-	return result;
+	return negate ? (1.0 - result) : result;
 }
 
 /*
@@ -1134,7 +1151,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 Datum
 regexeqsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex));
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
 }
 
 /*
@@ -1143,7 +1160,7 @@ regexeqsel(PG_FUNCTION_ARGS)
 Datum
 icregexeqsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC));
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
 }
 
 /*
@@ -1152,7 +1169,7 @@ icregexeqsel(PG_FUNCTION_ARGS)
 Datum
 likesel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like));
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
 }
 
 /*
@@ -1161,7 +1178,7 @@ likesel(PG_FUNCTION_ARGS)
 Datum
 iclikesel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC));
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
 }
 
 /*
@@ -1170,11 +1187,7 @@ iclikesel(PG_FUNCTION_ARGS)
 Datum
 regexnesel(PG_FUNCTION_ARGS)
 {
-	double		result;
-
-	result = patternsel(fcinfo, Pattern_Type_Regex);
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
 }
 
 /*
@@ -1183,11 +1196,7 @@ regexnesel(PG_FUNCTION_ARGS)
 Datum
 icregexnesel(PG_FUNCTION_ARGS)
 {
-	double		result;
-
-	result = patternsel(fcinfo, Pattern_Type_Regex_IC);
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
 }
 
 /*
@@ -1196,11 +1205,7 @@ icregexnesel(PG_FUNCTION_ARGS)
 Datum
 nlikesel(PG_FUNCTION_ARGS)
 {
-	double		result;
-
-	result = patternsel(fcinfo, Pattern_Type_Like);
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
 }
 
 /*
@@ -1209,11 +1214,7 @@ nlikesel(PG_FUNCTION_ARGS)
 Datum
 icnlikesel(PG_FUNCTION_ARGS)
 {
-	double		result;
-
-	result = patternsel(fcinfo, Pattern_Type_Like_IC);
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
 }
 
 /*
@@ -2081,13 +2082,23 @@ scalargtjoinsel(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 }
 
+/*
+ * patternjoinsel		- Generic code for pattern-match join selectivity.
+ */
+static double
+patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+	/* For the moment we just punt. */
+	return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
+}
+
 /*
  *		regexeqjoinsel	- Join selectivity of regular-expression pattern match.
  */
 Datum
 regexeqjoinsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
 }
 
 /*
@@ -2096,7 +2107,7 @@ regexeqjoinsel(PG_FUNCTION_ARGS)
 Datum
 icregexeqjoinsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
 }
 
 /*
@@ -2105,7 +2116,7 @@ icregexeqjoinsel(PG_FUNCTION_ARGS)
 Datum
 likejoinsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
 }
 
 /*
@@ -2114,7 +2125,7 @@ likejoinsel(PG_FUNCTION_ARGS)
 Datum
 iclikejoinsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
 }
 
 /*
@@ -2123,11 +2134,7 @@ iclikejoinsel(PG_FUNCTION_ARGS)
 Datum
 regexnejoinsel(PG_FUNCTION_ARGS)
 {
-	float8		result;
-
-	result = DatumGetFloat8(regexeqjoinsel(fcinfo));
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
 }
 
 /*
@@ -2136,11 +2143,7 @@ regexnejoinsel(PG_FUNCTION_ARGS)
 Datum
 icregexnejoinsel(PG_FUNCTION_ARGS)
 {
-	float8		result;
-
-	result = DatumGetFloat8(icregexeqjoinsel(fcinfo));
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
 }
 
 /*
@@ -2149,11 +2152,7 @@ icregexnejoinsel(PG_FUNCTION_ARGS)
 Datum
 nlikejoinsel(PG_FUNCTION_ARGS)
 {
-	float8		result;
-
-	result = DatumGetFloat8(likejoinsel(fcinfo));
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
 }
 
 /*
@@ -2162,11 +2161,7 @@ nlikejoinsel(PG_FUNCTION_ARGS)
 Datum
 icnlikejoinsel(PG_FUNCTION_ARGS)
 {
-	float8		result;
-
-	result = DatumGetFloat8(iclikejoinsel(fcinfo));
-	result = 1.0 - result;
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
 }
 
 /*
-- 
GitLab