From ddbe8dca08e57142dd747749d0241efa3dea4869 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 25 Oct 2008 17:19:09 +0000
Subject: [PATCH] Add a heuristic to transformAExprIn() to make it prefer
 expanding "x IN (list)" into an OR of equality comparisons, rather than x =
 ANY(ARRAY[...]), when there are Vars in the right-hand side.  This avoids a
 performance regression compared to pre-8.2 releases, in cases where the OR
 form can be optimized into scans of multiple indexes.  Limit the possible
 downside by preferring this form only when the list isn't very long (I set
 the cutoff at 32 elements, which is a bit arbitrary but in the right
 ballpark).  Per discussion with Jim Nasby.

In passing, also make it try the OR form if it cannot select a common type
for the array elements; we've seen a complaint or two about how the OR form
worked for such cases and ARRAY doesn't.
---
 src/backend/parser/parse_coerce.c | 11 +++++----
 src/backend/parser/parse_expr.c   | 39 ++++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c
index 7c66c638047..6d703ae5718 100644
--- a/src/backend/parser/parse_coerce.c
+++ b/src/backend/parser/parse_coerce.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.169 2008/10/13 16:25:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.170 2008/10/25 17:19:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1085,13 +1085,14 @@ parser_coercion_errposition(ParseState *pstate,
 /*
  * select_common_type()
  *		Determine the common supertype of a list of input expressions.
- *		This is used for determining the output type of CASE and UNION
- *		constructs.
+ *		This is used for determining the output type of CASE, UNION,
+ *		and similar constructs.
  *
  * 'exprs' is a *nonempty* list of expressions.  Note that earlier items
  * in the list will be preferred if there is doubt.
  * 'context' is a phrase to use in the error message if we fail to select
- * a usable type.
+ * a usable type.  Pass NULL to have the routine return InvalidOid
+ * rather than throwing an error on failure.
  * 'which_expr': if not NULL, receives a pointer to the particular input
  * expression from which the result type was taken.
  */
@@ -1166,6 +1167,8 @@ select_common_type(ParseState *pstate, List *exprs, const char *context,
 				/*
 				 * both types in different categories? then not much hope...
 				 */
+				if (context == NULL)
+					return InvalidOid;
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
 				/*------
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index ddd041818a5..ced222578db 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.235 2008/10/06 17:39:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.236 2008/10/25 17:19:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_expr.h"
@@ -974,29 +975,45 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 	}
 
 	/*
-	 * If not forced by presence of RowExpr, try to resolve a common scalar
-	 * type for all the expressions, and see if it has an array type. (But if
-	 * there's only one righthand expression, we may as well just fall through
-	 * and generate a simple = comparison.)
+	 * We prefer a boolean tree to ScalarArrayOpExpr if any of these are true:
+	 *
+	 * 1. We have a RowExpr anywhere.
+	 *
+	 * 2. There's only one righthand expression --- best to just generate a
+	 * simple = comparison.
+	 *
+	 * 3. There's a reasonably small number of righthand expressions and
+	 * they contain any Vars.  This is a heuristic to support cases like
+	 * WHERE '555-1212' IN (tab.home_phone, tab.work_phone), which can be
+	 * optimized into an OR of indexscans on different indexes so long as
+	 * it's left as an OR tree.  (It'd be better to leave this decision
+	 * to the planner, no doubt, but the amount of code required to reformat
+	 * the expression later on seems out of proportion to the benefit.)
 	 */
-	if (!haveRowExpr && list_length(rexprs) != 1)
+	if (!(haveRowExpr ||
+		  list_length(rexprs) == 1 ||
+		  (list_length(rexprs) <= 32 &&
+		   contain_vars_of_level((Node *) rexprs, 0))))
 	{
 		List	   *allexprs;
 		Oid			scalar_type;
 		Oid			array_type;
 
 		/*
-		 * Select a common type for the array elements.  Note that since the
-		 * LHS' type is first in the list, it will be preferred when there is
-		 * doubt (eg, when all the RHS items are unknown literals).
+		 * Try to select a common type for the array elements.  Note that
+		 * since the LHS' type is first in the list, it will be preferred when
+		 * there is doubt (eg, when all the RHS items are unknown literals).
 		 *
 		 * Note: use list_concat here not lcons, to avoid damaging rexprs.
 		 */
 		allexprs = list_concat(list_make1(lexpr), rexprs);
-		scalar_type = select_common_type(pstate, allexprs, "IN", NULL);
+		scalar_type = select_common_type(pstate, allexprs, NULL, NULL);
 
 		/* Do we have an array type to use? */
-		array_type = get_array_type(scalar_type);
+		if (OidIsValid(scalar_type))
+			array_type = get_array_type(scalar_type);
+		else
+			array_type = InvalidOid;
 		if (array_type != InvalidOid)
 		{
 			/*
-- 
GitLab