From 3e4b3465b6345b75659e8f897976d4c810408762 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 26 Oct 2011 17:52:02 -0400
Subject: [PATCH] Improve planner's ability to recognize cases where an IN's
 RHS is unique.

If the right-hand side of a semijoin is unique, then we can treat it like a
normal join (put another way: we don't need to explicitly unique-ify the
data before performing it as a normal join).  We were recognizing
such cases when the RHS was a sub-query with appropriate DISTINCT or GROUP
BY decoration, but there's another way: if the RHS is a plain relation with
unique indexes, we can check if any of the indexes prove the output is
unique.  Most of the infrastructure for that was there already in the join
removal code, though I had to rearrange it a bit.  Prompted by a recent
example on the pgsql-performance mailing list.
---
 src/backend/optimizer/path/indxpath.c     | 114 +++++++++++++++++++---
 src/backend/optimizer/plan/analyzejoins.c |  39 +-------
 src/backend/optimizer/util/pathnode.c     |  27 ++++-
 src/include/optimizer/paths.h             |   3 +-
 4 files changed, 134 insertions(+), 49 deletions(-)

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 77df5a24ea2..74bc7ac7c63 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -2253,21 +2253,74 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
  *	  a set of equality conditions, because the conditions constrain all
  *	  columns of some unique index.
  *
- * The conditions are provided as a list of RestrictInfo nodes, where the
- * caller has already determined that each condition is a mergejoinable
- * equality with an expression in this relation on one side, and an
- * expression not involving this relation on the other.  The transient
- * outer_is_left flag is used to identify which side we should look at:
- * left side if outer_is_left is false, right side if it is true.
+ * The conditions can be represented in either or both of two ways:
+ * 1. A list of RestrictInfo nodes, where the caller has already determined
+ * that each condition is a mergejoinable equality with an expression in
+ * this relation on one side, and an expression not involving this relation
+ * on the other.  The transient outer_is_left flag is used to identify which
+ * side we should look at: left side if outer_is_left is false, right side
+ * if it is true.
+ * 2. A list of expressions in this relation, and a corresponding list of
+ * equality operators. The caller must have already checked that the operators
+ * represent equality.  (Note: the operators could be cross-type; the
+ * expressions should correspond to their RHS inputs.)
+ *
+ * The caller need only supply equality conditions arising from joins;
+ * this routine automatically adds in any usable baserestrictinfo clauses.
+ * (Note that the passed-in restrictlist will be destructively modified!)
  */
 bool
 relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
-							  List *restrictlist)
+							  List *restrictlist,
+							  List *exprlist, List *oprlist)
 {
 	ListCell   *ic;
 
+	Assert(list_length(exprlist) == list_length(oprlist));
+
+	/* Short-circuit if no indexes... */
+	if (rel->indexlist == NIL)
+		return false;
+
+	/*
+	 * Examine the rel's restriction clauses for usable var = const clauses
+	 * that we can add to the restrictlist.
+	 */
+	foreach(ic, rel->baserestrictinfo)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(ic);
+
+		/*
+		 * Note: can_join won't be set for a restriction clause, but
+		 * mergeopfamilies will be if it has a mergejoinable operator and
+		 * doesn't contain volatile functions.
+		 */
+		if (restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/*
+		 * The clause certainly doesn't refer to anything but the given rel.
+		 * If either side is pseudoconstant then we can use it.
+		 */
+		if (bms_is_empty(restrictinfo->left_relids))
+		{
+			/* righthand side is inner */
+			restrictinfo->outer_is_left = true;
+		}
+		else if (bms_is_empty(restrictinfo->right_relids))
+		{
+			/* lefthand side is inner */
+			restrictinfo->outer_is_left = false;
+		}
+		else
+			continue;
+
+		/* OK, add to list */
+		restrictlist = lappend(restrictlist, restrictinfo);
+	}
+
 	/* Short-circuit the easy case */
-	if (restrictlist == NIL)
+	if (restrictlist == NIL && exprlist == NIL)
 		return false;
 
 	/* Examine each index of the relation ... */
@@ -2285,12 +2338,14 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 			continue;
 
 		/*
-		 * Try to find each index column in the list of conditions.  This is
-		 * O(n^2) or worse, but we expect all the lists to be short.
+		 * Try to find each index column in the lists of conditions.  This is
+		 * O(N^2) or worse, but we expect all the lists to be short.
 		 */
 		for (c = 0; c < ind->ncolumns; c++)
 		{
+			bool		matched = false;
 			ListCell   *lc;
+			ListCell   *lc2;
 
 			foreach(lc, restrictlist)
 			{
@@ -2319,10 +2374,45 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 					rexpr = get_leftop(rinfo->clause);
 
 				if (match_index_to_operand(rexpr, c, ind))
-					break;		/* found a match; column is unique */
+				{
+					matched = true;		/* column is unique */
+					break;
+				}
+			}
+
+			if (matched)
+				continue;
+
+			forboth(lc, exprlist, lc2, oprlist)
+			{
+				Node	   *expr = (Node *) lfirst(lc);
+				Oid			opr = lfirst_oid(lc2);
+
+				/* See if the expression matches the index key */
+				if (!match_index_to_operand(expr, c, ind))
+					continue;
+
+				/*
+				 * The equality operator must be a member of the index
+				 * opfamily, else it is not asserting the right kind of
+				 * equality behavior for this index.  We assume the caller
+				 * determined it is an equality operator, so we don't need to
+				 * check any more tightly than this.
+				 */
+				if (!op_in_opfamily(opr, ind->opfamily[c]))
+					continue;
+
+				/*
+				 * XXX at some point we may need to check collations here too.
+				 * For the moment we assume all collations reduce to the same
+				 * notion of equality.
+				 */
+
+				matched = true;		/* column is unique */
+				break;
 			}
 
-			if (lc == NULL)
+			if (!matched)
 				break;			/* no match; this index doesn't help us */
 		}
 
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 1784ac2fc5b..b83c936a5e3 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -264,42 +264,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		clause_list = lappend(clause_list, restrictinfo);
 	}
 
-	/* Now examine the rel's restriction clauses for var = const clauses */
-	foreach(l, innerrel->baserestrictinfo)
-	{
-		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
-		/*
-		 * Note: can_join won't be set for a restriction clause, but
-		 * mergeopfamilies will be if it has a mergejoinable operator and
-		 * doesn't contain volatile functions.
-		 */
-		if (restrictinfo->mergeopfamilies == NIL)
-			continue;			/* not mergejoinable */
-
-		/*
-		 * The clause certainly doesn't refer to anything but the given rel.
-		 * If either side is pseudoconstant then we can use it.
-		 */
-		if (bms_is_empty(restrictinfo->left_relids))
-		{
-			/* righthand side is inner */
-			restrictinfo->outer_is_left = true;
-		}
-		else if (bms_is_empty(restrictinfo->right_relids))
-		{
-			/* lefthand side is inner */
-			restrictinfo->outer_is_left = false;
-		}
-		else
-			continue;
-
-		/* OK, add to list */
-		clause_list = lappend(clause_list, restrictinfo);
-	}
+	/*
+	 * relation_has_unique_index_for automatically adds any usable restriction
+	 * clauses for the innerrel, so we needn't do that here.
+	 */
 
 	/* Now examine the indexes to see if we have a matching unique index */
-	if (relation_has_unique_index_for(root, innerrel, clause_list))
+	if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
 		return true;
 
 	/*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 6aa34412def..1e7aac95ef4 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1021,8 +1021,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.parent = rel;
 
 	/*
-	 * Treat the output as always unsorted, since we don't necessarily have
-	 * pathkeys to represent it.
+	 * Assume the output is unsorted, since we don't necessarily have pathkeys
+	 * to represent it.  (This might get overridden below.)
 	 */
 	pathnode->path.pathkeys = NIL;
 
@@ -1030,6 +1030,29 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->in_operators = in_operators;
 	pathnode->uniq_exprs = uniq_exprs;
 
+	/*
+	 * If the input is a relation and it has a unique index that proves the
+	 * uniq_exprs are unique, then we don't need to do anything.  Note that
+	 * relation_has_unique_index_for automatically considers restriction
+	 * clauses for the rel, as well.
+	 */
+	if (rel->rtekind == RTE_RELATION && all_btree &&
+		relation_has_unique_index_for(root, rel, NIL,
+									  uniq_exprs, in_operators))
+	{
+		pathnode->umethod = UNIQUE_PATH_NOOP;
+		pathnode->rows = rel->rows;
+		pathnode->path.startup_cost = subpath->startup_cost;
+		pathnode->path.total_cost = subpath->total_cost;
+		pathnode->path.pathkeys = subpath->pathkeys;
+
+		rel->cheapest_unique_path = (Path *) pathnode;
+
+		MemoryContextSwitchTo(oldcontext);
+
+		return pathnode;
+	}
+
 	/*
 	 * If the input is a subquery whose output must be unique already, then we
 	 * don't need to do anything.  The test for uniqueness has to consider
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7f1353a2a39..c62f4a8122a 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -50,7 +50,8 @@ extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 					 RelOptInfo *outer_rel, JoinType jointype,
 					 Path **cheapest_startup, Path **cheapest_total);
 extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
-							  List *restrictlist);
+							  List *restrictlist,
+							  List *exprlist, List *oprlist);
 extern bool eclass_matches_any_index(EquivalenceClass *ec,
 						 EquivalenceMember *em,
 						 RelOptInfo *rel);
-- 
GitLab