From 25e46a504b215fb7e11463d628d3477098323367 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 17 Apr 2008 21:22:14 +0000
Subject: [PATCH] Fix a couple of oversights associated with the "physical
 tlist" optimization: we had several code paths where a physical tlist could
 be used for the input to a Sort node, which is a dumb idea because any
 unneeded table columns will increase the volume of data the sort has to push
 around.

(Unfortunately the easy-looking fix of calling disuse_physical_tlist during
make_sort_xxx doesn't work because in most cases we're already committed to
the current input tlist --- it's been marked with sort column numbers, or
we've built grouping column numbers using it, etc.  The tlist has to be
selected properly at the calling level before we start constructing sort-col
information.  This is easy enough to do, we were just failing to take the
point into consideration.)

Back-patch to 8.3.  I believe the problem probably exists clear back to 7.4
when the physical tlist optimization was added, but I'm afraid to back-patch
further than 8.3 without a great deal more study than I want to put into it.
The code in this area has drifted a lot over time.  The real-world importance
of these code paths is uncertain anyway --- I think in many cases we'd
probably prefer hash-based methods.
---
 src/backend/optimizer/plan/createplan.c | 15 +++++++++------
 src/backend/optimizer/plan/planner.c    | 21 +++++++++++++++++----
 src/include/optimizer/planmain.h        |  3 +--
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6b784cd75c1..109a0145475 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.239 2008/04/13 20:51:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.240 2008/04/17 21:22:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -115,6 +115,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
 		  AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
 		  double limit_tuples);
+static Material *make_material(Plan *lefttree);
 
 
 /*
@@ -616,12 +617,14 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	 * add any such expressions to the subplan's tlist.
 	 *
 	 * The subplan may have a "physical" tlist if it is a simple scan plan.
-	 * This should be left as-is if we don't need to add any expressions;
+	 * If we're going to sort, this should be reduced to the regular tlist,
+	 * so that we don't sort more data than we need to.  For hashing, the
+	 * tlist should be left as-is if we don't need to add any expressions;
 	 * but if we do have to add expressions, then a projection step will be
-	 * needed at runtime anyway, and so we may as well remove unneeded items.
+	 * needed at runtime anyway, so we may as well remove unneeded items.
 	 * Therefore newtlist starts from build_relation_tlist() not just a
 	 * copy of the subplan's tlist; and we don't install it into the subplan
-	 * unless stuff has to be added.
+	 * unless we are sorting or stuff has to be added.
 	 *
 	 * To find the correct list of values to unique-ify, we look in the
 	 * information saved for IN expressions.  If this code is ever used in
@@ -669,7 +672,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 		}
 	}
 
-	if (newitems)
+	if (newitems || best_path->umethod == UNIQUE_PATH_SORT)
 	{
 		/*
 		 * If the top plan node can't do projections, we need to add a Result
@@ -2850,7 +2853,7 @@ make_sort_from_groupcols(PlannerInfo *root,
 					 sortColIdx, sortOperators, nullsFirst, -1.0);
 }
 
-Material *
+static Material *
 make_material(Plan *lefttree)
 {
 	Material   *node = makeNode(Material);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8679e9ecf9a..74e7258bba2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.231 2008/04/01 00:48:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.232 2008/04/17 21:22:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -957,9 +957,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			 * Normal case --- create a plan according to query_planner's
 			 * results.
 			 */
+			bool	need_sort_for_grouping = false;
+
 			result_plan = create_plan(root, best_path);
 			current_pathkeys = best_path->pathkeys;
 
+			/* Detect if we'll need an explicit sort for grouping */
+			if (parse->groupClause && !use_hashed_grouping &&
+				!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+			{
+				need_sort_for_grouping = true;
+				/*
+				 * Always override query_planner's tlist, so that we don't
+				 * sort useless data from a "physical" tlist.
+				 */
+				need_tlist_eval = true;
+			}
+
 			/*
 			 * create_plan() returns a plan with just a "flat" tlist of
 			 * required Vars.  Usually we need to insert the sub_tlist as the
@@ -1054,8 +1068,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 				if (parse->groupClause)
 				{
-					if (!pathkeys_contained_in(group_pathkeys,
-											   current_pathkeys))
+					if (need_sort_for_grouping)
 					{
 						result_plan = (Plan *)
 							make_sort_from_groupcols(root,
@@ -1098,7 +1111,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 * Add an explicit sort if we couldn't make the path come out
 				 * the way the GROUP node needs it.
 				 */
-				if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+				if (need_sort_for_grouping)
 				{
 					result_plan = (Plan *)
 						make_sort_from_groupcols(root,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index f51a399caab..74ba5131cc6 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.106 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.107 2008/04/17 21:22:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,6 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual,
 		   int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
 		   double numGroups,
 		   Plan *lefttree);
-extern Material *make_material(Plan *lefttree);
 extern Plan *materialize_finished_plan(Plan *subplan);
 extern Unique *make_unique(Plan *lefttree, List *distinctList);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
-- 
GitLab