From 57c9dff9d1a6473042caa0f1065f85c57890d2a0 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 2 Sep 2009 17:52:24 +0000
Subject: [PATCH] Fix subquery pullup to wrap a PlaceHolderVar around the
 entire RowExpr that's generated for a whole-row Var referencing the subquery,
 when the subquery is in the nullable side of an outer join.  The previous
 coding instead put PlaceHolderVars around the elements of the RowExpr.  The
 effect was that when the outer join made the subquery outputs go to null, the
 whole-row Var produced ROW(NULL,NULL,...) rather than just NULL.  There are
 arguments afoot about whether those things ought to be semantically
 indistinguishable, but for the moment they are not entirely so, and the
 planner needs to take care that its machinations preserve the difference. Per
 bug #5025.

Making this feasible required refactoring ResolveNew() to allow more caller
control over what is substituted for a Var.  I chose to make ResolveNew()
a wrapper around a new general-purpose function replace_rte_variables().
I also fixed the ancient bogosity that ResolveNew might fail to set
a query's hasSubLinks field after inserting a SubLink in it.  Although
all current callers make sure that happens anyway, we've had bugs of that
sort before, and it seemed like a good time to install a proper solution.

Back-patch to 8.4.  The problem can be demonstrated clear back to 8.0,
but the fix would be too invasive in earlier branches; not to mention
that people may be depending on the subtly-incorrect behavior.  The
8.4 series is new enough that fixing this probably won't cause complaints,
but it might in older branches.  Also, 8.4 shows the incorrect behavior
in more cases than older branches do, because it is able to flatten
subqueries in more cases.
---
 src/backend/optimizer/path/allpaths.c     |   5 +-
 src/backend/optimizer/prep/prepjointree.c | 393 ++++++++++++++--------
 src/backend/optimizer/prep/prepunion.c    |   6 +-
 src/backend/rewrite/rewriteHandler.c      |  11 +-
 src/backend/rewrite/rewriteManip.c        | 306 ++++++++++-------
 src/include/rewrite/rewriteManip.h        |  28 +-
 src/test/regress/expected/join.out        |  49 +++
 src/test/regress/sql/join.sql             |  21 ++
 8 files changed, 534 insertions(+), 285 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index e8e5b2ecb2d..ada9140552e 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.184 2009/07/06 18:26:30 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.185 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1254,7 +1254,8 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
 		 */
 		qual = ResolveNew(qual, rti, 0, rte,
 						  subquery->targetList,
-						  CMD_SELECT, 0);
+						  CMD_SELECT, 0,
+						  &subquery->hasSubLinks);
 
 		/*
 		 * Now attach the qual to the proper place: normally WHERE, but if the
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index fd451b338bf..d7676efbcb6 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -16,12 +16,13 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.66 2009/06/11 14:48:59 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.67 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
@@ -30,10 +31,23 @@
 #include "optimizer/subselect.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
+#include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
 
 
+typedef struct pullup_replace_vars_context
+{
+	PlannerInfo *root;
+	List	   *targetlist;			/* tlist of subquery being pulled up */
+	RangeTblEntry *target_rte;		/* RTE of subquery */
+	bool	   *outer_hasSubLinks;	/* -> outer query's hasSubLinks */
+	int			varno;				/* varno of subquery */
+	bool		need_phvs;			/* do we need PlaceHolderVars? */
+	bool		wrap_non_vars;		/* do we need 'em on *all* non-Vars? */
+	Node	  **rv_cache;			/* cache for results with PHVs */
+} pullup_replace_vars_context;
+
 typedef struct reduce_outer_joins_state
 {
 	Relids		relids;			/* base relids within this subtree */
@@ -60,12 +74,14 @@ static bool is_simple_subquery(Query *subquery);
 static bool is_simple_union_all(Query *subquery);
 static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery,
 							List *colTypes);
-static List *insert_targetlist_placeholders(PlannerInfo *root, List *tlist,
-							   int varno, bool wrap_non_vars);
 static bool is_safe_append_member(Query *subquery);
-static void resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte,
-					   List *subtlist, List *subtlist_with_phvs,
-					   JoinExpr *lowest_outer_join);
+static void replace_vars_in_jointree(Node *jtnode,
+						 pullup_replace_vars_context *context,
+						 JoinExpr *lowest_outer_join);
+static Node *pullup_replace_vars(Node *expr,
+								 pullup_replace_vars_context *context);
+static Node *pullup_replace_vars_callback(Var *var,
+							 replace_rte_variables_context *context);
 static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode);
 static void reduce_outer_joins_pass2(Node *jtnode,
 						 reduce_outer_joins_state *state,
@@ -457,8 +473,8 @@ inline_set_returning_functions(PlannerInfo *root)
  * we are currently processing!  We handle this by being careful not to
  * change the jointree structure while recursing: no nodes other than
  * subquery RangeTblRef entries will be replaced.  Also, we can't turn
- * ResolveNew loose on the whole jointree, because it'll return a mutated
- * copy of the tree; we have to invoke it just on the quals, instead.
+ * pullup_replace_vars loose on the whole jointree, because it'll return a
+ * mutated copy of the tree; we have to invoke it just on the quals, instead.
  * This behavior is what makes it reasonable to pass lowest_outer_join as a
  * pointer rather than some more-indirect way of identifying the lowest OJ.
  * Likewise, we don't replace append_rel_list members but only their
@@ -584,8 +600,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	Query	   *subquery;
 	PlannerInfo *subroot;
 	int			rtoffset;
-	List	   *subtlist;
-	List	   *subtlist_with_phvs;
+	pullup_replace_vars_context rvcontext;
 	ListCell   *lc;
 
 	/*
@@ -692,16 +707,20 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 * insert into the top query, but if we are under an outer join then
 	 * non-nullable items may have to be turned into PlaceHolderVars.  If we
 	 * are dealing with an appendrel member then anything that's not a simple
-	 * Var has to be turned into a PlaceHolderVar.
+	 * Var has to be turned into a PlaceHolderVar.  Set up appropriate context
+	 * data for pullup_replace_vars.
 	 */
-	subtlist = subquery->targetList;
-	if (lowest_outer_join != NULL || containing_appendrel != NULL)
-		subtlist_with_phvs = insert_targetlist_placeholders(root,
-															subtlist,
-															varno,
-											   containing_appendrel != NULL);
-	else
-		subtlist_with_phvs = subtlist;
+	rvcontext.root = root;
+	rvcontext.targetlist = subquery->targetList;
+	rvcontext.target_rte = rte;
+	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
+	rvcontext.varno = varno;
+	rvcontext.need_phvs = (lowest_outer_join != NULL ||
+						   containing_appendrel != NULL);
+	rvcontext.wrap_non_vars = (containing_appendrel != NULL);
+	/* initialize cache array with indexes 0 .. length(tlist) */
+	rvcontext.rv_cache = palloc0((list_length(subquery->targetList) + 1) *
+								 sizeof(Node *));
 
 	/*
 	 * Replace all of the top query's references to the subquery's outputs
@@ -709,25 +728,17 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 * replace any of the jointree structure. (This'd be a lot cleaner if we
 	 * could use query_tree_mutator.)  We have to use PHVs in the targetList,
 	 * returningList, and havingQual, since those are certainly above any
-	 * outer join.	resolvenew_in_jointree tracks its location in the jointree
-	 * and uses PHVs or not appropriately.
+	 * outer join.  replace_vars_in_jointree tracks its location in the
+	 * jointree and uses PHVs or not appropriately.
 	 */
 	parse->targetList = (List *)
-		ResolveNew((Node *) parse->targetList,
-				   varno, 0, rte,
-				   subtlist_with_phvs, CMD_SELECT, 0);
+		pullup_replace_vars((Node *) parse->targetList, &rvcontext);
 	parse->returningList = (List *)
-		ResolveNew((Node *) parse->returningList,
-				   varno, 0, rte,
-				   subtlist_with_phvs, CMD_SELECT, 0);
-	resolvenew_in_jointree((Node *) parse->jointree, varno, rte,
-						   subtlist, subtlist_with_phvs,
-						   lowest_outer_join);
+		pullup_replace_vars((Node *) parse->returningList, &rvcontext);
+	replace_vars_in_jointree((Node *) parse->jointree, &rvcontext,
+							 lowest_outer_join);
 	Assert(parse->setOperations == NULL);
-	parse->havingQual =
-		ResolveNew(parse->havingQual,
-				   varno, 0, rte,
-				   subtlist_with_phvs, CMD_SELECT, 0);
+	parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext);
 
 	/*
 	 * Replace references in the translated_vars lists of appendrels. When
@@ -739,13 +750,13 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	foreach(lc, root->append_rel_list)
 	{
 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+		bool	save_need_phvs = rvcontext.need_phvs;
 
+		if (appinfo == containing_appendrel)
+			rvcontext.need_phvs = false;
 		appinfo->translated_vars = (List *)
-			ResolveNew((Node *) appinfo->translated_vars,
-					   varno, 0, rte,
-					   (appinfo == containing_appendrel) ?
-					   subtlist : subtlist_with_phvs,
-					   CMD_SELECT, 0);
+			pullup_replace_vars((Node *) appinfo->translated_vars, &rvcontext);
+		rvcontext.need_phvs = save_need_phvs;
 	}
 
 	/*
@@ -763,9 +774,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 
 		if (otherrte->rtekind == RTE_JOIN)
 			otherrte->joinaliasvars = (List *)
-				ResolveNew((Node *) otherrte->joinaliasvars,
-						   varno, 0, rte,
-						   subtlist_with_phvs, CMD_SELECT, 0);
+				pullup_replace_vars((Node *) otherrte->joinaliasvars,
+									&rvcontext);
 	}
 
 	/*
@@ -783,9 +793,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 
 	/*
 	 * We also have to fix the relid sets of any PlaceHolderVar nodes in the
-	 * parent query.  (This could perhaps be done by ResolveNew, but it would
-	 * clutter that routine's API unreasonably.)  Note in particular that any
-	 * PlaceHolderVar nodes just created by insert_targetlist_placeholders()
+	 * parent query.  (This could perhaps be done by pullup_replace_vars(),
+	 * but it seems cleaner to use two passes.)  Note in particular that any
+	 * PlaceHolderVar nodes just created by pullup_replace_vars()
 	 * will be adjusted, so having created them with the subquery's varno is
 	 * correct.
 	 *
@@ -820,6 +830,12 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 
 	/*
 	 * Miscellaneous housekeeping.
+	 *
+	 * Although replace_rte_variables() faithfully updated parse->hasSubLinks
+	 * if it copied any SubLinks out of the subquery's targetlist, we still
+	 * could have SubLinks added to the query in the expressions of FUNCTION
+	 * and VALUES RTEs copied up from the subquery.  So it's necessary to copy
+	 * subquery->hasSubLinks anyway.  Perhaps this can be improved someday.
 	 */
 	parse->hasSubLinks |= subquery->hasSubLinks;
 
@@ -1136,77 +1152,6 @@ is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes)
 	}
 }
 
-/*
- * insert_targetlist_placeholders
- *	  Insert PlaceHolderVar nodes into any non-junk targetlist items that are
- *	  not simple variables or strict functions of simple variables (and hence
- *	  might not correctly go to NULL when examined above the point of an outer
- *	  join).
- *
- * varno is the upper-query relid of the subquery; this is used as the
- * syntactic location of the PlaceHolderVars.
- * If wrap_non_vars is true then *only* simple Var references escape being
- * wrapped with PlaceHolderVars.
- */
-static List *
-insert_targetlist_placeholders(PlannerInfo *root, List *tlist,
-							   int varno, bool wrap_non_vars)
-{
-	List	   *result = NIL;
-	ListCell   *lc;
-
-	foreach(lc, tlist)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(lc);
-		TargetEntry *newtle;
-
-		/* resjunk columns need not be changed */
-		if (tle->resjunk)
-		{
-			result = lappend(result, tle);
-			continue;
-		}
-
-		/*
-		 * Simple Vars always escape being wrapped.  This is common enough to
-		 * deserve a fast path even if we aren't doing wrap_non_vars.
-		 */
-		if (tle->expr && IsA(tle->expr, Var) &&
-			((Var *) tle->expr)->varlevelsup == 0)
-		{
-			result = lappend(result, tle);
-			continue;
-		}
-
-		if (!wrap_non_vars)
-		{
-			/*
-			 * If it contains a Var of current level, and does not contain any
-			 * non-strict constructs, then it's certainly nullable and we
-			 * don't need to insert a PlaceHolderVar.  (Note: in future maybe
-			 * we should insert PlaceHolderVars anyway, when a tlist item is
-			 * expensive to evaluate?
-			 */
-			if (contain_vars_of_level((Node *) tle->expr, 0) &&
-				!contain_nonstrict_functions((Node *) tle->expr))
-			{
-				result = lappend(result, tle);
-				continue;
-			}
-		}
-
-		/* Else wrap it in a PlaceHolderVar */
-		newtle = makeNode(TargetEntry);
-		memcpy(newtle, tle, sizeof(TargetEntry));
-		newtle->expr = (Expr *)
-			make_placeholder_expr(root,
-								  tle->expr,
-								  bms_make_singleton(varno));
-		result = lappend(result, newtle);
-	}
-	return result;
-}
-
 /*
  * is_safe_append_member
  *	  Check a subquery that is a leaf of a UNION ALL appendrel to see if it's
@@ -1244,18 +1189,17 @@ is_safe_append_member(Query *subquery)
 }
 
 /*
- * Helper routine for pull_up_subqueries: do ResolveNew on every expression
- * in the jointree, without changing the jointree structure itself.  Ugly,
- * but there's no other way...
+ * Helper routine for pull_up_subqueries: do pullup_replace_vars on every
+ * expression in the jointree, without changing the jointree structure itself.
+ * Ugly, but there's no other way...
  *
- * If we are above lowest_outer_join then use subtlist_with_phvs; at or
- * below it, use subtlist.	(When no outer joins are in the picture,
- * these will be the same list.)
+ * If we are at or below lowest_outer_join, we can suppress use of
+ * PlaceHolderVars wrapped around the replacement expressions.
  */
 static void
-resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte,
-					   List *subtlist, List *subtlist_with_phvs,
-					   JoinExpr *lowest_outer_join)
+replace_vars_in_jointree(Node *jtnode,
+						 pullup_replace_vars_context *context,
+						 JoinExpr *lowest_outer_join)
 {
 	if (jtnode == NULL)
 		return;
@@ -1269,43 +1213,204 @@ resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte,
 		ListCell   *l;
 
 		foreach(l, f->fromlist)
-			resolvenew_in_jointree(lfirst(l), varno, rte,
-								   subtlist, subtlist_with_phvs,
-								   lowest_outer_join);
-		f->quals = ResolveNew(f->quals,
-							  varno, 0, rte,
-							  subtlist_with_phvs, CMD_SELECT, 0);
+			replace_vars_in_jointree(lfirst(l), context, lowest_outer_join);
+		f->quals = pullup_replace_vars(f->quals, context);
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
 		JoinExpr   *j = (JoinExpr *) jtnode;
+		bool	save_need_phvs = context->need_phvs;
 
 		if (j == lowest_outer_join)
 		{
 			/* no more PHVs in or below this join */
-			subtlist_with_phvs = subtlist;
+			context->need_phvs = false;
 			lowest_outer_join = NULL;
 		}
-		resolvenew_in_jointree(j->larg, varno, rte,
-							   subtlist, subtlist_with_phvs,
-							   lowest_outer_join);
-		resolvenew_in_jointree(j->rarg, varno, rte,
-							   subtlist, subtlist_with_phvs,
-							   lowest_outer_join);
-		j->quals = ResolveNew(j->quals,
-							  varno, 0, rte,
-							  subtlist_with_phvs, CMD_SELECT, 0);
+		replace_vars_in_jointree(j->larg, context, lowest_outer_join);
+		replace_vars_in_jointree(j->rarg, context, lowest_outer_join);
+		j->quals = pullup_replace_vars(j->quals, context);
 
 		/*
 		 * We don't bother to update the colvars list, since it won't be used
 		 * again ...
 		 */
+		context->need_phvs = save_need_phvs;
 	}
 	else
 		elog(ERROR, "unrecognized node type: %d",
 			 (int) nodeTag(jtnode));
 }
 
+/*
+ * Apply pullup variable replacement throughout an expression tree
+ *
+ * Returns a modified copy of the tree, so this can't be used where we
+ * need to do in-place replacement.
+ */
+static Node *
+pullup_replace_vars(Node *expr, pullup_replace_vars_context *context)
+{
+	return replace_rte_variables(expr,
+								 context->varno, 0,
+								 pullup_replace_vars_callback,
+								 (void *) context,
+								 context->outer_hasSubLinks);
+}
+
+static Node *
+pullup_replace_vars_callback(Var *var,
+							 replace_rte_variables_context *context)
+{
+	pullup_replace_vars_context *rcon = (pullup_replace_vars_context *) context->callback_arg;
+	int			varattno = var->varattno;
+	Node	   *newnode;
+
+	/*
+	 * If PlaceHolderVars are needed, we cache the modified expressions in
+	 * rcon->rv_cache[].  This is not in hopes of any material speed gain
+	 * within this function, but to avoid generating identical PHVs with
+	 * different IDs.  That would result in duplicate evaluations at runtime,
+	 * and possibly prevent optimizations that rely on recognizing different
+	 * references to the same subquery output as being equal().  So it's worth
+	 * a bit of extra effort to avoid it.
+	 */
+	if (rcon->need_phvs &&
+		varattno >= InvalidAttrNumber &&
+		varattno <= list_length(rcon->targetlist) &&
+		rcon->rv_cache[varattno] != NULL)
+	{
+		/* Just copy the entry and fall through to adjust its varlevelsup */
+		newnode = copyObject(rcon->rv_cache[varattno]);
+	}
+	else if (varattno == InvalidAttrNumber)
+	{
+		/* Must expand whole-tuple reference into RowExpr */
+		RowExpr    *rowexpr;
+		List	   *colnames;
+		List	   *fields;
+		bool		save_need_phvs = rcon->need_phvs;
+
+		/*
+		 * If generating an expansion for a var of a named rowtype (ie, this
+		 * is a plain relation RTE), then we must include dummy items for
+		 * dropped columns.  If the var is RECORD (ie, this is a JOIN), then
+		 * omit dropped columns. Either way, attach column names to the
+		 * RowExpr for use of ruleutils.c.
+		 *
+		 * In order to be able to cache the results, we always generate the
+		 * expansion with varlevelsup = 0, and then adjust if needed.
+		 */
+		expandRTE(rcon->target_rte,
+				  var->varno, 0 /* not varlevelsup */, var->location,
+				  (var->vartype != RECORDOID),
+				  &colnames, &fields);
+		/* Adjust the generated per-field Vars, but don't insert PHVs */
+		rcon->need_phvs = false;
+		fields = (List *) replace_rte_variables_mutator((Node *) fields,
+														context);
+		rcon->need_phvs = save_need_phvs;
+		rowexpr = makeNode(RowExpr);
+		rowexpr->args = fields;
+		rowexpr->row_typeid = var->vartype;
+		rowexpr->row_format = COERCE_IMPLICIT_CAST;
+		rowexpr->colnames = colnames;
+		rowexpr->location = var->location;
+		newnode = (Node *) rowexpr;
+
+		/*
+		 * Insert PlaceHolderVar if needed.  Notice that we are wrapping
+		 * one PlaceHolderVar around the whole RowExpr, rather than putting
+		 * one around each element of the row.  This is because we need
+		 * the expression to yield NULL, not ROW(NULL,NULL,...) when it
+		 * is forced to null by an outer join.
+		 */
+		if (rcon->need_phvs)
+		{
+			/* RowExpr is certainly not strict, so always need PHV */
+			newnode = (Node *)
+				make_placeholder_expr(rcon->root,
+									  (Expr *) newnode,
+									  bms_make_singleton(rcon->varno));
+			/* cache it with the PHV, and with varlevelsup still zero */
+			rcon->rv_cache[InvalidAttrNumber] = copyObject(newnode);
+		}
+	}
+	else
+	{
+		/* Normal case referencing one targetlist element */
+		TargetEntry *tle = get_tle_by_resno(rcon->targetlist, varattno);
+
+		if (tle == NULL)		/* shouldn't happen */
+			elog(ERROR, "could not find attribute %d in subquery targetlist",
+				 varattno);
+
+		/* Make a copy of the tlist item to return */
+		newnode = copyObject(tle->expr);
+
+		/* Insert PlaceHolderVar if needed */
+		if (rcon->need_phvs)
+		{
+			bool	wrap;
+
+			if (newnode && IsA(newnode, Var) &&
+				((Var *) newnode)->varlevelsup == 0)
+			{
+				/* Simple Vars always escape being wrapped */
+				wrap = false;
+			}
+			else if (rcon->wrap_non_vars)
+			{
+				/* Wrap all non-Vars in a PlaceHolderVar */
+				wrap = true;
+			}
+			else
+			{
+				/*
+				 * If it contains a Var of current level, and does not contain
+				 * any non-strict constructs, then it's certainly nullable and
+				 * we don't need to insert a PlaceHolderVar.  (Note: in future
+				 * maybe we should insert PlaceHolderVars anyway, when a tlist
+				 * item is expensive to evaluate?
+				 */
+				if (contain_vars_of_level((Node *) newnode, 0) &&
+					!contain_nonstrict_functions((Node *) newnode))
+				{
+					/* No wrap needed */
+					wrap = false;
+				}
+				else
+				{
+					/* Else wrap it in a PlaceHolderVar */
+					wrap = true;
+				}
+			}
+
+			if (wrap)
+				newnode = (Node *)
+					make_placeholder_expr(rcon->root,
+										  (Expr *) newnode,
+										  bms_make_singleton(rcon->varno));
+
+			/*
+			 * Cache it if possible (ie, if the attno is in range, which it
+			 * probably always should be).  We can cache the value even if
+			 * we decided we didn't need a PHV, since this result will be
+			 * suitable for any request that has need_phvs.
+			 */
+			if (varattno > InvalidAttrNumber &&
+				varattno <= list_length(rcon->targetlist))
+				rcon->rv_cache[varattno] = copyObject(newnode);
+		}
+	}
+
+	/* Must adjust varlevelsup if tlist item is from higher query */
+	if (var->varlevelsup > 0)
+		IncrementVarSublevelsUp(newnode, var->varlevelsup, 0);
+
+	return newnode;
+}
+
 /*
  * reduce_outer_joins
  *		Attempt to reduce outer joins to plain inner joins.
@@ -1724,9 +1829,9 @@ reduce_outer_joins_pass2(Node *jtnode,
  * top query could (yet) contain such a reference.
  *
  * NOTE: although this has the form of a walker, we cheat and modify the
- * nodes in-place.	This should be OK since the tree was copied by ResolveNew
- * earlier.  Avoid scribbling on the original values of the bitmapsets, though,
- * because expression_tree_mutator doesn't copy those.
+ * nodes in-place.  This should be OK since the tree was copied by
+ * pullup_replace_vars earlier.  Avoid scribbling on the original values of
+ * the bitmapsets, though, because expression_tree_mutator doesn't copy those.
  */
 
 typedef struct
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 63bac959a74..a0098836b95 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.173 2009/08/13 16:53:09 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.174 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1477,8 +1477,8 @@ translate_col_privs(const Bitmapset *parent_privs,
  * Note: this is only applied after conversion of sublinks to subplans,
  * so we don't need to cope with recursion into sub-queries.
  *
- * Note: this is not hugely different from what ResolveNew() does; maybe
- * we should try to fold the two routines together.
+ * Note: this is not hugely different from what pullup_replace_vars() does;
+ * maybe we should try to fold the two routines together.
  */
 Node *
 adjust_appendrel_attrs(Node *node, AppendRelInfo *appinfo)
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index c1233b63570..8a178c9d3c6 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.186 2009/06/11 14:49:01 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.187 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -463,7 +463,8 @@ rewriteRuleAction(Query *parsetree,
 												   sub_action->rtable),
 										  parsetree->targetList,
 										  event,
-										  current_varno);
+										  current_varno,
+										  NULL);
 		if (sub_action_ptr)
 			*sub_action_ptr = sub_action;
 		else
@@ -493,7 +494,8 @@ rewriteRuleAction(Query *parsetree,
 								parsetree->rtable),
 					   rule_action->returningList,
 					   CMD_SELECT,
-					   0);
+					   0,
+					   &rule_action->hasSubLinks);
 
 		/*
 		 * There could have been some SubLinks in parsetree's returningList,
@@ -1510,7 +1512,8 @@ CopyAndAddInvertedQual(Query *parsetree,
 							  rt_fetch(rt_index, parsetree->rtable),
 							  parsetree->targetList,
 							  event,
-							  rt_index);
+							  rt_index,
+							  &parsetree->hasSubLinks);
 	/* And attach the fixed qual */
 	AddInvertedQual(parsetree, new_qual);
 
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 570434ed83e..d211a88ba45 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.122 2009/06/11 14:49:01 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.123 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1067,16 +1067,14 @@ AddInvertedQual(Query *parsetree, Node *qual)
 
 
 /*
- * ResolveNew - replace Vars with corresponding items from a targetlist
- *
- * Vars matching target_varno and sublevels_up are replaced by the
- * entry with matching resno from targetlist, if there is one.
- * If not, we either change the unmatched Var's varno to update_varno
- * (when event == CMD_UPDATE) or replace it with a constant NULL.
+ * replace_rte_variables() finds all Vars in an expression tree
+ * that reference a particular RTE, and replaces them with substitute
+ * expressions obtained from a caller-supplied callback function.
  *
- * The caller must also provide target_rte, the RTE describing the target
- * relation.  This is needed to handle whole-row Vars referencing the target.
- * We expand such Vars into RowExpr constructs.
+ * When invoking replace_rte_variables on a portion of a Query, pass the
+ * address of the containing Query's hasSubLinks field as outer_hasSubLinks.
+ * Otherwise, pass NULL, but inserting a SubLink into a non-Query expression
+ * will then cause an error.
  *
  * Note: the business with inserted_sublink is needed to update hasSubLinks
  * in subqueries when the replacement adds a subquery inside a subquery.
@@ -1084,122 +1082,88 @@ AddInvertedQual(Query *parsetree, Node *qual)
  * because it isn't possible for this transformation to insert a level-zero
  * aggregate reference into a subquery --- it could only insert outer aggs.
  * Likewise for hasWindowFuncs.
+ *
+ * Note: usually, we'd not expose the mutator function or context struct
+ * for a function like this.  We do so because callbacks often find it
+ * convenient to recurse directly to the mutator on sub-expressions of
+ * what they will return.
  */
-
-typedef struct
+Node *
+replace_rte_variables(Node *node, int target_varno, int sublevels_up,
+					  replace_rte_variables_callback callback,
+					  void *callback_arg,
+					  bool *outer_hasSubLinks)
 {
-	int			target_varno;
-	int			sublevels_up;
-	RangeTblEntry *target_rte;
-	List	   *targetlist;
-	int			event;
-	int			update_varno;
-	bool		inserted_sublink;
-} ResolveNew_context;
+	Node	   *result;
+	replace_rte_variables_context context;
 
-static Node *
-resolve_one_var(Var *var, ResolveNew_context *context)
-{
-	TargetEntry *tle;
+	context.callback = callback;
+	context.callback_arg = callback_arg;
+	context.target_varno = target_varno;
+	context.sublevels_up = sublevels_up;
 
-	tle = get_tle_by_resno(context->targetlist, var->varattno);
+	/*
+	 * We try to initialize inserted_sublink to true if there is no need to
+	 * detect new sublinks because the query already has some.
+	 */
+	if (node && IsA(node, Query))
+		context.inserted_sublink = ((Query *) node)->hasSubLinks;
+	else if (outer_hasSubLinks)
+		context.inserted_sublink = *outer_hasSubLinks;
+	else
+		context.inserted_sublink = false;
 
-	if (tle == NULL)
+	/*
+	 * Must be prepared to start with a Query or a bare expression tree; if
+	 * it's a Query, we don't want to increment sublevels_up.
+	 */
+	result = query_or_expression_tree_mutator(node,
+											  replace_rte_variables_mutator,
+											  (void *) &context,
+											  0);
+
+	if (context.inserted_sublink)
 	{
-		/* Failed to find column in insert/update tlist */
-		if (context->event == CMD_UPDATE)
-		{
-			/* For update, just change unmatched var's varno */
-			var = (Var *) copyObject(var);
-			var->varno = context->update_varno;
-			var->varnoold = context->update_varno;
-			return (Node *) var;
-		}
+		if (result && IsA(result, Query))
+			((Query *) result)->hasSubLinks = true;
+		else if (outer_hasSubLinks)
+			*outer_hasSubLinks = true;
 		else
-		{
-			/* Otherwise replace unmatched var with a null */
-			/* need coerce_to_domain in case of NOT NULL domain constraint */
-			return coerce_to_domain((Node *) makeNullConst(var->vartype,
-														   var->vartypmod),
-									InvalidOid, -1,
-									var->vartype,
-									COERCE_IMPLICIT_CAST,
-									-1,
-									false,
-									false);
-		}
+			elog(ERROR, "replace_rte_variables inserted a SubLink, but has noplace to record it");
 	}
-	else
-	{
-		/* Make a copy of the tlist item to return */
-		Node	   *n = copyObject(tle->expr);
 
-		/* Adjust varlevelsup if tlist item is from higher query */
-		if (var->varlevelsup > 0)
-			IncrementVarSublevelsUp(n, var->varlevelsup, 0);
-		/* Report it if we are adding a sublink to query */
-		if (!context->inserted_sublink)
-			context->inserted_sublink = checkExprHasSubLink(n);
-		return n;
-	}
+	return result;
 }
 
-static Node *
-ResolveNew_mutator(Node *node, ResolveNew_context *context)
+Node *
+replace_rte_variables_mutator(Node *node,
+							  replace_rte_variables_context *context)
 {
 	if (node == NULL)
 		return NULL;
 	if (IsA(node, Var))
 	{
 		Var		   *var = (Var *) node;
-		int			this_varno = (int) var->varno;
-		int			this_varlevelsup = (int) var->varlevelsup;
 
-		if (this_varno == context->target_varno &&
-			this_varlevelsup == context->sublevels_up)
+		if (var->varno == context->target_varno &&
+			var->varlevelsup == context->sublevels_up)
 		{
-			if (var->varattno == InvalidAttrNumber)
-			{
-				/* Must expand whole-tuple reference into RowExpr */
-				RowExpr    *rowexpr;
-				List	   *colnames;
-				List	   *fields;
-
-				/*
-				 * If generating an expansion for a var of a named rowtype
-				 * (ie, this is a plain relation RTE), then we must include
-				 * dummy items for dropped columns.  If the var is RECORD (ie,
-				 * this is a JOIN), then omit dropped columns.	Either way,
-				 * attach column names to the RowExpr for use of ruleutils.c.
-				 */
-				expandRTE(context->target_rte,
-						  this_varno, this_varlevelsup, var->location,
-						  (var->vartype != RECORDOID),
-						  &colnames, &fields);
-				/* Adjust the generated per-field Vars... */
-				fields = (List *) ResolveNew_mutator((Node *) fields,
-													 context);
-				rowexpr = makeNode(RowExpr);
-				rowexpr->args = fields;
-				rowexpr->row_typeid = var->vartype;
-				rowexpr->row_format = COERCE_IMPLICIT_CAST;
-				rowexpr->colnames = colnames;
-				rowexpr->location = -1;
-
-				return (Node *) rowexpr;
-			}
-
-			/* Normal case for scalar variable */
-			return resolve_one_var(var, context);
+			/* Found a matching variable, make the substitution */
+			Node	   *newnode;
+
+			newnode = (*context->callback) (var, context);
+			/* Detect if we are adding a sublink to query */
+			if (!context->inserted_sublink)
+				context->inserted_sublink = checkExprHasSubLink(newnode);
+			return newnode;
 		}
 		/* otherwise fall through to copy the var normally */
 	}
 	else if (IsA(node, CurrentOfExpr))
 	{
 		CurrentOfExpr *cexpr = (CurrentOfExpr *) node;
-		int			this_varno = (int) cexpr->cvarno;
 
-		if (this_varno == context->target_varno &&
+		if (cexpr->cvarno == context->target_varno &&
 			context->sublevels_up == 0)
 		{
 			/*
@@ -1222,9 +1186,9 @@ ResolveNew_mutator(Node *node, ResolveNew_context *context)
 
 		context->sublevels_up++;
 		save_inserted_sublink = context->inserted_sublink;
-		context->inserted_sublink = false;
+		context->inserted_sublink = ((Query *) node)->hasSubLinks;
 		newnode = query_tree_mutator((Query *) node,
-									 ResolveNew_mutator,
+									 replace_rte_variables_mutator,
 									 (void *) context,
 									 0);
 		newnode->hasSubLinks |= context->inserted_sublink;
@@ -1232,46 +1196,128 @@ ResolveNew_mutator(Node *node, ResolveNew_context *context)
 		context->sublevels_up--;
 		return (Node *) newnode;
 	}
-	return expression_tree_mutator(node, ResolveNew_mutator,
+	return expression_tree_mutator(node, replace_rte_variables_mutator,
 								   (void *) context);
 }
 
+
+/*
+ * ResolveNew - replace Vars with corresponding items from a targetlist
+ *
+ * Vars matching target_varno and sublevels_up are replaced by the
+ * entry with matching resno from targetlist, if there is one.
+ * If not, we either change the unmatched Var's varno to update_varno
+ * (when event == CMD_UPDATE) or replace it with a constant NULL.
+ *
+ * The caller must also provide target_rte, the RTE describing the target
+ * relation.  This is needed to handle whole-row Vars referencing the target.
+ * We expand such Vars into RowExpr constructs.
+ *
+ * outer_hasSubLinks works the same as for replace_rte_variables().
+ */
+
+typedef struct
+{
+	RangeTblEntry *target_rte;
+	List	   *targetlist;
+	int			event;
+	int			update_varno;
+} ResolveNew_context;
+
+static Node *
+ResolveNew_callback(Var *var,
+					replace_rte_variables_context *context)
+{
+	ResolveNew_context *rcon = (ResolveNew_context *) context->callback_arg;
+	TargetEntry *tle;
+
+	if (var->varattno == InvalidAttrNumber)
+	{
+		/* Must expand whole-tuple reference into RowExpr */
+		RowExpr    *rowexpr;
+		List	   *colnames;
+		List	   *fields;
+
+		/*
+		 * If generating an expansion for a var of a named rowtype
+		 * (ie, this is a plain relation RTE), then we must include
+		 * dummy items for dropped columns.  If the var is RECORD (ie,
+		 * this is a JOIN), then omit dropped columns.	Either way,
+		 * attach column names to the RowExpr for use of ruleutils.c.
+		 */
+		expandRTE(rcon->target_rte,
+				  var->varno, var->varlevelsup, var->location,
+				  (var->vartype != RECORDOID),
+				  &colnames, &fields);
+		/* Adjust the generated per-field Vars... */
+		fields = (List *) replace_rte_variables_mutator((Node *) fields,
+														context);
+		rowexpr = makeNode(RowExpr);
+		rowexpr->args = fields;
+		rowexpr->row_typeid = var->vartype;
+		rowexpr->row_format = COERCE_IMPLICIT_CAST;
+		rowexpr->colnames = colnames;
+		rowexpr->location = var->location;
+
+		return (Node *) rowexpr;
+	}
+
+	/* Normal case referencing one targetlist element */
+	tle = get_tle_by_resno(rcon->targetlist, var->varattno);
+
+	if (tle == NULL)
+	{
+		/* Failed to find column in insert/update tlist */
+		if (rcon->event == CMD_UPDATE)
+		{
+			/* For update, just change unmatched var's varno */
+			var = (Var *) copyObject(var);
+			var->varno = rcon->update_varno;
+			var->varnoold = rcon->update_varno;
+			return (Node *) var;
+		}
+		else
+		{
+			/* Otherwise replace unmatched var with a null */
+			/* need coerce_to_domain in case of NOT NULL domain constraint */
+			return coerce_to_domain((Node *) makeNullConst(var->vartype,
+														   var->vartypmod),
+									InvalidOid, -1,
+									var->vartype,
+									COERCE_IMPLICIT_CAST,
+									-1,
+									false,
+									false);
+		}
+	}
+	else
+	{
+		/* Make a copy of the tlist item to return */
+		Node	   *newnode = copyObject(tle->expr);
+
+		/* Must adjust varlevelsup if tlist item is from higher query */
+		if (var->varlevelsup > 0)
+			IncrementVarSublevelsUp(newnode, var->varlevelsup, 0);
+
+		return newnode;
+	}
+}
+
 Node *
 ResolveNew(Node *node, int target_varno, int sublevels_up,
 		   RangeTblEntry *target_rte,
-		   List *targetlist, int event, int update_varno)
+		   List *targetlist, int event, int update_varno,
+		   bool *outer_hasSubLinks)
 {
-	Node	   *result;
 	ResolveNew_context context;
 
-	context.target_varno = target_varno;
-	context.sublevels_up = sublevels_up;
 	context.target_rte = target_rte;
 	context.targetlist = targetlist;
 	context.event = event;
 	context.update_varno = update_varno;
-	context.inserted_sublink = false;
-
-	/*
-	 * Must be prepared to start with a Query or a bare expression tree; if
-	 * it's a Query, we don't want to increment sublevels_up.
-	 */
-	result = query_or_expression_tree_mutator(node,
-											  ResolveNew_mutator,
-											  (void *) &context,
-											  0);
 
-	if (context.inserted_sublink)
-	{
-		if (IsA(result, Query))
-			((Query *) result)->hasSubLinks = true;
-
-		/*
-		 * Note: if we're called on a non-Query node then it's the caller's
-		 * responsibility to update hasSubLinks in the ancestor Query. This is
-		 * pretty fragile and perhaps should be rethought ...
-		 */
-	}
-
-	return result;
+	return replace_rte_variables(node, target_varno, sublevels_up,
+								 ResolveNew_callback,
+								 (void *) &context,
+								 outer_hasSubLinks);
 }
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h
index 51993f3a78b..15150728d9b 100644
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.50 2009/06/11 14:49:12 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.51 2009/09/02 17:52:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,21 @@
 #include "nodes/parsenodes.h"
 
 
+typedef struct replace_rte_variables_context replace_rte_variables_context;
+
+typedef Node * (*replace_rte_variables_callback) (Var *var,
+									replace_rte_variables_context *context);
+
+struct replace_rte_variables_context
+{
+	replace_rte_variables_callback callback;	/* callback function */
+	void	   *callback_arg;		/* context data for callback function */
+	int			target_varno;		/* RTE index to search for */
+	int			sublevels_up;		/* (current) nesting depth */
+	bool		inserted_sublink;	/* have we inserted a SubLink? */
+};
+
+
 extern void OffsetVarNodes(Node *node, int offset, int sublevels_up);
 extern void ChangeVarNodes(Node *node, int old_varno, int new_varno,
 			   int sublevels_up);
@@ -42,8 +57,17 @@ extern bool checkExprHasAggs(Node *node);
 extern bool checkExprHasWindowFuncs(Node *node);
 extern bool checkExprHasSubLink(Node *node);
 
+extern Node *replace_rte_variables(Node *node,
+					  int target_varno, int sublevels_up,
+					  replace_rte_variables_callback callback,
+					  void *callback_arg,
+					  bool *outer_hasSubLinks);
+extern Node *replace_rte_variables_mutator(Node *node,
+							  replace_rte_variables_context *context);
+
 extern Node *ResolveNew(Node *node, int target_varno, int sublevels_up,
 		   RangeTblEntry *target_rte,
-		   List *targetlist, int event, int update_varno);
+		   List *targetlist, int event, int update_varno,
+		   bool *outer_hasSubLinks);
 
 #endif   /* REWRITEMANIP_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index c8c9b960b5b..dd9bf9c43f0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2394,3 +2394,52 @@ select * from a left join b on i = x and i = y and x = i;
 (0 rows)
 
 rollback;
+--
+-- test NULL behavior of whole-row Vars, per bug #5025
+--
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+        q2         | count 
+-------------------+-------
+ -4567890123456789 |     0
+               123 |     2
+               456 |     0
+  4567890123456789 |     6
+(4 rows)
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+        q2         | count 
+-------------------+-------
+ -4567890123456789 |     0
+               123 |     2
+               456 |     0
+  4567890123456789 |     6
+(4 rows)
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+        q2         | count 
+-------------------+-------
+ -4567890123456789 |     0
+               123 |     2
+               456 |     0
+  4567890123456789 |     6
+(4 rows)
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join
+  (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2
+  on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+        q2         | count 
+-------------------+-------
+ -4567890123456789 |     0
+               123 |     2
+               456 |     0
+  4567890123456789 |     6
+(4 rows)
+
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1516e09b56c..46ff66c6909 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -540,3 +540,24 @@ create temp table b (x integer, y integer);
 select * from a left join b on i = x and i = y and x = i;
 
 rollback;
+
+--
+-- test NULL behavior of whole-row Vars, per bug #5025
+--
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join
+  (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2
+  on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
-- 
GitLab