From 9f2ee8f287098fb8067593b38da0650df458b20a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 26 Oct 2009 02:26:45 +0000
Subject: [PATCH] Re-implement EvalPlanQual processing to improve its
 performance and eliminate a lot of strange behaviors that occurred in join
 cases.  We now identify the "current" row for every joined relation in
 UPDATE, DELETE, and SELECT FOR UPDATE/SHARE queries.  If an EvalPlanQual
 recheck is necessary, we jam the appropriate row into each scan node in the
 rechecking plan, forcing it to emit only that one row.  The former behavior
 could rescan the whole of each joined relation for each recheck, which was
 terrible for performance and, what's much worse, could result in duplicated
 output tuples.

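To illustrate the row substitution described above: during a recheck, each
scan node hands back the stored test tuple (once) instead of scanning its
relation.  A condensed sketch of the new ExecScanFetch() logic in
execScan.c below, with the no-test-tuple case omitted:

    /* Inside an EvalPlanQual recheck: return only the stored test tuple */
    if (estate->es_epqTupleSet[scanrelid - 1])
    {
        TupleTableSlot *slot = node->ss_ScanTupleSlot;

        if (estate->es_epqScanDone[scanrelid - 1])
            return ExecClearTuple(slot);   /* already returned it once */
        estate->es_epqScanDone[scanrelid - 1] = true;

        /* store the test tuple and let the access method recheck its own
         * conditions (the index quals, for instance) */
        ExecStoreTuple(estate->es_epqTuple[scanrelid - 1],
                       slot, InvalidBuffer, false);
        if (!(*recheckMtd) (node, slot))
            ExecClearTuple(slot);          /* fails AM-specific conditions */
        return slot;
    }
    /* Not in a recheck: run the normal next-tuple routine */
    return (*accessMtd) (node);
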
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested.  To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param.  Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
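
For reference, the rescan signaling amounts to flagging that Param as
changed once a new test tuple has been installed; the already-built plan
tree then resets itself on the next ExecProcNode call.  Condensed from
EvalPlanQualBegin() in execMain.c below:

    /* mark the child plan tree as needing rescan at all scan nodes; the
     * first ExecProcNode call performs the actual rescan */
    planstate->chgParam = bms_add_member(planstate->chgParam,
                                         epqstate->epqParam);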

This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE.  This is needed to avoid the
duplicate-output-tuple problem.  It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
---
 src/backend/commands/trigger.c            |  32 +-
 src/backend/commands/vacuum.c             |   4 +-
 src/backend/executor/README               |  75 +--
 src/backend/executor/execCurrent.c        |   5 +-
 src/backend/executor/execMain.c           | 758 ++++++++++++----------
 src/backend/executor/execQual.c           |   4 +-
 src/backend/executor/execScan.c           |  96 ++-
 src/backend/executor/execUtils.c          |  10 +-
 src/backend/executor/nodeBitmapHeapscan.c |  80 +--
 src/backend/executor/nodeCtescan.c        |  26 +-
 src/backend/executor/nodeFunctionscan.c   |  29 +-
 src/backend/executor/nodeIndexscan.c      |  77 +--
 src/backend/executor/nodeLockRows.c       | 170 ++---
 src/backend/executor/nodeModifyTable.c    |  68 +-
 src/backend/executor/nodeSeqscan.c        |  80 +--
 src/backend/executor/nodeSubqueryscan.c   |  47 +-
 src/backend/executor/nodeTidscan.c        |  74 +--
 src/backend/executor/nodeValuesscan.c     |  25 +-
 src/backend/executor/nodeWorktablescan.c  |  26 +-
 src/backend/nodes/copyfuncs.c             |  31 +-
 src/backend/nodes/equalfuncs.c            |   5 +-
 src/backend/nodes/outfuncs.c              |  28 +-
 src/backend/nodes/readfuncs.c             |   5 +-
 src/backend/optimizer/path/allpaths.c     |   4 +-
 src/backend/optimizer/plan/createplan.c   |  12 +-
 src/backend/optimizer/plan/initsplan.c    |   5 +-
 src/backend/optimizer/plan/planner.c      | 227 ++++++-
 src/backend/optimizer/plan/setrefs.c      |  28 +-
 src/backend/optimizer/plan/subselect.c    | 114 +++-
 src/backend/optimizer/prep/prepjointree.c |   3 +-
 src/backend/optimizer/prep/preptlist.c    | 107 +--
 src/backend/optimizer/prep/prepunion.c    |  26 +-
 src/backend/parser/analyze.c              |  17 +-
 src/backend/parser/parse_relation.c       |   4 +-
 src/backend/rewrite/rewriteHandler.c      |   6 +-
 src/backend/rewrite/rewriteManip.c        |   5 +-
 src/backend/tcop/utility.c                |   6 +-
 src/backend/utils/cache/plancache.c       |  29 +-
 src/include/catalog/catversion.h          |   4 +-
 src/include/commands/trigger.h            |   6 +-
 src/include/executor/executor.h           |  32 +-
 src/include/nodes/execnodes.h             |  58 +-
 src/include/nodes/nodes.h                 |   5 +-
 src/include/nodes/parsenodes.h            |  21 +-
 src/include/nodes/plannodes.h             |  68 +-
 src/include/nodes/relation.h              |  10 +-
 src/include/optimizer/planmain.h          |   7 +-
 src/include/optimizer/prep.h              |   4 +-
 src/include/optimizer/subselect.h         |   4 +-
 src/include/parser/parsetree.h            |   4 +-
 50 files changed, 1550 insertions(+), 1021 deletions(-)

diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index dd526f6db19..cdd545eeaa6 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.254 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.255 2009/10/26 02:26:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,7 +61,7 @@ int			SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void InsertTrigger(TriggerDesc *trigdesc, Trigger *trigger, int indx);
 static HeapTuple GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot);
@@ -1828,7 +1828,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 bool
-ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
+ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid)
 {
@@ -1842,7 +1842,7 @@ ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
 	TupleTableSlot *newSlot;
 	int			i;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return false;
@@ -1964,7 +1964,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 HeapTuple
-ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
+ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid, HeapTuple newtuple)
 {
@@ -1979,7 +1979,7 @@ ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
 	int			i;
 	Bitmapset   *modifiedCols;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return NULL;
@@ -2107,7 +2107,7 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 
 static HeapTuple
 GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot)
@@ -2125,8 +2125,8 @@ GetTupleForTrigger(EState *estate,
 
 		*newSlot = NULL;
 
-		/* caller must pass a subplanstate if EvalPlanQual is possible */
-		Assert(subplanstate != NULL);
+		/* caller must pass an epqstate if EvalPlanQual is possible */
+		Assert(epqstate != NULL);
 
 		/*
 		 * lock tuple for update
@@ -2153,27 +2153,35 @@ ltrmark:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+				if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
 				{
 					/* it was updated, so look at the updated version */
 					TupleTableSlot *epqslot;
 
 					epqslot = EvalPlanQual(estate,
+										   epqstate,
+										   relation,
 										   relinfo->ri_RangeTableIndex,
-										   subplanstate,
 										   &update_ctid,
 										   update_xmax);
 					if (!TupIsNull(epqslot))
 					{
 						*tid = update_ctid;
 						*newSlot = epqslot;
+
+						/*
+						 * EvalPlanQual already locked the tuple, but we
+						 * re-call heap_lock_tuple anyway as an easy way
+						 * of re-fetching the correct tuple.  Speed is
+						 * hardly a criterion in this path anyhow.
+						 */
 						goto ltrmark;
 					}
 				}
 
 				/*
 				 * if tuple was deleted or PlanQual failed for updated tuple -
-				 * we have not process this tuple!
+				 * we must not process this tuple!
 				 */
 				return NULL;
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 51c44c80dd5..e375fb4dae4 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.393 2009/09/01 04:46:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.394 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,7 +102,7 @@ typedef VacPageListData *VacPageList;
  * Note: because t_ctid links can be stale (this would only occur if a prior
  * VACUUM crashed partway through), it is possible that new_tid points to an
  * empty slot or unrelated tuple.  We have to check the linkage as we follow
- * it, just as is done in EvalPlanQual.
+ * it, just as is done in EvalPlanQualFetch.
  */
 typedef struct VTupleLinkData
 {
diff --git a/src/backend/executor/README b/src/backend/executor/README
index 06d05d52311..c928186e06c 100644
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/executor/README,v 1.10 2009/10/12 18:10:41 tgl Exp $
+$PostgreSQL: pgsql/src/backend/executor/README,v 1.11 2009/10/26 02:26:29 tgl Exp $
 
 The Postgres Executor
 =====================
@@ -160,41 +160,38 @@ modified tuple.  SELECT FOR UPDATE/SHARE behaves similarly, except that its
 action is just to lock the modified tuple and return results based on that
 version of the tuple.
 
-To implement this checking, we actually re-run the entire query from scratch
-for each modified tuple, but with the scan node that sourced the original
-tuple set to return only the modified tuple, not the original tuple or any
-of the rest of the relation.  If this query returns a tuple, then the
-modified tuple passes the quals (and the query output is the suitably
-modified update tuple, if we're doing UPDATE).  If no tuple is returned,
-then the modified tuple fails the quals, so we ignore it and continue the
-original query.  (This is reasonably efficient for simple queries, but may
-be horribly slow for joins.  A better design would be nice; one thought for
-future investigation is to treat the tuple substitution like a parameter,
-so that we can avoid rescanning unrelated nodes.)
-
-Note a fundamental bogosity of this approach: if the relation containing
-the original tuple is being used in a self-join, the other instance(s) of
-the relation will be treated as still containing the original tuple, whereas
-logical consistency would demand that the modified tuple appear in them too.
-But we'd have to actually substitute the modified tuple for the original,
-while still returning all the rest of the relation, to ensure consistent
-answers.  Implementing this correctly is a task for future work.
-
-In UPDATE/DELETE, only the target relation needs to be handled this way,
-so only one special recheck query needs to execute at a time.  In SELECT FOR
-UPDATE, there may be multiple relations flagged FOR UPDATE, so it's possible
-that while we are executing a recheck query for one modified tuple, we will
-hit another modified tuple in another relation.  In this case we "stack up"
-recheck queries: a sub-recheck query is spawned in which both the first and
-second modified tuples will be returned as the only components of their
-relations.  (In event of success, all these modified tuples will be locked.)
-Again, this isn't necessarily quite the right thing ... but in simple cases
-it works.  Potentially, recheck queries could get nested to the depth of the
-number of FOR UPDATE/SHARE relations in the query.
-
-It should be noted also that UPDATE/DELETE expect at most one tuple to
-result from the modified query, whereas in the FOR UPDATE case it's possible
-for multiple tuples to result (since we could be dealing with a join in
-which multiple tuples join to the modified tuple).  We want FOR UPDATE to
-lock all relevant tuples, so we process all tuples output by all the stacked
-recheck queries.
+To implement this checking, we actually re-run the query from scratch for
+each modified tuple (or set of tuples, for SELECT FOR UPDATE), with the
+relation scan nodes tweaked to return only the current tuples --- either
+the original ones, or the updated (and now locked) versions of the modified
+tuple(s).  If this query returns a tuple, then the modified tuple(s) pass
+the quals (and the query output is the suitably modified update tuple, if
+we're doing UPDATE).  If no tuple is returned, then the modified tuple(s)
+fail the quals, so we ignore the current result tuple and continue the
+original query.
+
+In UPDATE/DELETE, only the target relation needs to be handled this way.
+In SELECT FOR UPDATE, there may be multiple relations flagged FOR UPDATE,
+so we obtain lock on the current tuple version in each such relation before
+executing the recheck.
+
+It is also possible that there are relations in the query that are not
+to be locked (they are neither the UPDATE/DELETE target nor specified to
+be locked in SELECT FOR UPDATE/SHARE).  When re-running the test query
+we want to use the same rows from these relations that were joined to
+the locked rows.  For ordinary relations this can be implemented relatively
+cheaply by including the row TID in the join outputs and re-fetching that
+TID.  (The re-fetch is expensive, but we're trying to optimize the normal
+case where no re-test is needed.)  We have also to consider non-table
+relations, such as a ValuesScan or FunctionScan.  For these, since there
+is no equivalent of TID, the only practical solution seems to be to include
+the entire row value in the join output row.
+
+We disallow set-returning functions in the targetlist of SELECT FOR UPDATE,
+so as to ensure that at most one tuple can be returned for any particular
+set of scan tuples.  Otherwise we'd get duplicates due to the original
+query returning the same set of scan tuples multiple times.  (Note: there
+is no explicit prohibition on SRFs in UPDATE, but the net effect will be
+that only the first result row of an SRF counts, because all subsequent
+rows will result in attempts to re-update an already updated target row.
+This is historical behavior and seems not worth changing.)
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index 78ad80db66f..a4103332c40 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.11 2009/10/12 18:10:41 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.12 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,6 +102,9 @@ execCurrentOf(CurrentOfExpr *cexpr,
 		{
 			ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc);
 
+			if (!RowMarkRequiresRowShareLock(thiserm->markType))
+				continue;		/* ignore non-FOR UPDATE/SHARE items */
+
 			if (RelationGetRelid(thiserm->relation) == table_oid)
 			{
 				if (erm)
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d03ad094184..d7d99bc0aea 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -26,7 +26,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.333 2009/10/12 18:10:41 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.334 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,17 +61,6 @@ ExecutorStart_hook_type ExecutorStart_hook = NULL;
 ExecutorRun_hook_type ExecutorRun_hook = NULL;
 ExecutorEnd_hook_type ExecutorEnd_hook = NULL;
 
-typedef struct evalPlanQual
-{
-	Index		rti;
-	EState	   *estate;
-	PlanState  *planstate;
-	PlanState  *origplanstate;
-	TupleTableSlot *resultslot;
-	struct evalPlanQual *next;	/* stack of active PlanQual plans */
-	struct evalPlanQual *free;	/* list of free PlanQual plans */
-} evalPlanQual;
-
 /* decls for local routines only used within this module */
 static void InitPlan(QueryDesc *queryDesc, int eflags);
 static void ExecEndPlan(PlanState *planstate, EState *estate);
@@ -81,13 +70,11 @@ static void ExecutePlan(EState *estate, PlanState *planstate,
 			long numberTuples,
 			ScanDirection direction,
 			DestReceiver *dest);
-static void EndEvalPlanQual(EState *estate);
 static void ExecCheckRTPerms(List *rangeTable);
 static void ExecCheckRTEPerms(RangeTblEntry *rte);
 static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
-static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
-							  Plan *planTree, evalPlanQual *priorepq);
-static void EvalPlanQualStop(evalPlanQual *epq);
+static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
+							  Plan *planTree);
 static void OpenIntoRel(QueryDesc *queryDesc);
 static void CloseIntoRel(QueryDesc *queryDesc);
 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
@@ -155,7 +142,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
 	/*
-	 * Fill in parameters, if any, from queryDesc
+	 * Fill in external parameters, if any, from queryDesc; and allocate
+	 * workspace for internal parameters
 	 */
 	estate->es_param_list_info = queryDesc->params;
 
@@ -648,6 +636,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 	 * initialize the node's execution state
 	 */
 	estate->es_range_table = rangeTable;
+	estate->es_plannedstmt = plannedstmt;
 
 	/*
 	 * initialize result relation stuff, and open/lock the result rels.
@@ -703,7 +692,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 	estate->es_rowMarks = NIL;
 	foreach(l, plannedstmt->rowMarks)
 	{
-		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
+		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
 		Oid			relid;
 		Relation	relation;
 		ExecRowMark *erm;
@@ -712,18 +701,36 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 		if (rc->isParent)
 			continue;
 
-		relid = getrelid(rc->rti, rangeTable);
-		relation = heap_open(relid, RowShareLock);
+		switch (rc->markType)
+		{
+			case ROW_MARK_EXCLUSIVE:
+			case ROW_MARK_SHARE:
+				relid = getrelid(rc->rti, rangeTable);
+				relation = heap_open(relid, RowShareLock);
+				break;
+			case ROW_MARK_REFERENCE:
+				relid = getrelid(rc->rti, rangeTable);
+				relation = heap_open(relid, AccessShareLock);
+				break;
+			case ROW_MARK_COPY:
+				/* there's no real table here ... */
+				relation = NULL;
+				break;
+			default:
+				elog(ERROR, "unrecognized markType: %d", rc->markType);
+				relation = NULL;	/* keep compiler quiet */
+				break;
+		}
+
 		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
 		erm->relation = relation;
 		erm->rti = rc->rti;
 		erm->prti = rc->prti;
-		erm->rowmarkId = rc->rowmarkId;
-		erm->forUpdate = rc->forUpdate;
+		erm->markType = rc->markType;
 		erm->noWait = rc->noWait;
-		/* remaining fields are filled during LockRows plan node init */
-		erm->ctidAttNo = InvalidAttrNumber;
-		erm->toidAttNo = InvalidAttrNumber;
+		erm->ctidAttNo = rc->ctidAttNo;
+		erm->toidAttNo = rc->toidAttNo;
+		erm->wholeAttNo = rc->wholeAttNo;
 		ItemPointerSetInvalid(&(erm->curCtid));
 		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
 	}
@@ -747,10 +754,9 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 	estate->es_trig_tuple_slot = NULL;
 
 	/* mark EvalPlanQual not active */
-	estate->es_plannedstmt = plannedstmt;
-	estate->es_evalPlanQual = NULL;
-	estate->es_evTupleNull = NULL;
-	estate->es_evTuple = NULL;
+	estate->es_epqTuple = NULL;
+	estate->es_epqTupleSet = NULL;
+	estate->es_epqScanDone = NULL;
 
 	/*
 	 * Initialize private state information for each SubPlan.  We must do this
@@ -1076,12 +1082,6 @@ ExecEndPlan(PlanState *planstate, EState *estate)
 	int			i;
 	ListCell   *l;
 
-	/*
-	 * shut down any PlanQual processing we were doing
-	 */
-	if (estate->es_evalPlanQual != NULL)
-		EndEvalPlanQual(estate);
-
 	/*
 	 * shut down the node-type-specific query processing
 	 */
@@ -1133,9 +1133,10 @@ ExecEndPlan(PlanState *planstate, EState *estate)
 	 */
 	foreach(l, estate->es_rowMarks)
 	{
-		ExecRowMark *erm = lfirst(l);
+		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
 
-		heap_close(erm->relation, NoLock);
+		if (erm->relation)
+			heap_close(erm->relation, NoLock);
 	}
 }
 
@@ -1330,15 +1331,23 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
 	}
 }
 
+
 /*
- * Check a modified tuple to see if we want to process its updated version
- * under READ COMMITTED rules.
+ * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
+ * process the updated version under READ COMMITTED rules.
  *
  * See backend/executor/README for some info about how this works.
+ */
+
+
+/*
+ * Check a modified tuple to see if we want to process its updated version
+ * under READ COMMITTED rules.
  *
- *	estate - executor state data
+ *	estate - outer executor state data
+ *	epqstate - state for EvalPlanQual rechecking
+ *	relation - table containing tuple
  *	rti - rangetable index of table containing tuple
- *	subplanstate - portion of plan tree that needs to be re-evaluated
  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
  *	priorXmax - t_xmax from the outdated tuple
  *
@@ -1349,19 +1358,20 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
  * NULL if we determine we shouldn't process the row.
  */
 TupleTableSlot *
-EvalPlanQual(EState *estate, Index rti,
-			 PlanState *subplanstate,
+EvalPlanQual(EState *estate, EPQState *epqstate,
+			 Relation relation, Index rti,
 			 ItemPointer tid, TransactionId priorXmax)
 {
 	TupleTableSlot *slot;
 	HeapTuple	copyTuple;
 
-	Assert(rti != 0);
+	Assert(rti > 0);
 
 	/*
-	 * Get the updated version of the row; if fail, return NULL.
+	 * Get and lock the updated version of the row; if fail, return NULL.
 	 */
-	copyTuple = EvalPlanQualFetch(estate, rti, tid, priorXmax);
+	copyTuple = EvalPlanQualFetch(estate, relation, LockTupleExclusive,
+								  tid, priorXmax);
 
 	if (copyTuple == NULL)
 		return NULL;
@@ -1373,52 +1383,32 @@ EvalPlanQual(EState *estate, Index rti,
 	*tid = copyTuple->t_self;
 
 	/*
-	 * Need to run a recheck subquery.	Find or create a PQ stack entry.
+	 * Need to run a recheck subquery.	Initialize or reinitialize EPQ state.
 	 */
-	EvalPlanQualPush(estate, rti, subplanstate);
+	EvalPlanQualBegin(epqstate, estate);
 
 	/*
-	 * free old RTE' tuple, if any, and store target tuple where relation's
+	 * Free old test tuple, if any, and store new tuple where relation's
 	 * scan node will see it
 	 */
-	EvalPlanQualSetTuple(estate, rti, copyTuple);
+	EvalPlanQualSetTuple(epqstate, rti, copyTuple);
 
 	/*
-	 * Run the EPQ query, but just for one tuple.
+	 * Fetch any non-locked source rows
 	 */
-	slot = EvalPlanQualNext(estate);
+	EvalPlanQualFetchRowMarks(epqstate);
 
 	/*
-	 * If we got a result, we must copy it out of the EPQ query's local
-	 * context before we shut down the EPQ query.
+	 * Run the EPQ query.  We assume it will return at most one tuple.
 	 */
-	if (TupIsNull(slot))
-		slot = NULL;			/* in case we got back an empty slot */
-	else
-	{
-		TupleDesc tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
-		evalPlanQual *epq = estate->es_evalPlanQual;
-
-		if (epq->resultslot == NULL)
-		{
-			epq->resultslot = ExecInitExtraTupleSlot(estate);
-			ExecSetSlotDescriptor(epq->resultslot, tupdesc);
-		}
-		else
-		{
-			TupleDesc oldtupdesc = epq->resultslot->tts_tupleDescriptor;
-
-			ExecSetSlotDescriptor(epq->resultslot, tupdesc);
-			FreeTupleDesc(oldtupdesc);
-		}
-
-		slot = ExecCopySlot(epq->resultslot, slot);
-	}
+	slot = EvalPlanQualNext(epqstate);
 
 	/*
-	 * Shut it down ...
+	 * Clear out the test tuple.  This is needed in case the EPQ query
+	 * is re-used to test a tuple for a different relation.  (Not clear
+	 * that can really happen, but let's be safe.)
 	 */
-	EvalPlanQualPop(estate, subplanstate);
+	EvalPlanQualSetTuple(epqstate, rti, NULL);
 
 	return slot;
 }
@@ -1427,55 +1417,29 @@ EvalPlanQual(EState *estate, Index rti,
  * Fetch a copy of the newest version of an outdated tuple
  *
  *	estate - executor state data
- *	rti - rangetable index of table containing tuple
+ *	relation - table containing tuple
+ *	lockmode - requested tuple lock mode
  *	*tid - t_ctid from the outdated tuple (ie, next updated version)
  *	priorXmax - t_xmax from the outdated tuple
  *
  * Returns a palloc'd copy of the newest tuple version, or NULL if we find
  * that there is no newest version (ie, the row was deleted not updated).
+ * If successful, we have locked the newest tuple version, so caller does not
+ * need to worry about it changing anymore.
  *
- * XXX this does not lock the new row version ... wouldn't it be better if
- * it did?  As-is, caller might have to repeat all its work.
+ * Note: properly, lockmode should be declared as enum LockTupleMode,
+ * but we use "int" to avoid having to include heapam.h in executor.h.
  */
 HeapTuple
-EvalPlanQualFetch(EState *estate, Index rti,
+EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 				  ItemPointer tid, TransactionId priorXmax)
 {
 	HeapTuple	copyTuple = NULL;
-	Relation	relation;
 	HeapTupleData tuple;
 	SnapshotData SnapshotDirty;
 
-	Assert(rti != 0);
-
 	/*
-	 * Find relation containing target tuple --- must be either a result
-	 * relation of the query, or a SELECT FOR UPDATE target
-	 */
-	if (estate->es_result_relation_info != NULL &&
-		estate->es_result_relation_info->ri_RangeTableIndex == rti)
-		relation = estate->es_result_relation_info->ri_RelationDesc;
-	else
-	{
-		ListCell   *l;
-
-		relation = NULL;
-		foreach(l, estate->es_rowMarks)
-		{
-			ExecRowMark *erm = lfirst(l);
-
-			if (erm->rti == rti)
-			{
-				relation = erm->relation;
-				break;
-			}
-		}
-		if (relation == NULL)
-			elog(ERROR, "could not find RowMark for RT index %u", rti);
-	}
-
-	/*
-	 * fetch tid tuple
+	 * fetch target tuple
 	 *
 	 * Loop here to deal with updated or busy tuples
 	 */
@@ -1487,6 +1451,10 @@ EvalPlanQualFetch(EState *estate, Index rti,
 
 		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
 		{
+			HTSU_Result test;
+			ItemPointerData update_ctid;
+			TransactionId update_xmax;
+
 			/*
 			 * If xmin isn't what we're expecting, the slot must have been
 			 * recycled and reused for an unrelated tuple.	This implies that
@@ -1535,6 +1503,49 @@ EvalPlanQualFetch(EState *estate, Index rti,
 				return NULL;
 			}
 
+			/*
+			 * This is a live tuple, so now try to lock it.
+			 */
+			test = heap_lock_tuple(relation, &tuple, &buffer,
+								   &update_ctid, &update_xmax,
+								   estate->es_output_cid,
+								   lockmode, false);
+			/* We now have two pins on the buffer, get rid of one */
+			ReleaseBuffer(buffer);
+
+			switch (test)
+			{
+				case HeapTupleSelfUpdated:
+					/* treat it as deleted; do not process */
+					ReleaseBuffer(buffer);
+					return NULL;
+
+				case HeapTupleMayBeUpdated:
+					/* successfully locked */
+					break;
+
+				case HeapTupleUpdated:
+					ReleaseBuffer(buffer);
+					if (IsXactIsoLevelSerializable)
+						ereport(ERROR,
+								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+								 errmsg("could not serialize access due to concurrent update")));
+					if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+					{
+						/* it was updated, so look at the updated version */
+						tuple.t_self = update_ctid;
+						continue;
+					}
+					/* tuple was deleted, so give up */
+					return NULL;
+
+				default:
+					ReleaseBuffer(buffer);
+					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
+						 test);
+					return NULL;	/* keep compiler quiet */
+			}
+
 			/*
 			 * We got tuple - now copy it for use by recheck query.
 			 */
@@ -1570,7 +1581,7 @@ EvalPlanQualFetch(EState *estate, Index rti,
 		 * mean that the row was updated or deleted by either a committed xact
 		 * or our own xact.  If it was deleted, we can ignore it; if it was
 		 * updated then chain up to the next version and repeat the whole
-		 * test.
+		 * process.
 		 *
 		 * As above, it should be safe to examine xmax and t_ctid without the
 		 * buffer content lock, because they can't be changing.
@@ -1597,294 +1608,334 @@ EvalPlanQualFetch(EState *estate, Index rti,
 }
 
 /*
- * Push a new level of EPQ state, and prepare to execute the given subplan
+ * EvalPlanQualInit -- initialize during creation of a plan state node
+ * that might need to invoke EPQ processing.
+ * Note: subplan can be NULL if it will be set later with EvalPlanQualSetPlan.
  */
 void
-EvalPlanQualPush(EState *estate, Index rti, PlanState *subplanstate)
+EvalPlanQualInit(EPQState *epqstate, EState *estate,
+				 Plan *subplan, int epqParam)
 {
-	evalPlanQual *epq;
-	bool		endNode;
+	/* Mark the EPQ state inactive */
+	epqstate->estate = NULL;
+	epqstate->planstate = NULL;
+	epqstate->origslot = NULL;
+	/* ... and remember data that EvalPlanQualBegin will need */
+	epqstate->plan = subplan;
+	epqstate->rowMarks = NIL;
+	epqstate->epqParam = epqParam;
+}
 
-	Assert(rti != 0);
+/*
+ * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
+ *
+ * We need this so that ModifyTable can deal with multiple subplans.
+ */
+void
+EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan)
+{
+	/* If we have a live EPQ query, shut it down */
+	EvalPlanQualEnd(epqstate);
+	/* And set/change the plan pointer */
+	epqstate->plan = subplan;
+}
 
-	epq = estate->es_evalPlanQual;
-	endNode = true;
+/*
+ * EvalPlanQualAddRowMark -- add an ExecRowMark that EPQ needs to handle.
+ *
+ * Currently, only non-locking RowMarks are supported.
+ */
+void
+EvalPlanQualAddRowMark(EPQState *epqstate, ExecRowMark *erm)
+{
+	if (RowMarkRequiresRowShareLock(erm->markType))
+		elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
+	epqstate->rowMarks = lappend(epqstate->rowMarks, erm);
+}
 
-	if (epq != NULL && epq->rti == 0)
-	{
-		/* Top PQ stack entry is idle, so re-use it */
-		Assert(epq->next == NULL);
-		epq->rti = rti;
-		endNode = false;
-	}
+/*
+ * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
+ *
+ * NB: passed tuple must be palloc'd; it may get freed later
+ */
+void
+EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
+{
+	EState	   *estate = epqstate->estate;
 
-	/*
-	 * If this is request for another RTE - Ra, - then we have to check wasn't
-	 * PlanQual requested for Ra already and if so then Ra' row was updated
-	 * again and we have to re-start old execution for Ra and forget all what
-	 * we done after Ra was suspended. Cool? -:))
-	 */
-	if (epq != NULL && epq->rti != rti &&
-		epq->estate->es_evTuple[rti - 1] != NULL)
-	{
-		do
-		{
-			evalPlanQual *oldepq;
-
-			/* stop execution */
-			EvalPlanQualStop(epq);
-			/* pop previous PlanQual from the stack */
-			oldepq = epq->next;
-			Assert(oldepq && oldepq->rti != 0);
-			/* push current PQ to freePQ stack */
-			oldepq->free = epq;
-			epq = oldepq;
-			estate->es_evalPlanQual = epq;
-		} while (epq->rti != rti);
-	}
+	Assert(rti > 0);
 
 	/*
-	 * If we are requested for another RTE then we have to suspend execution
-	 * of current PlanQual and start execution for new one.
+	 * free old test tuple, if any, and store new tuple where relation's
+	 * scan node will see it
 	 */
-	if (epq == NULL || epq->rti != rti)
-	{
-		/* try to reuse plan used previously */
-		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
-
-		if (newepq == NULL)		/* first call or freePQ stack is empty */
-		{
-			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
-			newepq->free = NULL;
-			newepq->estate = NULL;
-			newepq->planstate = NULL;
-			newepq->origplanstate = NULL;
-			newepq->resultslot = NULL;
-		}
-		else
-		{
-			/* recycle previously used PlanQual */
-			Assert(newepq->estate == NULL);
-			epq->free = NULL;
-		}
-		/* push current PQ to the stack */
-		newepq->next = epq;
-		epq = newepq;
-		estate->es_evalPlanQual = epq;
-		epq->rti = rti;
-		endNode = false;
-	}
+	if (estate->es_epqTuple[rti - 1] != NULL)
+		heap_freetuple(estate->es_epqTuple[rti - 1]);
+	estate->es_epqTuple[rti - 1] = tuple;
+	estate->es_epqTupleSet[rti - 1] = true;
+}
 
-	Assert(epq->rti == rti);
-	Assert(estate->es_evalPlanQual == epq);
+/*
+ * Fetch back the current test tuple (if any) for the specified RTI
+ */
+HeapTuple
+EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
+{
+	EState	   *estate = epqstate->estate;
 
-	/*
-	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
-	 * end and restart execution of the plan, because ExecReScan wouldn't
-	 * ensure that upper plan nodes would reset themselves.  We could make
-	 * that work if insertion of the target tuple were integrated with the
-	 * Param mechanism somehow, so that the upper plan nodes know that their
-	 * children's outputs have changed.
-	 *
-	 * Note that the stack of free evalPlanQual nodes is quite useless at the
-	 * moment, since it only saves us from pallocing/releasing the
-	 * evalPlanQual nodes themselves.  But it will be useful once we implement
-	 * ReScan instead of end/restart for re-using PlanQual nodes.
-	 */
-	if (endNode)
-	{
-		/* stop execution */
-		EvalPlanQualStop(epq);
-	}
+	Assert(rti > 0);
 
-	/*
-	 * Initialize new recheck query.
-	 *
-	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
-	 * instead copy down changeable state from the top plan (including
-	 * es_result_relation_info) and reset locally changeable
-	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
-	 */
-	epq->origplanstate = subplanstate;
-	EvalPlanQualStart(epq, estate, subplanstate->plan, epq->next);
+	return estate->es_epqTuple[rti - 1];
 }
 
 /*
- * Install one test tuple into current EPQ level
+ * Fetch the current row values for any non-locked relations that need
+ * to be scanned by an EvalPlanQual operation.  origslot must have been set
+ * to contain the current result row (top-level row) that we need to recheck.
  */
 void
-EvalPlanQualSetTuple(EState *estate, Index rti, HeapTuple tuple)
+EvalPlanQualFetchRowMarks(EPQState *epqstate)
 {
-	evalPlanQual *epq = estate->es_evalPlanQual;
-	EState	   *epqstate;
+	ListCell   *l;
 
-	Assert(rti != 0);
+	Assert(epqstate->origslot != NULL);
 
-	/*
-	 * free old RTE' tuple, if any, and store target tuple where relation's
-	 * scan node will see it
-	 */
-	epqstate = epq->estate;
-	if (epqstate->es_evTuple[rti - 1] != NULL)
-		heap_freetuple(epqstate->es_evTuple[rti - 1]);
-	epqstate->es_evTuple[rti - 1] = tuple;
+	foreach(l, epqstate->rowMarks)
+	{
+		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
+		Datum		datum;
+		bool		isNull;
+		HeapTupleData tuple;
+
+		/* clear any leftover test tuple for this rel */
+		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
+
+		if (erm->relation)
+		{
+			Buffer		buffer;
+
+			Assert(erm->markType == ROW_MARK_REFERENCE);
+
+			/* if child rel, must check whether it produced this row */
+			if (erm->rti != erm->prti)
+			{
+				Oid			tableoid;
+
+				datum = ExecGetJunkAttribute(epqstate->origslot,
+											 erm->toidAttNo,
+											 &isNull);
+				/* non-locked rels could be on the inside of outer joins */
+				if (isNull)
+					continue;
+				tableoid = DatumGetObjectId(datum);
+
+				if (tableoid != RelationGetRelid(erm->relation))
+				{
+					/* this child is inactive right now */
+					continue;
+				}
+			}
+
+			/* fetch the tuple's ctid */
+			datum = ExecGetJunkAttribute(epqstate->origslot,
+										 erm->ctidAttNo,
+										 &isNull);
+			/* non-locked rels could be on the inside of outer joins */
+			if (isNull)
+				continue;
+			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+
+			/* okay, fetch the tuple */
+			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
+							false, NULL))
+				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
+
+			/* successful, copy and store tuple */
+			EvalPlanQualSetTuple(epqstate, erm->rti,
+								 heap_copytuple(&tuple));
+			ReleaseBuffer(buffer);
+		}
+		else
+		{
+			HeapTupleHeader td;
+
+			Assert(erm->markType == ROW_MARK_COPY);
+
+			/* fetch the whole-row Var for the relation */
+			datum = ExecGetJunkAttribute(epqstate->origslot,
+										 erm->wholeAttNo,
+										 &isNull);
+			/* non-locked rels could be on the inside of outer joins */
+			if (isNull)
+				continue;
+			td = DatumGetHeapTupleHeader(datum);
+
+			/* build a temporary HeapTuple control structure */
+			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
+			ItemPointerSetInvalid(&(tuple.t_self));
+			tuple.t_tableOid = InvalidOid;
+			tuple.t_data = td;
+
+			/* copy and store tuple */
+			EvalPlanQualSetTuple(epqstate, erm->rti,
+								 heap_copytuple(&tuple));
+		}
+	}
 }
 
 /*
  * Fetch the next row (if any) from EvalPlanQual testing
+ *
+ * (In practice, there should never be more than one row...)
  */
 TupleTableSlot *
-EvalPlanQualNext(EState *estate)
+EvalPlanQualNext(EPQState *epqstate)
 {
-	evalPlanQual *epq = estate->es_evalPlanQual;
 	MemoryContext oldcontext;
 	TupleTableSlot *slot;
 
-	Assert(epq->rti != 0);
-
-	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
-	slot = ExecProcNode(epq->planstate);
+	oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
+	slot = ExecProcNode(epqstate->planstate);
 	MemoryContextSwitchTo(oldcontext);
 
 	return slot;
 }
 
 /*
- * Shut down and pop the specified level of EvalPlanQual machinery,
- * plus any levels nested within it
+ * Initialize or reset an EvalPlanQual state tree
  */
 void
-EvalPlanQualPop(EState *estate, PlanState *subplanstate)
+EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
 {
-	evalPlanQual *epq = estate->es_evalPlanQual;
+	EState	   *estate = epqstate->estate;
 
-	for (;;)
+	if (estate == NULL)
 	{
-		PlanState *epqplanstate = epq->origplanstate;
-		evalPlanQual *oldepq;
-
-		Assert(epq->rti != 0);
-
-		/* stop execution */
-		EvalPlanQualStop(epq);
-		epq->origplanstate = NULL;
-		/* pop old PQ from the stack */
-		oldepq = epq->next;
-		if (oldepq == NULL)
-		{
-			/* this is the first (oldest) PQ - mark as free */
-			epq->rti = 0;
-			break;
-		}
-		Assert(oldepq->rti != 0);
-		/* push current PQ to freePQ stack */
-		oldepq->free = epq;
-		epq = oldepq;
-		estate->es_evalPlanQual = epq;
-		if (epqplanstate == subplanstate)
-			break;
+		/* First time through, so create a child EState */
+		EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
 	}
-}
-
-static void
-EndEvalPlanQual(EState *estate)
-{
-	evalPlanQual *epq = estate->es_evalPlanQual;
-
-	if (epq->rti == 0)			/* plans already shutdowned */
+	else
 	{
-		Assert(epq->next == NULL);
-		return;
-	}
+		/*
+		 * We already have a suitable child EPQ tree, so just reset it.
+		 */
+		int			rtsize = list_length(parentestate->es_range_table);
+		PlanState  *planstate = epqstate->planstate;
 
-	for (;;)
-	{
-		evalPlanQual *oldepq;
-
-		/* stop execution */
-		EvalPlanQualStop(epq);
-		epq->origplanstate = NULL;
-		/* pop old PQ from the stack */
-		oldepq = epq->next;
-		if (oldepq == NULL)
+		MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
+
+		/* Recopy current values of parent parameters */
+		if (parentestate->es_plannedstmt->nParamExec > 0)
 		{
-			/* this is the first (oldest) PQ - mark as free */
-			epq->rti = 0;
-			break;
+			int		i = parentestate->es_plannedstmt->nParamExec;
+
+			while (--i >= 0)
+			{
+				/* copy value if any, but not execPlan link */
+				estate->es_param_exec_vals[i].value =
+					parentestate->es_param_exec_vals[i].value;
+				estate->es_param_exec_vals[i].isnull =
+					parentestate->es_param_exec_vals[i].isnull;
+			}
 		}
-		Assert(oldepq->rti != 0);
-		/* push current PQ to freePQ stack */
-		oldepq->free = epq;
-		epq = oldepq;
-		estate->es_evalPlanQual = epq;
+
+		/*
+		 * Mark child plan tree as needing rescan at all scan nodes.  The
+		 * first ExecProcNode will take care of actually doing the rescan.
+		 */
+		planstate->chgParam = bms_add_member(planstate->chgParam,
+											 epqstate->epqParam);
 	}
 }
 
 /*
- * Start execution of one level of PlanQual.
+ * Start execution of an EvalPlanQual plan tree.
  *
  * This is a cut-down version of ExecutorStart(): we copy some state from
  * the top-level estate rather than initializing it fresh.
  */
 static void
-EvalPlanQualStart(evalPlanQual *epq, EState *estate, Plan *planTree,
-				  evalPlanQual *priorepq)
+EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
 {
-	EState	   *epqstate;
+	EState	   *estate;
 	int			rtsize;
 	MemoryContext oldcontext;
 	ListCell   *l;
 
-	rtsize = list_length(estate->es_range_table);
+	rtsize = list_length(parentestate->es_range_table);
 
-	epq->estate = epqstate = CreateExecutorState();
+	epqstate->estate = estate = CreateExecutorState();
 
-	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
+	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
 	/*
-	 * The epqstates share the top query's copy of unchanging state such as
+	 * Child EPQ EStates share the parent's copy of unchanging state such as
 	 * the snapshot, rangetable, result-rel info, and external Param info.
 	 * They need their own copies of local state, including a tuple table,
 	 * es_param_exec_vals, etc.
 	 */
-	epqstate->es_direction = ForwardScanDirection;
-	epqstate->es_snapshot = estate->es_snapshot;
-	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
-	epqstate->es_range_table = estate->es_range_table;
-	epqstate->es_junkFilter = estate->es_junkFilter;
-	epqstate->es_output_cid = estate->es_output_cid;
-	epqstate->es_result_relations = estate->es_result_relations;
-	epqstate->es_num_result_relations = estate->es_num_result_relations;
-	epqstate->es_result_relation_info = estate->es_result_relation_info;
+	estate->es_direction = ForwardScanDirection;
+	estate->es_snapshot = parentestate->es_snapshot;
+	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
+	estate->es_range_table = parentestate->es_range_table;
+	estate->es_plannedstmt = parentestate->es_plannedstmt;
+	estate->es_junkFilter = parentestate->es_junkFilter;
+	estate->es_output_cid = parentestate->es_output_cid;
+	estate->es_result_relations = parentestate->es_result_relations;
+	estate->es_num_result_relations = parentestate->es_num_result_relations;
+	estate->es_result_relation_info = parentestate->es_result_relation_info;
 	/* es_trig_target_relations must NOT be copied */
-	epqstate->es_param_list_info = estate->es_param_list_info;
-	if (estate->es_plannedstmt->nParamExec > 0)
-		epqstate->es_param_exec_vals = (ParamExecData *)
-			palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
-	epqstate->es_rowMarks = estate->es_rowMarks;
-	epqstate->es_instrument = estate->es_instrument;
-	epqstate->es_select_into = estate->es_select_into;
-	epqstate->es_into_oids = estate->es_into_oids;
-	epqstate->es_plannedstmt = estate->es_plannedstmt;
-
-	/*
-	 * Each epqstate must have its own es_evTupleNull state, but all the stack
-	 * entries share es_evTuple state.	This allows sub-rechecks to inherit
-	 * the value being examined by an outer recheck.
-	 */
-	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
-	if (priorepq == NULL)
-		/* first PQ stack entry */
-		epqstate->es_evTuple = (HeapTuple *)
-			palloc0(rtsize * sizeof(HeapTuple));
+	estate->es_rowMarks = parentestate->es_rowMarks;
+	estate->es_instrument = parentestate->es_instrument;
+	estate->es_select_into = parentestate->es_select_into;
+	estate->es_into_oids = parentestate->es_into_oids;
+
+	/*
+	 * The external param list is simply shared from parent.  The internal
+	 * param workspace has to be local state, but we copy the initial values
+	 * from the parent, so as to have access to any param values that were
+	 * already set from other parts of the parent's plan tree.
+	 */
+	estate->es_param_list_info = parentestate->es_param_list_info;
+	if (parentestate->es_plannedstmt->nParamExec > 0)
+	{
+		int		i = parentestate->es_plannedstmt->nParamExec;
+
+		estate->es_param_exec_vals = (ParamExecData *)
+			palloc0(i * sizeof(ParamExecData));
+		while (--i >= 0)
+		{
+			/* copy value if any, but not execPlan link */
+			estate->es_param_exec_vals[i].value =
+				parentestate->es_param_exec_vals[i].value;
+			estate->es_param_exec_vals[i].isnull =
+				parentestate->es_param_exec_vals[i].isnull;
+		}
+	}
+
+	/*
+	 * Each EState must have its own es_epqScanDone state, but if we have
+	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
+	 * sub-rechecks to inherit the values being examined by an outer recheck.
+	 */
+	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
+	if (parentestate->es_epqTuple != NULL)
+	{
+		estate->es_epqTuple = parentestate->es_epqTuple;
+		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
+	}
 	else
-		/* later stack entries share the same storage */
-		epqstate->es_evTuple = priorepq->estate->es_evTuple;
+	{
+		estate->es_epqTuple = (HeapTuple *)
+			palloc0(rtsize * sizeof(HeapTuple));
+		estate->es_epqTupleSet = (bool *)
+			palloc0(rtsize * sizeof(bool));
+	}
 
 	/*
-	 * Each epqstate also has its own tuple table.
+	 * Each estate also has its own tuple table.
 	 */
-	epqstate->es_tupleTable = NIL;
+	estate->es_tupleTable = NIL;
 
 	/*
 	 * Initialize private state information for each SubPlan.  We must do this
@@ -1894,16 +1945,16 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, Plan *planTree,
 	 * we intend to run, but since it's not easy to tell which, we just
 	 * initialize them all.
 	 */
-	Assert(epqstate->es_subplanstates == NIL);
-	foreach(l, estate->es_plannedstmt->subplans)
+	Assert(estate->es_subplanstates == NIL);
+	foreach(l, parentestate->es_plannedstmt->subplans)
 	{
 		Plan	   *subplan = (Plan *) lfirst(l);
 		PlanState  *subplanstate;
 
-		subplanstate = ExecInitNode(subplan, epqstate, 0);
+		subplanstate = ExecInitNode(subplan, estate, 0);
 
-		epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
-											 subplanstate);
+		estate->es_subplanstates = lappend(estate->es_subplanstates,
+										   subplanstate);
 	}
 
 	/*
@@ -1911,48 +1962,47 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, Plan *planTree,
 	 * part of the plan tree we need to run.  This opens files, allocates
 	 * storage and leaves us ready to start processing tuples.
 	 */
-	epq->planstate = ExecInitNode(planTree, epqstate, 0);
+	epqstate->planstate = ExecInitNode(planTree, estate, 0);
 
 	MemoryContextSwitchTo(oldcontext);
 }
 
 /*
- * End execution of one level of PlanQual.
+ * EvalPlanQualEnd -- shut down at termination of parent plan state node,
+ * or if we are done with the current EPQ child.
  *
  * This is a cut-down version of ExecutorEnd(); basically we want to do most
  * of the normal cleanup, but *not* close result relations (which we are
  * just sharing from the outer query).	We do, however, have to close any
  * trigger target relations that got opened, since those are not shared.
+ * (There probably shouldn't be any of the latter, but just in case...)
  */
-static void
-EvalPlanQualStop(evalPlanQual *epq)
+void
+EvalPlanQualEnd(EPQState *epqstate)
 {
-	EState	   *epqstate = epq->estate;
+	EState	   *estate = epqstate->estate;
 	MemoryContext oldcontext;
 	ListCell   *l;
 
-	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
+	if (estate == NULL)
+		return;					/* idle, so nothing to do */
 
-	ExecEndNode(epq->planstate);
+	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+	ExecEndNode(epqstate->planstate);
 
-	foreach(l, epqstate->es_subplanstates)
+	foreach(l, estate->es_subplanstates)
 	{
 		PlanState  *subplanstate = (PlanState *) lfirst(l);
 
 		ExecEndNode(subplanstate);
 	}
 
-	/* throw away the per-epqstate tuple table completely */
-	ExecResetTupleTable(epqstate->es_tupleTable, true);
-	epqstate->es_tupleTable = NIL;
-
-	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
-	{
-		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
-		epqstate->es_evTuple[epq->rti - 1] = NULL;
-	}
+	/* throw away the per-estate tuple table */
+	ExecResetTupleTable(estate->es_tupleTable, false);
 
-	foreach(l, epqstate->es_trig_target_relations)
+	/* close any trigger target relations attached to this EState */
+	foreach(l, estate->es_trig_target_relations)
 	{
 		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
 
@@ -1963,10 +2013,12 @@ EvalPlanQualStop(evalPlanQual *epq)
 
 	MemoryContextSwitchTo(oldcontext);
 
-	FreeExecutorState(epqstate);
+	FreeExecutorState(estate);
 
-	epq->estate = NULL;
-	epq->planstate = NULL;
+	/* Mark EPQState idle */
+	epqstate->estate = NULL;
+	epqstate->planstate = NULL;
+	epqstate->origslot = NULL;
 }
 
 
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index b9b67da26f0..fdfbd999f4f 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.252 2009/10/08 22:34:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.253 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -660,7 +660,7 @@ ExecEvalVar(ExprState *exprstate, ExprContext *econtext,
 			exprstate->evalfunc = ExecEvalWholeRowVar;
 
 		/* Fetch the value */
-		return ExecEvalWholeRowVar(exprstate, econtext, isNull, isDone);
+		return (*exprstate->evalfunc) (exprstate, econtext, isNull, isDone);
 	}
 }
 
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 32386accbbd..f7733569ef9 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execScan.c,v 1.46 2009/04/02 20:59:10 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execScan.c,v 1.47 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,6 +26,62 @@
 static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc);
 
 
+/*
+ * ExecScanFetch -- fetch next potential tuple
+ *
+ * This routine is concerned with substituting a test tuple if we are
+ * inside an EvalPlanQual recheck.  If we aren't, just execute
+ * the access method's next-tuple routine.
+ */
+static inline TupleTableSlot *
+ExecScanFetch(ScanState *node,
+			  ExecScanAccessMtd accessMtd,
+			  ExecScanRecheckMtd recheckMtd)
+{
+	EState	   *estate = node->ps.state;
+
+	if (estate->es_epqTuple != NULL)
+	{
+		/*
+		 * We are inside an EvalPlanQual recheck.  Return the test tuple if
+		 * one is available, after rechecking any access-method-specific
+		 * conditions.
+		 */
+		Index		scanrelid = ((Scan *) node->ps.plan)->scanrelid;
+
+		Assert(scanrelid > 0);
+		if (estate->es_epqTupleSet[scanrelid - 1])
+		{
+			TupleTableSlot *slot = node->ss_ScanTupleSlot;
+
+			/* Return empty slot if we already returned a tuple */
+			if (estate->es_epqScanDone[scanrelid - 1])
+				return ExecClearTuple(slot);
+			/* Else mark to remember that we shouldn't return more */
+			estate->es_epqScanDone[scanrelid - 1] = true;
+
+			/* Return empty slot if we haven't got a test tuple */
+			if (estate->es_epqTuple[scanrelid - 1] == NULL)
+				return ExecClearTuple(slot);
+
+			/* Store test tuple in the plan node's scan slot */
+			ExecStoreTuple(estate->es_epqTuple[scanrelid - 1],
+						   slot, InvalidBuffer, false);
+
+			/* Check if it meets the access-method conditions */
+			if (!(*recheckMtd) (node, slot))
+				ExecClearTuple(slot);	/* would not be returned by scan */
+
+			return slot;
+		}
+	}
+
+	/*
+	 * Run the node-type-specific access method function to get the next tuple
+	 */
+	return (*accessMtd) (node);
+}
+
 /* ----------------------------------------------------------------
  *		ExecScan
  *
@@ -35,6 +91,10 @@ static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, Tuple
  *		The access method returns the next tuple and execScan() is
  *		responsible for checking the tuple returned against the qual-clause.
  *
+ *		A 'recheck method' must also be provided that can check an
+ *		arbitrary tuple of the relation against any qual conditions
+ *		that are implemented internal to the access method.
+ *
  *		Conditions:
  *		  -- the "cursor" maintained by the AMI is positioned at the tuple
  *			 returned previously.
@@ -46,7 +106,8 @@ static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, Tuple
  */
 TupleTableSlot *
 ExecScan(ScanState *node,
-		 ExecScanAccessMtd accessMtd)	/* function returning a tuple */
+		 ExecScanAccessMtd accessMtd,	/* function returning a tuple */
+		 ExecScanRecheckMtd recheckMtd)
 {
 	ExprContext *econtext;
 	List	   *qual;
@@ -65,7 +126,7 @@ ExecScan(ScanState *node,
 	 * all the overhead and return the raw scan tuple.
 	 */
 	if (!qual && !projInfo)
-		return (*accessMtd) (node);
+		return ExecScanFetch(node, accessMtd, recheckMtd);
 
 	/*
 	 * Check to see if we're still projecting out tuples from a previous scan
@@ -91,7 +152,7 @@ ExecScan(ScanState *node,
 	ResetExprContext(econtext);
 
 	/*
-	 * get a tuple from the access method loop until we obtain a tuple which
+	 * get a tuple from the access method.  Loop until we obtain a tuple that
 	 * passes the qualification.
 	 */
 	for (;;)
@@ -100,7 +161,7 @@ ExecScan(ScanState *node,
 
 		CHECK_FOR_INTERRUPTS();
 
-		slot = (*accessMtd) (node);
+		slot = ExecScanFetch(node, accessMtd, recheckMtd);
 
 		/*
 		 * if the slot returned by the accessMtd contains NULL, then it means
@@ -249,3 +310,28 @@ tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc
 
 	return true;
 }
+
+/*
+ * ExecScanReScan
+ *
+ * This must be called within the ReScan function of any plan node type
+ * that uses ExecScan().
+ */
+void
+ExecScanReScan(ScanState *node)
+{
+	EState	   *estate = node->ps.state;
+
+	/* Stop projecting any tuples from SRFs in the targetlist */
+	node->ps.ps_TupFromTlist = false;
+
+	/* Rescan EvalPlanQual tuple if we're inside an EvalPlanQual recheck */
+	if (estate->es_epqScanDone != NULL)
+	{
+		Index		scanrelid = ((Scan *) node->ps.plan)->scanrelid;
+
+		Assert(scanrelid > 0);
+
+		estate->es_epqScanDone[scanrelid - 1] = false;
+	}
+}
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 4afce5b9526..d3352f1f5d4 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.164 2009/10/12 18:10:41 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.165 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -105,6 +105,7 @@ CreateExecutorState(void)
 	estate->es_snapshot = SnapshotNow;
 	estate->es_crosscheck_snapshot = InvalidSnapshot;	/* no crosscheck */
 	estate->es_range_table = NIL;
+	estate->es_plannedstmt = NULL;
 
 	estate->es_junkFilter = NULL;
 
@@ -139,10 +140,9 @@ CreateExecutorState(void)
 
 	estate->es_per_tuple_exprcontext = NULL;
 
-	estate->es_plannedstmt = NULL;
-	estate->es_evalPlanQual = NULL;
-	estate->es_evTupleNull = NULL;
-	estate->es_evTuple = NULL;
+	estate->es_epqTuple = NULL;
+	estate->es_epqTupleSet = NULL;
+	estate->es_epqScanDone = NULL;
 
 	/*
 	 * Return the executor state structure
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 6adc7d66ee9..98d9219e478 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -21,7 +21,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.36 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.37 2009/10/26 02:26:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -60,10 +60,8 @@ static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
 static TupleTableSlot *
 BitmapHeapNext(BitmapHeapScanState *node)
 {
-	EState	   *estate;
 	ExprContext *econtext;
 	HeapScanDesc scan;
-	Index		scanrelid;
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
@@ -74,45 +72,14 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	/*
 	 * extract necessary information from index scan node
 	 */
-	estate = node->ss.ps.state;
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
 	scan = node->ss.ss_currentScanDesc;
-	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
 	tbm = node->tbm;
 	tbmiterator = node->tbmiterator;
 	tbmres = node->tbmres;
 	prefetch_iterator = node->prefetch_iterator;
 
-	/*
-	 * Check if we are evaluating PlanQual for tuple of this relation.
-	 * Additional checking is not good, but no other way for now. We could
-	 * introduce new nodes for this case and handle IndexScan --> NewNode
-	 * switching in Init/ReScan plan...
-	 */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		if (estate->es_evTupleNull[scanrelid - 1])
-			return ExecClearTuple(slot);
-
-		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
-					   slot, InvalidBuffer, false);
-
-		/* Does the tuple meet the original qual conditions? */
-		econtext->ecxt_scantuple = slot;
-
-		ResetExprContext(econtext);
-
-		if (!ExecQual(node->bitmapqualorig, econtext, false))
-			ExecClearTuple(slot);		/* would not be returned by scan */
-
-		/* Flag for the next call that no more tuples */
-		estate->es_evTupleNull[scanrelid - 1] = true;
-
-		return slot;
-	}
-
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
 	 * the iteration over the bitmap.
@@ -419,6 +386,27 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
 	scan->rs_ntuples = ntup;
 }
 
+/*
+ * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
+{
+	ExprContext *econtext;
+
+	/*
+	 * extract necessary information from the bitmap heap scan node
+	 */
+	econtext = node->ss.ps.ps_ExprContext;
+
+	/* Does the tuple meet the original qual conditions? */
+	econtext->ecxt_scantuple = slot;
+
+	ResetExprContext(econtext);
+
+	return ExecQual(node->bitmapqualorig, econtext, false);
+}
+
 /* ----------------------------------------------------------------
  *		ExecBitmapHeapScan(node)
  * ----------------------------------------------------------------
@@ -426,10 +414,9 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
 TupleTableSlot *
 ExecBitmapHeapScan(BitmapHeapScanState *node)
 {
-	/*
-	 * use BitmapHeapNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) BitmapHeapNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) BitmapHeapNext,
+					(ExecScanRecheckMtd) BitmapHeapRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -439,14 +426,6 @@ ExecBitmapHeapScan(BitmapHeapScanState *node)
 void
 ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
 {
-	EState	   *estate;
-	Index		scanrelid;
-
-	estate = node->ss.ps.state;
-	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
-
-	node->ss.ps.ps_TupFromTlist = false;
-
 	/*
 	 * If we are being passed an outer tuple, link it into the "regular"
 	 * per-tuple econtext for possible qual eval.
@@ -459,13 +438,6 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
 		stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
 	}
 
-	/* If this is re-scanning of PlanQual ... */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		estate->es_evTupleNull[scanrelid - 1] = false;
-	}
-
 	/* rescan to release any page pin */
 	heap_rescan(node->ss.ss_currentScanDesc, NULL);
 
@@ -480,6 +452,8 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
 	node->tbmres = NULL;
 	node->prefetch_iterator = NULL;
 
+	ExecScanReScan(&node->ss);
+
 	/*
 	 * Always rescan the input immediately, to ensure we can pass down any
 	 * outer tuple that might be used in index quals.
diff --git a/src/backend/executor/nodeCtescan.c b/src/backend/executor/nodeCtescan.c
index 725840fef9c..7d5be7ffaba 100644
--- a/src/backend/executor/nodeCtescan.c
+++ b/src/backend/executor/nodeCtescan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeCtescan.c,v 1.6 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeCtescan.c,v 1.7 2009/10/26 02:26:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -131,21 +131,30 @@ CteScanNext(CteScanState *node)
 	return ExecClearTuple(slot);
 }
 
+/*
+ * CteScanRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+CteScanRecheck(CteScanState *node, TupleTableSlot *slot)
+{
+	/* nothing to check */
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecCteScan(node)
  *
  *		Scans the CTE sequentially and returns the next qualifying tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieves tuples sequentially.
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
 ExecCteScan(CteScanState *node)
 {
-	/*
-	 * use CteScanNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) CteScanNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) CteScanNext,
+					(ExecScanRecheckMtd) CteScanRecheck);
 }
 
 
@@ -300,7 +309,8 @@ ExecCteScanReScan(CteScanState *node, ExprContext *exprCtxt)
 	Tuplestorestate *tuplestorestate = node->leader->cte_table;
 
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	node->ss.ps.ps_TupFromTlist = false;
+
+	ExecScanReScan(&node->ss);
 
 	if (node->leader == node)
 	{
diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c
index 5e81283a4c3..1bedb738890 100644
--- a/src/backend/executor/nodeFunctionscan.c
+++ b/src/backend/executor/nodeFunctionscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.53 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeFunctionscan.c,v 1.54 2009/10/26 02:26:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -79,23 +79,31 @@ FunctionNext(FunctionScanState *node)
 	return slot;
 }
 
+/*
+ * FunctionRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+FunctionRecheck(FunctionScanState *node, TupleTableSlot *slot)
+{
+	/* nothing to check */
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecFunctionScan(node)
  *
  *		Scans the function sequentially and returns the next qualifying
  *		tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieves tuples sequentially.
- *
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
+ * ----------------------------------------------------------------
  */
-
 TupleTableSlot *
 ExecFunctionScan(FunctionScanState *node)
 {
-	/*
-	 * use FunctionNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) FunctionNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) FunctionNext,
+					(ExecScanRecheckMtd) FunctionRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -256,7 +264,8 @@ void
 ExecFunctionReScan(FunctionScanState *node, ExprContext *exprCtxt)
 {
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	node->ss.ps.ps_TupFromTlist = false;
+
+	ExecScanReScan(&node->ss);
 
 	/*
 	 * If we haven't materialized yet, just return.
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 0520b726cfa..b136825dc8f 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.135 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.136 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,7 +52,6 @@ IndexNext(IndexScanState *node)
 	ExprContext *econtext;
 	ScanDirection direction;
 	IndexScanDesc scandesc;
-	Index		scanrelid;
 	HeapTuple	tuple;
 	TupleTableSlot *slot;
 
@@ -72,36 +71,6 @@ IndexNext(IndexScanState *node)
 	scandesc = node->iss_ScanDesc;
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
-	scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
-
-	/*
-	 * Check if we are evaluating PlanQual for tuple of this relation.
-	 * Additional checking is not good, but no other way for now. We could
-	 * introduce new nodes for this case and handle IndexScan --> NewNode
-	 * switching in Init/ReScan plan...
-	 */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		if (estate->es_evTupleNull[scanrelid - 1])
-			return ExecClearTuple(slot);
-
-		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
-					   slot, InvalidBuffer, false);
-
-		/* Does the tuple meet the indexqual condition? */
-		econtext->ecxt_scantuple = slot;
-
-		ResetExprContext(econtext);
-
-		if (!ExecQual(node->indexqualorig, econtext, false))
-			ExecClearTuple(slot);		/* would not be returned by scan */
-
-		/* Flag for the next call that no more tuples */
-		estate->es_evTupleNull[scanrelid - 1] = true;
-
-		return slot;
-	}
 
 	/*
 	 * ok, now that we have what we need, fetch the next tuple.
@@ -140,6 +109,27 @@ IndexNext(IndexScanState *node)
 	return ExecClearTuple(slot);
 }
 
+/*
+ * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
+{
+	ExprContext *econtext;
+
+	/*
+	 * extract necessary information from index scan node
+	 */
+	econtext = node->ss.ps.ps_ExprContext;
+
+	/* Does the tuple meet the indexqual condition? */
+	econtext->ecxt_scantuple = slot;
+
+	ResetExprContext(econtext);
+
+	return ExecQual(node->indexqualorig, econtext, false);
+}
+
 /* ----------------------------------------------------------------
  *		ExecIndexScan(node)
  * ----------------------------------------------------------------
@@ -153,10 +143,9 @@ ExecIndexScan(IndexScanState *node)
 	if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
 		ExecReScan((PlanState *) node, NULL);
 
-	/*
-	 * use IndexNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) IndexNext,
+					(ExecScanRecheckMtd) IndexRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -172,15 +161,9 @@ ExecIndexScan(IndexScanState *node)
 void
 ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
 {
-	EState	   *estate;
 	ExprContext *econtext;
-	Index		scanrelid;
 
-	estate = node->ss.ps.state;
 	econtext = node->iss_RuntimeContext;		/* context for runtime keys */
-	scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
-
-	node->ss.ps.ps_TupFromTlist = false;
 
 	if (econtext)
 	{
@@ -216,16 +199,10 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
 								 node->iss_NumRuntimeKeys);
 	node->iss_RuntimeKeysReady = true;
 
-	/* If this is re-scanning of PlanQual ... */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		estate->es_evTupleNull[scanrelid - 1] = false;
-		return;
-	}
-
 	/* reset index scan */
 	index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);
+
+	ExecScanReScan(&node->ss);
 }
 
 
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 80f7e3cdafb..f38d34a0475 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeLockRows.c,v 1.1 2009/10/12 18:10:43 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeLockRows.c,v 1.2 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -25,6 +25,7 @@
 #include "executor/executor.h"
 #include "executor/nodeLockRows.h"
 #include "storage/bufmgr.h"
+#include "utils/tqual.h"
 
 
 /* ----------------------------------------------------------------
@@ -37,7 +38,7 @@ ExecLockRows(LockRowsState *node)
 	TupleTableSlot *slot;
 	EState	   *estate;
 	PlanState  *outerPlan;
-	bool		epq_pushed;
+	bool		epq_started;
 	ListCell   *lc;
 
 	/*
@@ -47,30 +48,19 @@ ExecLockRows(LockRowsState *node)
 	outerPlan = outerPlanState(node);
 
 	/*
-	 * Get next tuple from subplan, if any; but if we are evaluating
-	 * an EvalPlanQual substitution, first finish that.
+	 * Get next tuple from subplan, if any.
 	 */
 lnext:
-	if (node->lr_useEvalPlan)
-	{
-		slot = EvalPlanQualNext(estate);
-		if (TupIsNull(slot))
-		{
-			EvalPlanQualPop(estate, outerPlan);
-			node->lr_useEvalPlan = false;
-			slot = ExecProcNode(outerPlan);
-		}
-	}
-	else
-		slot = ExecProcNode(outerPlan);
+	slot = ExecProcNode(outerPlan);
 
 	if (TupIsNull(slot))
 		return NULL;
 
 	/*
-	 * Attempt to lock the source tuple(s).
+	 * Attempt to lock the source tuple(s).  (Note we only have locking
+	 * rowmarks in lr_rowMarks.)
 	 */
-	epq_pushed = false;
+	epq_started = false;
 	foreach(lc, node->lr_rowMarks)
 	{
 		ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
@@ -84,6 +74,10 @@ lnext:
 		HTSU_Result test;
 		HeapTuple	copyTuple;
 
+		/* clear any leftover test tuple for this rel */
+		if (node->lr_epqstate.estate != NULL)
+			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL);
+
 		/* if child rel, must check whether it produced this row */
 		if (erm->rti != erm->prti)
 		{
@@ -115,7 +109,7 @@ lnext:
 		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
 
 		/* okay, try to lock the tuple */
-		if (erm->forUpdate)
+		if (erm->markType == ROW_MARK_EXCLUSIVE)
 			lockmode = LockTupleExclusive;
 		else
 			lockmode = LockTupleShared;
@@ -129,8 +123,6 @@ lnext:
 		{
 			case HeapTupleSelfUpdated:
 				/* treat it as deleted; do not process */
-				if (epq_pushed)
-					EvalPlanQualPop(estate, outerPlan);
 				goto lnext;
 
 			case HeapTupleMayBeUpdated:
@@ -146,35 +138,33 @@ lnext:
 									  &tuple.t_self))
 				{
 					/* Tuple was deleted, so don't return it */
-					if (epq_pushed)
-						EvalPlanQualPop(estate, outerPlan);
 					goto lnext;
 				}
 
-				/* updated, so look at updated version */
-				copyTuple = EvalPlanQualFetch(estate, erm->rti,
+				/* updated, so fetch and lock the updated version */
+				copyTuple = EvalPlanQualFetch(estate, erm->relation, lockmode,
 											  &update_ctid, update_xmax);
 
 				if (copyTuple == NULL)
 				{
 					/* Tuple was deleted, so don't return it */
-					if (epq_pushed)
-						EvalPlanQualPop(estate, outerPlan);
 					goto lnext;
 				}
+				/* remember the actually locked tuple's TID */
+				tuple.t_self = copyTuple->t_self;
 
 				/*
-				 * Need to run a recheck subquery.
-				 * Find or create a PQ stack entry.
+				 * Need to run a recheck subquery.  Initialize EPQ state
+				 * if we didn't do so already.
 				 */
-				if (!epq_pushed)
+				if (!epq_started)
 				{
-					EvalPlanQualPush(estate, erm->rti, outerPlan);
-					epq_pushed = true;
+					EvalPlanQualBegin(&node->lr_epqstate, estate);
+					epq_started = true;
 				}
 
 				/* Store target tuple for relation's scan node */
-				EvalPlanQualSetTuple(estate, erm->rti, copyTuple);
+				EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
 
 				/* Continue loop until we have all target tuples */
 				break;
@@ -188,11 +178,52 @@ lnext:
 		erm->curCtid = tuple.t_self;
 	}
 
-	/* If we need to do EvalPlanQual testing, loop back to do that */
-	if (epq_pushed)
+	/*
+	 * If we need to do EvalPlanQual testing, do so.
+	 */
+	if (epq_started)
 	{
-		node->lr_useEvalPlan = true;
-		goto lnext;
+		/*
+		 * First, fetch a copy of any rows that were successfully locked
+		 * without any update having occurred.  (We do this in a separate
+		 * pass so as to avoid overhead in the common case where there are
+		 * no concurrent updates.)
+		 */
+		foreach(lc, node->lr_rowMarks)
+		{
+			ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
+			HeapTupleData tuple;
+			Buffer		buffer;
+
+			if (EvalPlanQualGetTuple(&node->lr_epqstate, erm->rti) != NULL)
+				continue;		/* it was updated and fetched above */
+
+			/* okay, fetch the tuple */
+			tuple.t_self = erm->curCtid;
+			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
+							false, NULL))
+				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
+
+			/* successful, copy and store tuple */
+			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
+								 heap_copytuple(&tuple));
+			ReleaseBuffer(buffer);
+		}
+		/*
+		 * Now fetch any non-locked source rows --- the EPQ logic knows
+		 * how to do that.
+		 */
+		EvalPlanQualSetSlot(&node->lr_epqstate, slot);
+		EvalPlanQualFetchRowMarks(&node->lr_epqstate);
+		/*
+		 * And finally we can re-evaluate the tuple.
+		 */
+		slot = EvalPlanQualNext(&node->lr_epqstate);
+		if (TupIsNull(slot))
+		{
+			/* Updated tuple fails qual, so ignore it and go on */
+			goto lnext;
+		}
 	}
 
 	/* Got all locks, so return the current tuple */
@@ -210,8 +241,7 @@ LockRowsState *
 ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 {
 	LockRowsState *lrstate;
-	Plan	   *outerPlan;
-	JunkFilter *j;
+	Plan	   *outerPlan = outerPlan(node);
 	ListCell   *lc;
 
 	/* check for unsupported flags */
@@ -223,7 +253,7 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 	lrstate = makeNode(LockRowsState);
 	lrstate->ps.plan = (Plan *) node;
 	lrstate->ps.state = estate;
-	lrstate->lr_useEvalPlan = false;
+	EvalPlanQualInit(&lrstate->lr_epqstate, estate, outerPlan, node->epqParam);
 
 	/*
 	 * Miscellaneous initialization
@@ -239,7 +269,6 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 	/*
 	 * then initialize outer plan
 	 */
-	outerPlan = outerPlan(node);
 	outerPlanState(lrstate) = ExecInitNode(outerPlan, estate, eflags);
 
 	/*
@@ -249,17 +278,6 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 	ExecAssignResultTypeFromTL(&lrstate->ps);
 	lrstate->ps.ps_ProjInfo = NULL;
 
-	/*
-	 * Initialize a junkfilter that we'll use to extract the ctid junk
-	 * attributes.  (We won't actually apply the filter to remove the
-	 * junk, we just pass the rows on as-is.  This is because the
-	 * junkfilter isn't smart enough to not remove junk attrs that
-	 * might be needed further up.)
-	 */
-	j = ExecInitJunkFilter(outerPlan->targetlist, false,
-						   ExecInitExtraTupleSlot(estate));
-	lrstate->lr_junkFilter = j;
-
 	/*
 	 * Locate the ExecRowMark(s) that this node is responsible for.
 	 * (InitPlan should already have built the global list of ExecRowMarks.)
@@ -267,11 +285,12 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 	lrstate->lr_rowMarks = NIL;
 	foreach(lc, node->rowMarks)
 	{
-		RowMarkClause *rc = (RowMarkClause *) lfirst(lc);
+		PlanRowMark *rc = (PlanRowMark *) lfirst(lc);
 		ExecRowMark *erm = NULL;
-		char		resname[32];
 		ListCell   *lce;
 
+		Assert(IsA(rc, PlanRowMark));
+
 		/* ignore "parent" rowmarks; they are irrelevant at runtime */
 		if (rc->isParent)
 			continue;
@@ -279,36 +298,24 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 		foreach(lce, estate->es_rowMarks)
 		{
 			erm = (ExecRowMark *) lfirst(lce);
-			if (erm->rti == rc->rti &&
-				erm->prti == rc->prti &&
-				erm->rowmarkId == rc->rowmarkId)
+			if (erm->rti == rc->rti)
 				break;
 			erm = NULL;
 		}
 		if (erm == NULL)
-			elog(ERROR, "failed to find ExecRowMark for RowMarkClause");
-		if (AttributeNumberIsValid(erm->ctidAttNo))
-			elog(ERROR, "ExecRowMark is already claimed");
-
-		/* Locate the junk attribute columns in the subplan output */
-
-		/* always need the ctid */
-		snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
-		erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
-		if (!AttributeNumberIsValid(erm->ctidAttNo))
-			elog(ERROR, "could not find junk \"%s\" column",
-				 resname);
-		/* if child relation, need tableoid too */
-		if (erm->rti != erm->prti)
-		{
-			snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
-			erm->toidAttNo = ExecFindJunkAttribute(j, resname);
-			if (!AttributeNumberIsValid(erm->toidAttNo))
-				elog(ERROR, "could not find junk \"%s\" column",
-					 resname);
-		}
-
-		lrstate->lr_rowMarks = lappend(lrstate->lr_rowMarks, erm);
+			elog(ERROR, "failed to find ExecRowMark for PlanRowMark %u",
+				 rc->rti);
+
+		/*
+		 * Only locking rowmarks go into our own list.  Non-locking marks
+		 * are passed off to the EvalPlanQual machinery.  This is because
+		 * we don't want to bother fetching non-locked rows unless we
+		 * actually have to do an EPQ recheck.
+		 */
+		if (RowMarkRequiresRowShareLock(erm->markType))
+			lrstate->lr_rowMarks = lappend(lrstate->lr_rowMarks, erm);
+		else
+			EvalPlanQualAddRowMark(&lrstate->lr_epqstate, erm);
 	}
 
 	return lrstate;
@@ -324,6 +331,7 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 void
 ExecEndLockRows(LockRowsState *node)
 {
+	EvalPlanQualEnd(&node->lr_epqstate);
 	ExecEndNode(outerPlanState(node));
 }
 
@@ -331,8 +339,6 @@ ExecEndLockRows(LockRowsState *node)
 void
 ExecReScanLockRows(LockRowsState *node, ExprContext *exprCtxt)
 {
-	node->lr_useEvalPlan = false;
-
 	/*
 	 * if chgParam of subnode is not null then plan will be re-scanned by
 	 * first ExecProcNode.
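
Condensed from the hunks above, the EPQ state embedded in LockRowsState now
goes through the following lifecycle.  This is a fragment for orientation,
not standalone code; the rowmark loop, error handling, and the locked-row
re-fetch pass are elided.

    /* ExecInitLockRows: bind the EPQ state to the subplan and its Param */
    EvalPlanQualInit(&lrstate->lr_epqstate, estate, outerPlan, node->epqParam);

    /* ExecLockRows: a concurrent update was detected for some rowmark */
    EvalPlanQualBegin(&node->lr_epqstate, estate);
    EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);

    /* once every locking rowmark has a test tuple stored */
    EvalPlanQualSetSlot(&node->lr_epqstate, slot);
    EvalPlanQualFetchRowMarks(&node->lr_epqstate);
    slot = EvalPlanQualNext(&node->lr_epqstate);    /* empty => row fails qual */

    /* ExecEndLockRows */
    EvalPlanQualEnd(&node->lr_epqstate);
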
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index a9fd8c4974f..3f1f9c093ee 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeModifyTable.c,v 1.1 2009/10/10 01:43:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeModifyTable.c,v 1.2 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -273,7 +273,7 @@ ExecInsert(TupleTableSlot *slot,
 static TupleTableSlot *
 ExecDelete(ItemPointer tupleid,
 		   TupleTableSlot *planSlot,
-		   PlanState *subplanstate,
+		   EPQState *epqstate,
 		   EState *estate)
 {
 	ResultRelInfo *resultRelInfo;
@@ -294,7 +294,7 @@ ExecDelete(ItemPointer tupleid,
 	{
 		bool		dodelete;
 
-		dodelete = ExecBRDeleteTriggers(estate, subplanstate, resultRelInfo,
+		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
 										tupleid);
 
 		if (!dodelete)			/* "do nothing" */
@@ -329,13 +329,14 @@ ldelete:;
 				ereport(ERROR,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("could not serialize access due to concurrent update")));
-			else if (!ItemPointerEquals(tupleid, &update_ctid))
+			if (!ItemPointerEquals(tupleid, &update_ctid))
 			{
 				TupleTableSlot *epqslot;
 
 				epqslot = EvalPlanQual(estate,
+									   epqstate,
+									   resultRelationDesc,
 									   resultRelInfo->ri_RangeTableIndex,
-									   subplanstate,
 									   &update_ctid,
 									   update_xmax);
 				if (!TupIsNull(epqslot))
@@ -416,7 +417,7 @@ static TupleTableSlot *
 ExecUpdate(ItemPointer tupleid,
 		   TupleTableSlot *slot,
 		   TupleTableSlot *planSlot,
-		   PlanState *subplanstate,
+		   EPQState *epqstate,
 		   EState *estate)
 {
 	HeapTuple	tuple;
@@ -451,7 +452,7 @@ ExecUpdate(ItemPointer tupleid,
 	{
 		HeapTuple	newtuple;
 
-		newtuple = ExecBRUpdateTriggers(estate, subplanstate, resultRelInfo,
+		newtuple = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
 										tupleid, tuple);
 
 		if (newtuple == NULL)	/* "do nothing" */
@@ -515,13 +516,14 @@ lreplace:;
 				ereport(ERROR,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("could not serialize access due to concurrent update")));
-			else if (!ItemPointerEquals(tupleid, &update_ctid))
+			if (!ItemPointerEquals(tupleid, &update_ctid))
 			{
 				TupleTableSlot *epqslot;
 
 				epqslot = EvalPlanQual(estate,
+									   epqstate,
+									   resultRelationDesc,
 									   resultRelInfo->ri_RangeTableIndex,
-									   subplanstate,
 									   &update_ctid,
 									   update_xmax);
 				if (!TupIsNull(epqslot))
@@ -685,12 +687,14 @@ ExecModifyTable(ModifyTableState *node)
 				estate->es_result_relation_info++;
 				subplanstate = node->mt_plans[node->mt_whichplan];
 				junkfilter = estate->es_result_relation_info->ri_junkFilter;
+				EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan);
 				continue;
 			}
 			else
 				break;
 		}
 
+		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
 		slot = planSlot;
 
 		if (junkfilter != NULL)
@@ -728,11 +732,11 @@ ExecModifyTable(ModifyTableState *node)
 				break;
 			case CMD_UPDATE:
 				slot = ExecUpdate(tupleid, slot, planSlot,
-								  subplanstate, estate);
+								  &node->mt_epqstate, estate);
 				break;
 			case CMD_DELETE:
 				slot = ExecDelete(tupleid, planSlot,
-								  subplanstate, estate);
+								  &node->mt_epqstate, estate);
 				break;
 			default:
 				elog(ERROR, "unknown operation");
@@ -785,7 +789,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	 * a subplan tree to EvalPlanQual, instead.  Use a runtime test not just
 	 * Assert because this condition is easy to miss in testing ...
 	 */
-	if (estate->es_evTuple != NULL)
+	if (estate->es_epqTuple != NULL)
 		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
 
 	/*
@@ -799,6 +803,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
 	mtstate->mt_nplans = nplans;
 	mtstate->operation = operation;
+	/* set up epqstate with dummy subplan pointer for the moment */
+	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, node->epqParam);
 	mtstate->fireBSTriggers = true;
 
 	/* For the moment, assume our targets are exactly the global result rels */
@@ -823,6 +829,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	/* select first subplan */
 	mtstate->mt_whichplan = 0;
 	subplan = (Plan *) linitial(node->plans);
+	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan);
 
 	/*
 	 * Initialize RETURNING projections if needed.
@@ -878,6 +885,38 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		mtstate->ps.ps_ExprContext = NULL;
 	}
 
+	/*
+	 * If we have any secondary relations in an UPDATE or DELETE, they need
+	 * to be treated like non-locked relations in SELECT FOR UPDATE, ie,
+	 * the EvalPlanQual mechanism needs to be told about them.  Locate
+	 * the relevant ExecRowMarks.
+	 */
+	foreach(l, node->rowMarks)
+	{
+		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+		ExecRowMark *erm = NULL;
+		ListCell   *lce;
+
+		Assert(IsA(rc, PlanRowMark));
+
+		/* ignore "parent" rowmarks; they are irrelevant at runtime */
+		if (rc->isParent)
+			continue;
+
+		foreach(lce, estate->es_rowMarks)
+		{
+			erm = (ExecRowMark *) lfirst(lce);
+			if (erm->rti == rc->rti)
+				break;
+			erm = NULL;
+		}
+		if (erm == NULL)
+			elog(ERROR, "failed to find ExecRowMark for PlanRowMark %u",
+				 rc->rti);
+
+		EvalPlanQualAddRowMark(&mtstate->mt_epqstate, erm);
+	}
+
 	/*
 	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
 	 * if there are any junk attrs in the tlist.  UPDATE and DELETE
@@ -987,6 +1026,11 @@ ExecEndModifyTable(ModifyTableState *node)
 	 */
 	ExecClearTuple(node->ps.ps_ResultTupleSlot);
 
+	/*
+	 * Terminate EPQ execution if active
+	 */
+	EvalPlanQualEnd(&node->mt_epqstate);
+
 	/*
 	 * shut down subplans
 	 */
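
For orientation, the UPDATE path now reacts to a concurrent update roughly as
follows (condensed from the ExecUpdate hunk above; ExecDelete does the same
dance around ldelete).  epqstate here is &node->mt_epqstate, passed down from
ExecModifyTable.

    /* heap_update() reported HeapTupleUpdated by another transaction */
    if (!ItemPointerEquals(tupleid, &update_ctid))
    {
        TupleTableSlot *epqslot;

        epqslot = EvalPlanQual(estate,
                               epqstate,
                               resultRelationDesc,
                               resultRelInfo->ri_RangeTableIndex,
                               &update_ctid,
                               update_xmax);
        if (!TupIsNull(epqslot))
        {
            /* quals still pass against the updated row: retry at lreplace */
        }
        else
        {
            /* quals no longer pass against the updated row: skip it */
        }
    }
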
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index f20cc058498..22d3ec76487 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeSeqscan.c,v 1.67 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeSeqscan.c,v 1.68 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,6 +36,7 @@ static TupleTableSlot *SeqNext(SeqScanState *node);
  *						Scan Support
  * ----------------------------------------------------------------
  */
+
 /* ----------------------------------------------------------------
  *		SeqNext
  *
@@ -47,7 +48,6 @@ SeqNext(SeqScanState *node)
 {
 	HeapTuple	tuple;
 	HeapScanDesc scandesc;
-	Index		scanrelid;
 	EState	   *estate;
 	ScanDirection direction;
 	TupleTableSlot *slot;
@@ -55,40 +55,13 @@ SeqNext(SeqScanState *node)
 	/*
 	 * get information from the estate and scan state
 	 */
-	estate = node->ps.state;
 	scandesc = node->ss_currentScanDesc;
-	scanrelid = ((SeqScan *) node->ps.plan)->scanrelid;
+	estate = node->ps.state;
 	direction = estate->es_direction;
 	slot = node->ss_ScanTupleSlot;
 
 	/*
-	 * Check if we are evaluating PlanQual for tuple of this relation.
-	 * Additional checking is not good, but no other way for now. We could
-	 * introduce new nodes for this case and handle SeqScan --> NewNode
-	 * switching in Init/ReScan plan...
-	 */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		if (estate->es_evTupleNull[scanrelid - 1])
-			return ExecClearTuple(slot);
-
-		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
-					   slot, InvalidBuffer, false);
-
-		/*
-		 * Note that unlike IndexScan, SeqScan never use keys in
-		 * heap_beginscan (and this is very bad) - so, here we do not check
-		 * are keys ok or not.
-		 */
-
-		/* Flag for the next call that no more tuples */
-		estate->es_evTupleNull[scanrelid - 1] = true;
-		return slot;
-	}
-
-	/*
-	 * get the next tuple from the access methods
+	 * get the next tuple from the table
 	 */
 	tuple = heap_getnext(scandesc, direction);
 
@@ -112,23 +85,35 @@ SeqNext(SeqScanState *node)
 	return slot;
 }
 
+/*
+ * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
+{
+	/*
+	 * Note that unlike IndexScan, SeqScan never uses keys in
+	 * heap_beginscan (and this is very bad), so there are no scan keys
+	 * to recheck here.
+	 */
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecSeqScan(node)
  *
  *		Scans the relation sequentially and returns the next qualifying
  *		tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieve tuples sequentially.
- *
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
+ * ----------------------------------------------------------------
  */
-
 TupleTableSlot *
 ExecSeqScan(SeqScanState *node)
 {
-	/*
-	 * use SeqNext as access method
-	 */
-	return ExecScan((ScanState *) node, (ExecScanAccessMtd) SeqNext);
+	return ExecScan((ScanState *) node,
+					(ExecScanAccessMtd) SeqNext,
+					(ExecScanRecheckMtd) SeqRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -279,27 +264,14 @@ ExecEndSeqScan(SeqScanState *node)
 void
 ExecSeqReScan(SeqScanState *node, ExprContext *exprCtxt)
 {
-	EState	   *estate;
-	Index		scanrelid;
 	HeapScanDesc scan;
 
-	estate = node->ps.state;
-	scanrelid = ((SeqScan *) node->ps.plan)->scanrelid;
-
-	node->ps.ps_TupFromTlist = false;
-
-	/* If this is re-scanning of PlanQual ... */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		estate->es_evTupleNull[scanrelid - 1] = false;
-		return;
-	}
-
 	scan = node->ss_currentScanDesc;
 
 	heap_rescan(scan,			/* scan desc */
 				NULL);			/* new scan keys */
+
+	ExecScanReScan((ScanState *) node);
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/executor/nodeSubqueryscan.c b/src/backend/executor/nodeSubqueryscan.c
index 15929dedffe..402c24e6285 100644
--- a/src/backend/executor/nodeSubqueryscan.c
+++ b/src/backend/executor/nodeSubqueryscan.c
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubqueryscan.c,v 1.42 2009/10/12 18:10:43 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubqueryscan.c,v 1.43 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,42 +47,44 @@ SubqueryNext(SubqueryScanState *node)
 {
 	TupleTableSlot *slot;
 
-	/*
-	 * We need not support EvalPlanQual here, since we are not scanning a real
-	 * relation.
-	 */
-
 	/*
 	 * Get the next tuple from the sub-query.
 	 */
 	slot = ExecProcNode(node->subplan);
 
 	/*
-	 * We just overwrite our ScanTupleSlot with the subplan's result slot,
-	 * rather than expending the cycles for ExecCopySlot().
+	 * We just return the subplan's result slot, rather than expending
+	 * extra cycles for ExecCopySlot().  (Our own ScanTupleSlot is used
+	 * only for EvalPlanQual rechecks.)
 	 */
-	node->ss.ss_ScanTupleSlot = slot;
-
 	return slot;
 }
 
+/*
+ * SubqueryRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+SubqueryRecheck(SubqueryScanState *node, TupleTableSlot *slot)
+{
+	/* nothing to check */
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecSubqueryScan(node)
  *
  *		Scans the subquery sequentially and returns the next qualifying
  *		tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieve tuples sequentially.
- *
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
+ * ----------------------------------------------------------------
  */
-
 TupleTableSlot *
 ExecSubqueryScan(SubqueryScanState *node)
 {
-	/*
-	 * use SubqueryNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) SubqueryNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) SubqueryNext,
+					(ExecScanRecheckMtd) SubqueryRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -176,7 +178,7 @@ ExecEndSubqueryScan(SubqueryScanState *node)
 	 * clean out the upper tuple table
 	 */
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	node->ss.ss_ScanTupleSlot = NULL;	/* not ours to clear */
+	ExecClearTuple(node->ss.ss_ScanTupleSlot);
 
 	/*
 	 * close down subquery
@@ -193,9 +195,7 @@ ExecEndSubqueryScan(SubqueryScanState *node)
 void
 ExecSubqueryReScan(SubqueryScanState *node, ExprContext *exprCtxt)
 {
-	EState	   *estate;
-
-	estate = node->ss.ps.state;
+	ExecScanReScan(&node->ss);
 
 	/*
 	 * ExecReScan doesn't know about my subplan, so I have to do
@@ -211,7 +211,4 @@ ExecSubqueryReScan(SubqueryScanState *node, ExprContext *exprCtxt)
 	 */
 	if (node->subplan->chgParam == NULL)
 		ExecReScan(node->subplan, NULL);
-
-	node->ss.ss_ScanTupleSlot = NULL;
-	node->ss.ps.ps_TupFromTlist = false;
 }
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 1fc74695eec..7e4a5c7a077 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeTidscan.c,v 1.63 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeTidscan.c,v 1.64 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -258,7 +258,6 @@ TidNext(TidScanState *node)
 	Relation	heapRelation;
 	HeapTuple	tuple;
 	TupleTableSlot *slot;
-	Index		scanrelid;
 	Buffer		buffer = InvalidBuffer;
 	ItemPointerData *tidList;
 	int			numTids;
@@ -272,33 +271,6 @@ TidNext(TidScanState *node)
 	snapshot = estate->es_snapshot;
 	heapRelation = node->ss.ss_currentRelation;
 	slot = node->ss.ss_ScanTupleSlot;
-	scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
-
-	/*
-	 * Check if we are evaluating PlanQual for tuple of this relation.
-	 * Additional checking is not good, but no other way for now. We could
-	 * introduce new nodes for this case and handle TidScan --> NewNode
-	 * switching in Init/ReScan plan...
-	 */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		if (estate->es_evTupleNull[scanrelid - 1])
-			return ExecClearTuple(slot);
-
-		/*
-		 * XXX shouldn't we check here to make sure tuple matches TID list? In
-		 * runtime-key case this is not certain, is it?  However, in the WHERE
-		 * CURRENT OF case it might not match anyway ...
-		 */
-
-		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
-					   slot, InvalidBuffer, false);
-
-		/* Flag for the next call that no more tuples */
-		estate->es_evTupleNull[scanrelid - 1] = true;
-		return slot;
-	}
 
 	/*
 	 * First time through, compute the list of TIDs to be visited
@@ -384,13 +356,28 @@ TidNext(TidScanState *node)
 	return ExecClearTuple(slot);
 }
 
+/*
+ * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+TidRecheck(TidScanState *node, TupleTableSlot *slot)
+{
+	/*
+	 * XXX shouldn't we check here to make sure tuple matches TID list? In
+	 * runtime-key case this is not certain, is it?  However, in the WHERE
+	 * CURRENT OF case it might not match anyway ...
+	 */
+	return true;
+}
+
+
 /* ----------------------------------------------------------------
  *		ExecTidScan(node)
  *
  *		Scans the relation using tids and returns
  *		   the next qualifying tuple in the direction specified.
- *		It calls ExecScan() and passes it the access methods which returns
- *		the next tuple using the tids.
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
  *
  *		Conditions:
  *		  -- the "cursor" maintained by the AMI is positioned at the tuple
@@ -405,10 +392,9 @@ TidNext(TidScanState *node)
 TupleTableSlot *
 ExecTidScan(TidScanState *node)
 {
-	/*
-	 * use TidNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) TidNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) TidNext,
+					(ExecScanRecheckMtd) TidRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -418,32 +404,18 @@ ExecTidScan(TidScanState *node)
 void
 ExecTidReScan(TidScanState *node, ExprContext *exprCtxt)
 {
-	EState	   *estate;
-	Index		scanrelid;
-
-	estate = node->ss.ps.state;
-	scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
-
-	node->ss.ps.ps_TupFromTlist = false;
-
 	/* If we are being passed an outer tuple, save it for runtime key calc */
 	if (exprCtxt != NULL)
 		node->ss.ps.ps_ExprContext->ecxt_outertuple =
 			exprCtxt->ecxt_outertuple;
 
-	/* If this is re-scanning of PlanQual ... */
-	if (estate->es_evTuple != NULL &&
-		estate->es_evTuple[scanrelid - 1] != NULL)
-	{
-		estate->es_evTupleNull[scanrelid - 1] = false;
-		return;
-	}
-
 	if (node->tss_TidList)
 		pfree(node->tss_TidList);
 	node->tss_TidList = NULL;
 	node->tss_NumTids = 0;
 	node->tss_TidPtr = -1;
+
+	ExecScanReScan(&node->ss);
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c
index 90b5594f4ed..55a0d53265b 100644
--- a/src/backend/executor/nodeValuesscan.c
+++ b/src/backend/executor/nodeValuesscan.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeValuesscan.c,v 1.10 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeValuesscan.c,v 1.11 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -154,23 +154,31 @@ ValuesNext(ValuesScanState *node)
 	return slot;
 }
 
+/*
+ * ValuesRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+ValuesRecheck(ValuesScanState *node, TupleTableSlot *slot)
+{
+	/* nothing to check */
+	return true;
+}
 
 /* ----------------------------------------------------------------
  *		ExecValuesScan(node)
  *
  *		Scans the values lists sequentially and returns the next qualifying
  *		tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieves tuples sequentially.
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
 ExecValuesScan(ValuesScanState *node)
 {
-	/*
-	 * use ValuesNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) ValuesNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) ValuesNext,
+					(ExecScanRecheckMtd) ValuesRecheck);
 }
 
 /* ----------------------------------------------------------------
@@ -320,7 +328,8 @@ void
 ExecValuesReScan(ValuesScanState *node, ExprContext *exprCtxt)
 {
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	node->ss.ps.ps_TupFromTlist = false;
+
+	ExecScanReScan(&node->ss);
 
 	node->curr_idx = -1;
 }
diff --git a/src/backend/executor/nodeWorktablescan.c b/src/backend/executor/nodeWorktablescan.c
index 545747b2307..3c18a3eccc3 100644
--- a/src/backend/executor/nodeWorktablescan.c
+++ b/src/backend/executor/nodeWorktablescan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeWorktablescan.c,v 1.8 2009/09/27 21:10:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeWorktablescan.c,v 1.9 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,12 +61,22 @@ WorkTableScanNext(WorkTableScanState *node)
 	return slot;
 }
 
+/*
+ * WorkTableScanRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+WorkTableScanRecheck(WorkTableScanState *node, TupleTableSlot *slot)
+{
+	/* nothing to check */
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecWorkTableScan(node)
  *
  *		Scans the worktable sequentially and returns the next qualifying tuple.
- *		It calls the ExecScan() routine and passes it the access method
- *		which retrieves tuples sequentially.
+ *		We call the ExecScan() routine and pass it the appropriate
+ *		access method functions.
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
@@ -106,10 +116,9 @@ ExecWorkTableScan(WorkTableScanState *node)
 		ExecAssignScanProjectionInfo(&node->ss);
 	}
 
-	/*
-	 * use WorkTableScanNext as access method
-	 */
-	return ExecScan(&node->ss, (ExecScanAccessMtd) WorkTableScanNext);
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) WorkTableScanNext,
+					(ExecScanRecheckMtd) WorkTableScanRecheck);
 }
 
 
@@ -203,7 +212,8 @@ void
 ExecWorkTableScanReScan(WorkTableScanState *node, ExprContext *exprCtxt)
 {
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	node->ss.ps.ps_TupFromTlist = false;
+
+	ExecScanReScan(&node->ss);
 
 	/* No need (or way) to rescan if ExecWorkTableScan not called yet */
 	if (node->rustate)
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 92da5324c24..deee9994170 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.448 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.449 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -174,6 +174,8 @@ _copyModifyTable(ModifyTable *from)
 	COPY_NODE_FIELD(resultRelations);
 	COPY_NODE_FIELD(plans);
 	COPY_NODE_FIELD(returningLists);
+	COPY_NODE_FIELD(rowMarks);
+	COPY_SCALAR_FIELD(epqParam);
 
 	return newnode;
 }
@@ -812,6 +814,7 @@ _copyLockRows(LockRows *from)
 	 * copy remainder of node
 	 */
 	COPY_NODE_FIELD(rowMarks);
+	COPY_SCALAR_FIELD(epqParam);
 
 	return newnode;
 }
@@ -838,6 +841,26 @@ _copyLimit(Limit *from)
 	return newnode;
 }
 
+/*
+ * _copyPlanRowMark
+ */
+static PlanRowMark *
+_copyPlanRowMark(PlanRowMark *from)
+{
+	PlanRowMark *newnode = makeNode(PlanRowMark);
+
+	COPY_SCALAR_FIELD(rti);
+	COPY_SCALAR_FIELD(prti);
+	COPY_SCALAR_FIELD(markType);
+	COPY_SCALAR_FIELD(noWait);
+	COPY_SCALAR_FIELD(isParent);
+	COPY_SCALAR_FIELD(ctidAttNo);
+	COPY_SCALAR_FIELD(toidAttNo);
+	COPY_SCALAR_FIELD(wholeAttNo);
+
+	return newnode;
+}
+
 /*
  * _copyPlanInvalItem
  */
@@ -1834,11 +1857,8 @@ _copyRowMarkClause(RowMarkClause *from)
 	RowMarkClause *newnode = makeNode(RowMarkClause);
 
 	COPY_SCALAR_FIELD(rti);
-	COPY_SCALAR_FIELD(prti);
-	COPY_SCALAR_FIELD(rowmarkId);
 	COPY_SCALAR_FIELD(forUpdate);
 	COPY_SCALAR_FIELD(noWait);
-	COPY_SCALAR_FIELD(isParent);
 
 	return newnode;
 }
@@ -3621,6 +3641,9 @@ copyObject(void *from)
 		case T_Limit:
 			retval = _copyLimit(from);
 			break;
+		case T_PlanRowMark:
+			retval = _copyPlanRowMark(from);
+			break;
 		case T_PlanInvalItem:
 			retval = _copyPlanInvalItem(from);
 			break;
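
For reference while reading these node-support changes: the fields handled
above imply a PlanRowMark node shaped roughly like the declaration below.
This is a sketch only; the field types are inferred from the copy/out macros,
the comments are my reading of how the fields are used, and the authoritative
declaration lives in the node header files, which are not part of this
excerpt.

    typedef struct PlanRowMark
    {
        NodeTag     type;
        Index       rti;            /* range-table index of the marked relation */
        Index       prti;           /* parent rel's RT index, if this is a child */
        RowMarkType markType;       /* e.g. ROW_MARK_EXCLUSIVE for FOR UPDATE */
        bool        noWait;         /* FOR UPDATE/SHARE NOWAIT option */
        bool        isParent;       /* "parent" marks are ignored at runtime */
        AttrNumber  ctidAttNo;      /* resno of the ctid junk attribute */
        AttrNumber  toidAttNo;      /* resno of the tableoid junk attribute */
        AttrNumber  wholeAttNo;     /* resno of the whole-row junk attribute */
    } PlanRowMark;
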
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 0c30d6aa9d7..f7e9547a1f8 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.370 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.371 2009/10/26 02:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2196,11 +2196,8 @@ static bool
 _equalRowMarkClause(RowMarkClause *a, RowMarkClause *b)
 {
 	COMPARE_SCALAR_FIELD(rti);
-	COMPARE_SCALAR_FIELD(prti);
-	COMPARE_SCALAR_FIELD(rowmarkId);
 	COMPARE_SCALAR_FIELD(forUpdate);
 	COMPARE_SCALAR_FIELD(noWait);
-	COMPARE_SCALAR_FIELD(isParent);
 
 	return true;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 45caaea850a..ae7a859bd55 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.369 2009/10/13 00:53:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.370 2009/10/26 02:26:31 tgl Exp $
  *
  * NOTES
  *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -329,6 +329,8 @@ _outModifyTable(StringInfo str, ModifyTable *node)
 	WRITE_NODE_FIELD(resultRelations);
 	WRITE_NODE_FIELD(plans);
 	WRITE_NODE_FIELD(returningLists);
+	WRITE_NODE_FIELD(rowMarks);
+	WRITE_INT_FIELD(epqParam);
 }
 
 static void
@@ -729,6 +731,7 @@ _outLockRows(StringInfo str, LockRows *node)
 	_outPlanInfo(str, (Plan *) node);
 
 	WRITE_NODE_FIELD(rowMarks);
+	WRITE_INT_FIELD(epqParam);
 }
 
 static void
@@ -742,6 +745,21 @@ _outLimit(StringInfo str, Limit *node)
 	WRITE_NODE_FIELD(limitCount);
 }
 
+static void
+_outPlanRowMark(StringInfo str, PlanRowMark *node)
+{
+	WRITE_NODE_TYPE("PLANROWMARK");
+
+	WRITE_UINT_FIELD(rti);
+	WRITE_UINT_FIELD(prti);
+	WRITE_ENUM_FIELD(markType, RowMarkType);
+	WRITE_BOOL_FIELD(noWait);
+	WRITE_BOOL_FIELD(isParent);
+	WRITE_INT_FIELD(ctidAttNo);
+	WRITE_INT_FIELD(toidAttNo);
+	WRITE_INT_FIELD(wholeAttNo);
+}
+
 static void
 _outPlanInvalItem(StringInfo str, PlanInvalItem *node)
 {
@@ -1512,7 +1530,6 @@ _outPlannerGlobal(StringInfo str, PlannerGlobal *node)
 	WRITE_NODE_FIELD(relationOids);
 	WRITE_NODE_FIELD(invalItems);
 	WRITE_UINT_FIELD(lastPHId);
-	WRITE_UINT_FIELD(lastRowmarkId);
 	WRITE_BOOL_FIELD(transientPlan);
 }
 
@@ -1536,6 +1553,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
 	WRITE_NODE_FIELD(full_join_clauses);
 	WRITE_NODE_FIELD(join_info_list);
 	WRITE_NODE_FIELD(append_rel_list);
+	WRITE_NODE_FIELD(rowMarks);
 	WRITE_NODE_FIELD(placeholder_list);
 	WRITE_NODE_FIELD(query_pathkeys);
 	WRITE_NODE_FIELD(group_pathkeys);
@@ -2016,11 +2034,8 @@ _outRowMarkClause(StringInfo str, RowMarkClause *node)
 	WRITE_NODE_TYPE("ROWMARKCLAUSE");
 
 	WRITE_UINT_FIELD(rti);
-	WRITE_UINT_FIELD(prti);
-	WRITE_UINT_FIELD(rowmarkId);
 	WRITE_BOOL_FIELD(forUpdate);
 	WRITE_BOOL_FIELD(noWait);
-	WRITE_BOOL_FIELD(isParent);
 }
 
 static void
@@ -2526,6 +2541,9 @@ _outNode(StringInfo str, void *obj)
 			case T_Limit:
 				_outLimit(str, obj);
 				break;
+			case T_PlanRowMark:
+				_outPlanRowMark(str, obj);
+				break;
 			case T_PlanInvalItem:
 				_outPlanInvalItem(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 7cffedb73b4..a1520276e22 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.225 2009/10/12 18:10:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.226 2009/10/26 02:26:32 tgl Exp $
  *
  * NOTES
  *	  Path and Plan nodes do not have any readfuncs support, because we
@@ -293,11 +293,8 @@ _readRowMarkClause(void)
 	READ_LOCALS(RowMarkClause);
 
 	READ_UINT_FIELD(rti);
-	READ_UINT_FIELD(prti);
-	READ_UINT_FIELD(rowmarkId);
 	READ_BOOL_FIELD(forUpdate);
 	READ_BOOL_FIELD(noWait);
-	READ_BOOL_FIELD(isParent);
 
 	READ_DONE();
 }
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f6fffec902e..4d402ca7202 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.187 2009/10/12 18:10:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.188 2009/10/26 02:26:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -632,7 +632,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 									false, tuple_fraction,
 									&subroot);
 	rel->subrtable = subroot->parse->rtable;
-	rel->subrowmark = subroot->parse->rowMarks;
+	rel->subrowmark = subroot->rowMarks;
 
 	/* Copy number of output rows from subplan */
 	rel->tuples = rel->subplan->plan_rows;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 1452bdd035c..b068d2f3f83 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.265 2009/10/12 18:10:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.266 2009/10/26 02:26:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -951,7 +951,7 @@ create_indexscan_plan(PlannerInfo *root,
 			if (best_path->indexinfo->indpred)
 			{
 				if (baserelid != root->parse->resultRelation &&
-					get_rowmark(root->parse, baserelid) == NULL)
+					get_parse_rowmark(root->parse, baserelid) == NULL)
 					if (predicate_implied_by(clausel,
 											 best_path->indexinfo->indpred))
 						continue;
@@ -3598,7 +3598,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
  *	  Build a LockRows plan node
  */
 LockRows *
-make_lockrows(Plan *lefttree, List *rowMarks)
+make_lockrows(Plan *lefttree, List *rowMarks, int epqParam)
 {
 	LockRows   *node = makeNode(LockRows);
 	Plan	   *plan = &node->plan;
@@ -3614,6 +3614,7 @@ make_lockrows(Plan *lefttree, List *rowMarks)
 	plan->righttree = NULL;
 
 	node->rowMarks = rowMarks;
+	node->epqParam = epqParam;
 
 	return node;
 }
@@ -3750,7 +3751,8 @@ make_result(PlannerInfo *root,
  */
 ModifyTable *
 make_modifytable(CmdType operation, List *resultRelations,
-				 List *subplans, List *returningLists)
+				 List *subplans, List *returningLists,
+				 List *rowMarks, int epqParam)
 {
 	ModifyTable *node = makeNode(ModifyTable);
 	Plan	   *plan = &node->plan;
@@ -3801,6 +3803,8 @@ make_modifytable(CmdType operation, List *resultRelations,
 	node->resultRelations = resultRelations;
 	node->plans = subplans;
 	node->returningLists = returningLists;
+	node->rowMarks = rowMarks;
+	node->epqParam = epqParam;
 
 	return node;
 }
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 9ea928ed9d4..f10bf1c85c5 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.155 2009/07/21 02:02:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.156 2009/10/26 02:26:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -559,6 +559,9 @@ make_outerjoininfo(PlannerInfo *root,
 	 * parser.	It's because the parser hasn't got enough info --- consider
 	 * FOR UPDATE applied to a view.  Only after rewriting and flattening do
 	 * we know whether the view contains an outer join.
+	 *
+	 * We use the original RowMarkClause list here; the PlanRowMark list
+	 * would include marks for non-locked relations as well.
 	 */
 	foreach(l, root->parse->rowMarks)
 	{
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c17fe5f63f6..0b396b29bcc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.259 2009/10/12 18:10:48 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.260 2009/10/26 02:26:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -35,6 +35,7 @@
 #ifdef OPTIMIZER_DEBUG
 #include "nodes/print.h"
 #endif
+#include "parser/analyze.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
 #include "parser/parsetree.h"
@@ -63,6 +64,7 @@ static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
 static Plan *inheritance_planner(PlannerInfo *root);
 static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
 static bool is_dummy_plan(Plan *plan);
+static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
@@ -159,7 +161,6 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	glob->relationOids = NIL;
 	glob->invalItems = NIL;
 	glob->lastPHId = 0;
-	glob->lastRowmarkId = 0;
 	glob->transientPlan = false;
 
 	/* Determine what fraction of the plan is likely to be scanned */
@@ -209,7 +210,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	Assert(glob->finalrowmarks == NIL);
 	top_plan = set_plan_references(glob, top_plan,
 								   root->parse->rtable,
-								   root->parse->rowMarks);
+								   root->rowMarks);
 	/* ... and the subplans (both regular subplans and initplans) */
 	Assert(list_length(glob->subplans) == list_length(glob->subrtables));
 	Assert(list_length(glob->subplans) == list_length(glob->subrowmarks));
@@ -301,10 +302,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->cte_plan_ids = NIL;
 	root->eq_classes = NIL;
 	root->append_rel_list = NIL;
+	root->rowMarks = NIL;
 
 	root->hasRecursion = hasRecursion;
 	if (hasRecursion)
-		root->wt_param_id = SS_assign_worktable_param(root);
+		root->wt_param_id = SS_assign_special_param(root);
 	else
 		root->wt_param_id = -1;
 	root->non_recursive_plan = NULL;
@@ -364,19 +366,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	}
 
 	/*
-	 * Assign unique IDs (unique within this planner run) to RowMarkClauses.
-	 * We can't identify them just by RT index because that will change
-	 * during final rtable flattening, and we don't want to have to go back
-	 * and change the resnames assigned to junk CTID tlist entries at that
-	 * point.  Do it now before expanding inheritance sets, because child
-	 * relations should inherit their parents' rowmarkId.
+	 * Preprocess RowMark information.  We need to do this after subquery
+	 * pullup (so that all non-inherited RTEs are present) and before
+	 * inheritance expansion (so that the info is available for
+	 * expand_inherited_tables to examine and modify).
 	 */
-	foreach(l, parse->rowMarks)
-	{
-		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
-
-		rc->rowmarkId = ++(root->glob->lastRowmarkId);
-	}
+	preprocess_rowmarks(root);
 
 	/*
 	 * Expand any rangetable entries that are inheritance sets into "append
@@ -512,14 +507,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 		/* If it's not SELECT, we need a ModifyTable node */
 		if (parse->commandType != CMD_SELECT)
 		{
+			List   *returningLists;
+			List   *rowMarks;
+
 			/*
 			 * Deal with the RETURNING clause if any.  It's convenient to pass
 			 * the returningList through setrefs.c now rather than at top
 			 * level (if we waited, handling inherited UPDATE/DELETE would be
 			 * much harder).
 			 */
-			List   *returningLists;
-
 			if (parse->returningList)
 			{
 				List	   *rlist;
@@ -534,20 +530,32 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 			else
 				returningLists = NIL;
 
+			/*
+			 * If there was a FOR UPDATE/SHARE clause, the LockRows node will
+			 * have dealt with fetching non-locked marked rows, else we need
+			 * to have ModifyTable do that.
+			 */
+			if (parse->rowMarks)
+				rowMarks = NIL;
+			else
+				rowMarks = root->rowMarks;
+
 			plan = (Plan *) make_modifytable(parse->commandType,
 											 copyObject(root->resultRelations),
 											 list_make1(plan),
-											 returningLists);
+											 returningLists,
+											 rowMarks,
+											 SS_assign_special_param(root));
 		}
 	}
 
 	/*
-	 * If any subplans were generated, or if we're inside a subplan, build
-	 * initPlan list and extParam/allParam sets for plan nodes, and attach the
-	 * initPlans to the top plan node.
+	 * If any subplans were generated, or if there are any parameters to worry
+	 * about, build initPlan list and extParam/allParam sets for plan nodes,
+	 * and attach the initPlans to the top plan node.
 	 */
 	if (list_length(glob->subplans) != num_old_subplans ||
-		root->query_level > 1)
+		root->glob->paramlist != NIL)
 		SS_finalize_plan(root, plan, true);
 
 	/* Return internal info if caller wants it */
@@ -701,6 +709,7 @@ inheritance_planner(PlannerInfo *root)
 	List	   *resultRelations = NIL;
 	List	   *returningLists = NIL;
 	List	   *rtable = NIL;
+	List	   *rowMarks;
 	List	   *tlist;
 	PlannerInfo subroot;
 	ListCell   *l;
@@ -797,11 +806,23 @@ inheritance_planner(PlannerInfo *root)
 	 */
 	parse->rtable = rtable;
 
+	/*
+	 * If there was a FOR UPDATE/SHARE clause, the LockRows node will
+	 * have dealt with fetching non-locked marked rows, else we need
+	 * to have ModifyTable do that.
+	 */
+	if (parse->rowMarks)
+		rowMarks = NIL;
+	else
+		rowMarks = root->rowMarks;
+
 	/* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */
 	return (Plan *) make_modifytable(parse->commandType,
 									 copyObject(root->resultRelations),
 									 subplans, 
-									 returningLists);
+									 returningLists,
+									 rowMarks,
+									 SS_assign_special_param(root));
 }
 
 /*--------------------
@@ -1630,11 +1651,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 	/*
 	 * Finally, if there is a FOR UPDATE/SHARE clause, add the LockRows node.
+	 * (Note: we intentionally test parse->rowMarks not root->rowMarks here.
+	 * If there are only non-locking rowmarks, they should be handled by
+	 * the ModifyTable node instead.)
 	 */
 	if (parse->rowMarks)
 	{
 		result_plan = (Plan *) make_lockrows(result_plan,
-											 parse->rowMarks);
+											 root->rowMarks,
+											 SS_assign_special_param(root));
 	}
 
 	/* Compute result-relations list if needed */
@@ -1681,6 +1706,158 @@ is_dummy_plan(Plan *plan)
 	return false;
 }
 
+/*
+ * Create a bitmapset of the RT indexes of live base relations
+ *
+ * Helper for preprocess_rowmarks ... at this point in the proceedings,
+ * the only good way to distinguish baserels from appendrel children
+ * is to see what is in the join tree.
+ */
+static Bitmapset *
+get_base_rel_indexes(Node *jtnode)
+{
+	Bitmapset  *result;
+
+	if (jtnode == NULL)
+		return NULL;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		result = bms_make_singleton(varno);
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		result = NULL;
+		foreach(l, f->fromlist)
+			result = bms_join(result,
+							  get_base_rel_indexes(lfirst(l)));
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		result = bms_join(get_base_rel_indexes(j->larg),
+						  get_base_rel_indexes(j->rarg));
+	}
+	else
+	{
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+		result = NULL;			/* keep compiler quiet */
+	}
+	return result;
+}
+
+/*
+ * preprocess_rowmarks - set up PlanRowMarks if needed
+ */
+static void
+preprocess_rowmarks(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	Bitmapset  *rels;
+	List	   *prowmarks;
+	ListCell   *l;
+	int			i;
+
+	if (parse->rowMarks)
+	{
+		/*
+		 * We've got trouble if FOR UPDATE/SHARE appears inside grouping,
+		 * since grouping renders a reference to individual tuple CTIDs
+		 * invalid.  This is also checked at parse time, but that's
+		 * insufficient because of rule substitution, query pullup, etc.
+		 */
+		CheckSelectLocking(parse);
+	}
+	else
+	{
+		/*
+		 * We only need rowmarks for UPDATE, DELETE, or FOR UPDATE/SHARE.
+		 */
+		if (parse->commandType != CMD_UPDATE &&
+			parse->commandType != CMD_DELETE)
+			return;
+	}
+
+	/*
+	 * We need to have rowmarks for all base relations except the target.
+	 * We make a bitmapset of all base rels, then remove the target rel and
+	 * any rels that already have explicit FOR UPDATE/SHARE marks.
+	 */
+	rels = get_base_rel_indexes((Node *) parse->jointree);
+	if (parse->resultRelation)
+		rels = bms_del_member(rels, parse->resultRelation);
+
+	/*
+	 * Convert RowMarkClauses to PlanRowMark representation.
+	 *
+	 * Note: currently, it is syntactically impossible to have FOR UPDATE
+	 * applied to an update/delete target rel.  If that ever becomes
+	 * possible, we should drop the target from the PlanRowMark list.
+	 */
+	prowmarks = NIL;
+	foreach(l, parse->rowMarks)
+	{
+		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
+		PlanRowMark *newrc = makeNode(PlanRowMark);
+
+		Assert(rc->rti != parse->resultRelation);
+		rels = bms_del_member(rels, rc->rti);
+
+		newrc->rti = newrc->prti = rc->rti;
+		if (rc->forUpdate)
+			newrc->markType = ROW_MARK_EXCLUSIVE;
+		else
+			newrc->markType = ROW_MARK_SHARE;
+		newrc->noWait = rc->noWait;
+		newrc->isParent = false;
+		/* attnos will be assigned in preprocess_targetlist */
+		newrc->ctidAttNo = InvalidAttrNumber;
+		newrc->toidAttNo = InvalidAttrNumber;
+		newrc->wholeAttNo = InvalidAttrNumber;
+
+		prowmarks = lappend(prowmarks, newrc);
+	}
+
+	/*
+	 * Now, add rowmarks for any non-target, non-locked base relations.
+	 */
+	i = 0;
+	foreach(l, parse->rtable)
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+		PlanRowMark *newrc;
+
+		i++;
+		if (!bms_is_member(i, rels))
+			continue;
+
+		newrc = makeNode(PlanRowMark);
+
+		newrc->rti = newrc->prti = i;
+		/* real tables support REFERENCE, anything else needs COPY */
+		if (rte->rtekind == RTE_RELATION)
+			newrc->markType = ROW_MARK_REFERENCE;
+		else
+			newrc->markType = ROW_MARK_COPY;
+		newrc->noWait = false;			/* doesn't matter */
+		newrc->isParent = false;
+		/* attnos will be assigned in preprocess_targetlist */
+		newrc->ctidAttNo = InvalidAttrNumber;
+		newrc->toidAttNo = InvalidAttrNumber;
+		newrc->wholeAttNo = InvalidAttrNumber;
+
+		prowmarks = lappend(prowmarks, newrc);
+	}
+
+	root->rowMarks = prowmarks;
+}
+
 /*
  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
  *
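For orientation, here is a hedged worked example of the PlanRowMark set that
preprocess_rowmarks is expected to build (the relation names are hypothetical,
not taken from this patch).  Given a query such as

	SELECT ... FROM t1, t2, some_function() f WHERE ... FOR UPDATE OF t1;

root->rowMarks would end up with one entry per base rel:

	t1's RT index -> markType = ROW_MARK_EXCLUSIVE  (explicitly locked)
	t2's RT index -> markType = ROW_MARK_REFERENCE  (plain table, TID suffices)
	f's RT index  -> markType = ROW_MARK_COPY       (not a table, copy whole row)

For an UPDATE or DELETE without FOR UPDATE/SHARE, the target rel gets no entry
at all, so root->rowMarks holds only non-locking marks; that is why the planner
then hands the list to the ModifyTable node rather than to a LockRows node.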
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 4fec749eba7..454c3363918 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.153 2009/10/14 22:14:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.154 2009/10/26 02:26:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -166,7 +166,7 @@ static bool extract_query_dependencies_walker(Node *node,
  *	glob: global data for planner run
  *	plan: the topmost node of the plan
  *	rtable: the rangetable for the current subquery
- *	rowmarks: the RowMarkClause list for the current subquery
+ *	rowmarks: the PlanRowMark list for the current subquery
  *
  * The return value is normally the same Plan node passed in, but can be
  * different when the passed-in Plan is a SubqueryScan we decide isn't needed.
@@ -235,21 +235,22 @@ set_plan_references(PlannerGlobal *glob, Plan *plan,
 	}
 
 	/*
-	 * Adjust RT indexes of RowMarkClauses and add to final rowmarks list
+	 * Adjust RT indexes of PlanRowMarks and add to final rowmarks list
 	 */
 	foreach(lc, rowmarks)
 	{
-		RowMarkClause *rc = (RowMarkClause *) lfirst(lc);
-		RowMarkClause *newrc;
+		PlanRowMark *rc = (PlanRowMark *) lfirst(lc);
+		PlanRowMark *newrc;
 
-		/* flat copy to duplicate all the scalar fields */
-		newrc = (RowMarkClause *) palloc(sizeof(RowMarkClause));
-		memcpy(newrc, rc, sizeof(RowMarkClause));
+		Assert(IsA(rc, PlanRowMark));
+
+		/* flat copy is enough since all fields are scalars */
+		newrc = (PlanRowMark *) palloc(sizeof(PlanRowMark));
+		memcpy(newrc, rc, sizeof(PlanRowMark));
 
 		/* adjust indexes */
 		newrc->rti += rtoffset;
 		newrc->prti += rtoffset;
-		/* rowmarkId must NOT be adjusted */
 
 		glob->finalrowmarks = lappend(glob->finalrowmarks, newrc);
 	}
@@ -434,7 +435,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
 
 				foreach(l, splan->rowMarks)
 				{
-					RowMarkClause *rc = (RowMarkClause *) lfirst(l);
+					PlanRowMark *rc = (PlanRowMark *) lfirst(l);
 
 					rc->rti += rtoffset;
 					rc->prti += rtoffset;
@@ -502,6 +503,13 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
 				{
 					lfirst_int(l) += rtoffset;
 				}
+				foreach(l, splan->rowMarks)
+				{
+					PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+
+					rc->rti += rtoffset;
+					rc->prti += rtoffset;
+				}
 				foreach(l, splan->plans)
 				{
 					lfirst(l) = set_plan_refs(glob,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7b0dd75e7fe..6c6122e1485 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.155 2009/10/12 18:10:48 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.156 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,7 +77,8 @@ static Node *process_sublinks_mutator(Node *node,
 						 process_sublinks_context *context);
 static Bitmapset *finalize_plan(PlannerInfo *root,
 			  Plan *plan,
-			  Bitmapset *valid_params);
+			  Bitmapset *valid_params,
+			  Bitmapset *scan_params);
 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
 
 
@@ -215,10 +216,14 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
 }
 
 /*
- * Assign a (nonnegative) PARAM_EXEC ID for a recursive query's worktable.
+ * Assign a (nonnegative) PARAM_EXEC ID for a special parameter (one that
+ * is not actually used to carry a value at runtime).  Such parameters are
+ * used for special runtime signaling purposes, such as connecting a
+ * recursive union node to its worktable scan node or forcing plan
+ * re-evaluation within the EvalPlanQual mechanism.
  */
 int
-SS_assign_worktable_param(PlannerInfo *root)
+SS_assign_special_param(PlannerInfo *root)
 {
 	Param	   *param;
 
@@ -335,7 +340,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType,
 
 	/* And convert to SubPlan or InitPlan format. */
 	result = build_subplan(root, plan,
-						   subroot->parse->rtable, subroot->parse->rowMarks,
+						   subroot->parse->rtable, subroot->rowMarks,
 						   subLinkType, testexpr, true, isTopQual);
 
 	/*
@@ -377,7 +382,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType,
 				/* OK, convert to SubPlan format. */
 				hashplan = (SubPlan *) build_subplan(root, plan,
 													 subroot->parse->rtable,
-													 subroot->parse->rowMarks,
+													 subroot->rowMarks,
 													 ANY_SUBLINK, newtestexpr,
 													 false, true);
 				/* Check we got what we expected */
@@ -949,7 +954,7 @@ SS_process_ctes(PlannerInfo *root)
 		root->glob->subrtables = lappend(root->glob->subrtables,
 										 subroot->parse->rtable);
 		root->glob->subrowmarks = lappend(root->glob->subrowmarks,
-										  subroot->parse->rowMarks);
+										  subroot->rowMarks);
 		splan->plan_id = list_length(root->glob->subplans);
 
 		root->init_plans = lappend(root->init_plans, splan);
@@ -1702,7 +1707,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
 }
 
 /*
- * SS_finalize_plan - do final sublink processing for a completed Plan.
+ * SS_finalize_plan - do final sublink and parameter processing for a
+ * completed Plan.
  *
  * This recursively computes the extParam and allParam sets for every Plan
  * node in the given plan tree.  It also optionally attaches any previously
@@ -1751,7 +1757,8 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
 	 * output parameters of any initPlans.	(We do not include output
 	 * parameters of regular subplans.	Those should only appear within the
 	 * testexpr of SubPlan nodes, and are taken care of locally within
-	 * finalize_primnode.)
+	 * finalize_primnode.  Likewise, special parameters that are generated
+	 * by nodes such as ModifyTable are handled within finalize_plan.)
 	 *
 	 * Note: this is a bit overly generous since some parameters of upper
 	 * query levels might belong to query subtrees that don't include this
@@ -1772,14 +1779,11 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
 
 		paramid++;
 	}
-	/* Also include the recursion working table, if any */
-	if (root->wt_param_id >= 0)
-		valid_params = bms_add_member(valid_params, root->wt_param_id);
 
 	/*
 	 * Now recurse through plan tree.
 	 */
-	(void) finalize_plan(root, plan, valid_params);
+	(void) finalize_plan(root, plan, valid_params, NULL);
 
 	bms_free(valid_params);
 
@@ -1819,19 +1823,28 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
 /*
  * Recursive processing of all nodes in the plan tree
  *
+ * valid_params is the set of param IDs considered valid to reference in
+ * this plan node or its children.
+ * scan_params is a set of param IDs to force scan plan nodes to reference.
+ * This is for EvalPlanQual support, and is always NULL at the top of the
+ * recursion.
+ *
  * The return value is the computed allParam set for the given Plan node.
  * This is just an internal notational convenience.
  */
 static Bitmapset *
-finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
+finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
+			  Bitmapset *scan_params)
 {
 	finalize_primnode_context context;
+	int			locally_added_param;
 
 	if (plan == NULL)
 		return NULL;
 
 	context.root = root;
 	context.paramids = NULL;	/* initialize set to empty */
+	locally_added_param = -1;	/* there isn't one */
 
 	/*
 	 * When we call finalize_primnode, context.paramids sets are automatically
@@ -1852,6 +1865,10 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 							  &context);
 			break;
 
+		case T_SeqScan:
+			context.paramids = bms_add_members(context.paramids, scan_params);
+			break;
+
 		case T_IndexScan:
 			finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
 							  &context);
@@ -1860,6 +1877,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 			 * we need not look at indexqualorig, since it will have the same
 			 * param references as indexqual.
 			 */
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_BitmapIndexScan:
@@ -1875,11 +1893,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 		case T_BitmapHeapScan:
 			finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
 							  &context);
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_TidScan:
 			finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
 							  &context);
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_SubqueryScan:
@@ -1893,16 +1913,20 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 			 */
 			context.paramids = bms_add_members(context.paramids,
 								 ((SubqueryScan *) plan)->subplan->extParam);
+			/* We need scan_params too, though */
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_FunctionScan:
 			finalize_primnode(((FunctionScan *) plan)->funcexpr,
 							  &context);
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_ValuesScan:
 			finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
 							  &context);
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_CteScan:
@@ -1934,6 +1958,9 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 					bms_add_member(context.paramids,
 								   ((CteScan *) plan)->cteParam);
 #endif
+
+				context.paramids = bms_add_members(context.paramids,
+												   scan_params);
 			}
 			break;
 
@@ -1941,21 +1968,30 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 			context.paramids =
 				bms_add_member(context.paramids,
 							   ((WorkTableScan *) plan)->wtParam);
+			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
 
 		case T_ModifyTable:
 			{
+				ModifyTable *mtplan = (ModifyTable *) plan;
 				ListCell   *l;
 
-				finalize_primnode((Node *) ((ModifyTable *) plan)->returningLists,
+				/* Force descendant scan nodes to reference epqParam */
+				locally_added_param = mtplan->epqParam;
+				valid_params = bms_add_member(bms_copy(valid_params),
+											  locally_added_param);
+				scan_params = bms_add_member(bms_copy(scan_params),
+											 locally_added_param);
+				finalize_primnode((Node *) mtplan->returningLists,
 								  &context);
-				foreach(l, ((ModifyTable *) plan)->plans)
+				foreach(l, mtplan->plans)
 				{
 					context.paramids =
 						bms_add_members(context.paramids,
 										finalize_plan(root,
 													  (Plan *) lfirst(l),
-													  valid_params));
+													  valid_params,
+													  scan_params));
 				}
 			}
 			break;
@@ -1970,7 +2006,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 						bms_add_members(context.paramids,
 										finalize_plan(root,
 													  (Plan *) lfirst(l),
-													  valid_params));
+													  valid_params,
+													  scan_params));
 				}
 			}
 			break;
@@ -1985,7 +2022,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 						bms_add_members(context.paramids,
 										finalize_plan(root,
 													  (Plan *) lfirst(l),
-													  valid_params));
+													  valid_params,
+													  scan_params));
 				}
 			}
 			break;
@@ -2000,7 +2038,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 						bms_add_members(context.paramids,
 										finalize_plan(root,
 													  (Plan *) lfirst(l),
-													  valid_params));
+													  valid_params,
+													  scan_params));
 				}
 			}
 			break;
@@ -2032,16 +2071,30 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 			break;
 
 		case T_RecursiveUnion:
+			/* child nodes are allowed to reference wtParam */
+			locally_added_param = ((RecursiveUnion *) plan)->wtParam;
+			valid_params = bms_add_member(bms_copy(valid_params),
+										  locally_added_param);
+			/* wtParam does *not* get added to scan_params */
+			break;
+
+		case T_LockRows:
+			/* Force descendant scan nodes to reference epqParam */
+			locally_added_param = ((LockRows *) plan)->epqParam;
+			valid_params = bms_add_member(bms_copy(valid_params),
+										  locally_added_param);
+			scan_params = bms_add_member(bms_copy(scan_params),
+										 locally_added_param);
+			break;
+
 		case T_Hash:
 		case T_Agg:
 		case T_WindowAgg:
-		case T_SeqScan:
 		case T_Material:
 		case T_Sort:
 		case T_Unique:
 		case T_SetOp:
 		case T_Group:
-		case T_LockRows:
 			break;
 
 		default:
@@ -2053,20 +2106,25 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 	context.paramids = bms_add_members(context.paramids,
 									   finalize_plan(root,
 													 plan->lefttree,
-													 valid_params));
+													 valid_params,
+													 scan_params));
 
 	context.paramids = bms_add_members(context.paramids,
 									   finalize_plan(root,
 													 plan->righttree,
-													 valid_params));
+													 valid_params,
+													 scan_params));
 
 	/*
-	 * RecursiveUnion *generates* its worktable param, so don't bubble that up
+	 * Any locally generated parameter doesn't count towards its generating
+	 * plan node's external dependencies.  (Note: if we changed valid_params
+	 * and/or scan_params, we leak those bitmapsets; not worth the notational
+	 * trouble to clean them up.)
 	 */
-	if (IsA(plan, RecursiveUnion))
+	if (locally_added_param >= 0)
 	{
 		context.paramids = bms_del_member(context.paramids,
-										  ((RecursiveUnion *) plan)->wtParam);
+										  locally_added_param);
 	}
 
 	/* Now we have all the paramids */
@@ -2199,7 +2257,7 @@ SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan,
 	root->glob->subrtables = lappend(root->glob->subrtables,
 									 root->parse->rtable);
 	root->glob->subrowmarks = lappend(root->glob->subrowmarks,
-									  root->parse->rowMarks);
+									  root->rowMarks);
 
 	/*
 	 * Create a SubPlan node and add it to the outer list of InitPlans. Note
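To make the purpose of scan_params concrete: once every scan node below a
ModifyTable or LockRows lists epqParam in its parameter sets, the EvalPlanQual
machinery can re-run the already-built test plan just by flagging that one
parameter as changed and rescanning.  A minimal sketch of the signaling side,
assuming the usual PlanState.chgParam convention (the variable name
recheckplanstate is illustrative; the executor-side code lives in other files
of this patch):

	/* pretend epqParam's value changed, so every dependent node will rescan */
	recheckplanstate->chgParam = bms_add_member(recheckplanstate->chgParam,
												epqParam);
	/*
	 * A subsequent rescan of recheckplanstate then makes each scan node
	 * return its EPQ substitute tuple (if one was provided) exactly once.
	 */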
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index d7676efbcb6..f48bd31151c 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -16,7 +16,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.67 2009/09/02 17:52:24 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.68 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,6 +628,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	subroot->cte_plan_ids = NIL;
 	subroot->eq_classes = NIL;
 	subroot->append_rel_list = NIL;
+	subroot->rowMarks = NIL;
 	subroot->hasRecursion = false;
 	subroot->wt_param_id = -1;
 	subroot->non_recursive_plan = NULL;
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 0e6010aefa9..d17671462cf 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -9,14 +9,15 @@
  * relation in the correct order.  For both UPDATE and DELETE queries,
  * we need a junk targetlist entry holding the CTID attribute --- the
  * executor relies on this to find the tuple to be replaced/deleted.
- * We may also need junk tlist entries for Vars used in the RETURNING list.
+ * We may also need junk tlist entries for Vars used in the RETURNING list
+ * and for the row ID information needed for EvalPlanQual checking.
  *
  *
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/preptlist.c,v 1.97 2009/10/12 18:10:48 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/preptlist.c,v 1.98 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,7 +32,6 @@
 #include "optimizer/subselect.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
-#include "parser/analyze.h"
 #include "parser/parsetree.h"
 #include "parser/parse_coerce.h"
 #include "utils/rel.h"
@@ -54,6 +54,7 @@ preprocess_targetlist(PlannerInfo *root, List *tlist)
 	int			result_relation = parse->resultRelation;
 	List	   *range_table = parse->rtable;
 	CmdType		command_type = parse->commandType;
+	ListCell   *lc;
 
 	/*
 	 * Sanity check: if there is a result relation, it'd better be a real
@@ -108,51 +109,47 @@ preprocess_targetlist(PlannerInfo *root, List *tlist)
 	}
 
 	/*
-	 * Add TID targets for rels selected FOR UPDATE/SHARE.	The executor uses
-	 * the TID to know which rows to lock, much as for UPDATE or DELETE.
+	 * Add necessary junk columns for rowmarked rels.  These values are
+	 * needed for locking of rels selected FOR UPDATE/SHARE, and to do
+	 * EvalPlanQual rechecking.  While we are at it, store these junk attnos
+	 * in the PlanRowMark list so that we don't have to redetermine them
+	 * at runtime.
 	 */
-	if (parse->rowMarks)
+	foreach(lc, root->rowMarks)
 	{
-		ListCell   *l;
-
-		/*
-		 * We've got trouble if the FOR UPDATE/SHARE appears inside grouping,
-		 * since grouping renders a reference to individual tuple CTIDs
-		 * invalid.  This is also checked at parse time, but that's
-		 * insufficient because of rule substitution, query pullup, etc.
-		 */
-		CheckSelectLocking(parse);
+		PlanRowMark *rc = (PlanRowMark *) lfirst(lc);
+		Var		   *var;
+		char		resname[32];
+		TargetEntry *tle;
 
-		foreach(l, parse->rowMarks)
+		/* child rels should just use the same junk attrs as their parents */
+		if (rc->rti != rc->prti)
 		{
-			RowMarkClause *rc = (RowMarkClause *) lfirst(l);
-			Var		   *var;
-			char		resname[32];
-			TargetEntry *tle;
-
-			/* ignore child rels */
-			if (rc->rti != rc->prti)
-				continue;
-
-			/* we should have an ID for the RowMarkClause */
-			Assert(rc->rowmarkId != 0);
+			PlanRowMark *prc = get_plan_rowmark(root->rowMarks, rc->prti);
+
+			/* parent should have appeared earlier in list */
+			if (prc == NULL || prc->toidAttNo == InvalidAttrNumber)
+				elog(ERROR, "parent PlanRowMark not processed yet");
+			rc->ctidAttNo = prc->ctidAttNo;
+			rc->toidAttNo = prc->toidAttNo;
+			continue;
+		}
 
-			/* always need the ctid */
+		if (rc->markType != ROW_MARK_COPY)
+		{
+			/* It's a regular table, so fetch its TID */
 			var = makeVar(rc->rti,
 						  SelfItemPointerAttributeNumber,
 						  TIDOID,
 						  -1,
 						  0);
-
-			snprintf(resname, sizeof(resname),
-					 "ctid%u", rc->rowmarkId);
-
+			snprintf(resname, sizeof(resname), "ctid%u", rc->rti);
 			tle = makeTargetEntry((Expr *) var,
 								  list_length(tlist) + 1,
 								  pstrdup(resname),
 								  true);
-
 			tlist = lappend(tlist, tle);
+			rc->ctidAttNo = tle->resno;
 
 			/* if parent of inheritance tree, need the tableoid too */
 			if (rc->isParent)
@@ -162,18 +159,31 @@ preprocess_targetlist(PlannerInfo *root, List *tlist)
 							  OIDOID,
 							  -1,
 							  0);
-
-				snprintf(resname, sizeof(resname),
-						 "tableoid%u", rc->rowmarkId);
-
+				snprintf(resname, sizeof(resname), "tableoid%u", rc->rti);
 				tle = makeTargetEntry((Expr *) var,
 									  list_length(tlist) + 1,
 									  pstrdup(resname),
 									  true);
-
 				tlist = lappend(tlist, tle);
+				rc->toidAttNo = tle->resno;
 			}
 		}
+		else
+		{
+			/* Not a table, so we need the whole row as a junk var */
+			var = makeVar(rc->rti,
+						  InvalidAttrNumber,
+						  RECORDOID,
+						  -1,
+						  0);
+			snprintf(resname, sizeof(resname), "wholerow%u", rc->rti);
+			tle = makeTargetEntry((Expr *) var,
+								  list_length(tlist) + 1,
+								  pstrdup(resname),
+								  true);
+			tlist = lappend(tlist, tle);
+			rc->wholeAttNo = tle->resno;
+		}
 	}
 
 	/*
@@ -394,3 +404,24 @@ expand_targetlist(List *tlist, int command_type,
 
 	return new_tlist;
 }
+
+
+/*
+ * Locate PlanRowMark for given RT index, or return NULL if none
+ *
+ * This probably ought to be elsewhere, but there's no very good place
+ */
+PlanRowMark *
+get_plan_rowmark(List *rowmarks, Index rtindex)
+{
+	ListCell   *l;
+
+	foreach(l, rowmarks)
+	{
+		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+
+		if (rc->rti == rtindex)
+			return rc;
+	}
+	return NULL;
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index e4fe0db5479..93a3e25b178 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.177 2009/10/23 05:24:52 petere Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.178 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -248,7 +248,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 							  rtr->rtindex,
 							  subplan,
 							  subroot->parse->rtable,
-							  subroot->parse->rowMarks);
+							  subroot->rowMarks);
 
 		/*
 		 * We don't bother to determine the subquery's output ordering since
@@ -1133,7 +1133,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 {
 	Query	   *parse = root->parse;
 	Oid			parentOID;
-	RowMarkClause *oldrc;
+	PlanRowMark *oldrc;
 	Relation	oldrelation;
 	LOCKMODE	lockmode;
 	List	   *inhOIDs;
@@ -1171,10 +1171,10 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 	 * the lock, leading to possible deadlocks.  (This code should match the
 	 * parser and rewriter.)
 	 */
-	oldrc = get_rowmark(parse, rti);
+	oldrc = get_plan_rowmark(root->rowMarks, rti);
 	if (rti == parse->resultRelation)
 		lockmode = RowExclusiveLock;
-	else if (oldrc)
+	else if (oldrc && RowMarkRequiresRowShareLock(oldrc->markType))
 		lockmode = RowShareLock;
 	else
 		lockmode = AccessShareLock;
@@ -1196,7 +1196,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 
 	/*
 	 * If parent relation is selected FOR UPDATE/SHARE, we need to mark its
-	 * RowMarkClause as isParent = true, and generate a new RowMarkClause for
+	 * PlanRowMark as isParent = true, and generate a new PlanRowMark for
 	 * each child.
 	 */
 	if (oldrc)
@@ -1275,21 +1275,23 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 		}
 
 		/*
-		 * Build a RowMarkClause if parent is marked FOR UPDATE/SHARE.
+		 * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
 		 */
 		if (oldrc)
 		{
-			RowMarkClause *newrc = makeNode(RowMarkClause);
+			PlanRowMark *newrc = makeNode(PlanRowMark);
 
 			newrc->rti = childRTindex;
 			newrc->prti = rti;
-			/* children use the same rowmarkId as their parent */
-			newrc->rowmarkId = oldrc->rowmarkId;
-			newrc->forUpdate = oldrc->forUpdate;
+			newrc->markType = oldrc->markType;
 			newrc->noWait = oldrc->noWait;
 			newrc->isParent = false;
+			/* junk attrs for children are not identified yet */
+			newrc->ctidAttNo = InvalidAttrNumber;
+			newrc->toidAttNo = InvalidAttrNumber;
+			newrc->wholeAttNo = InvalidAttrNumber;
 
-			parse->rowMarks = lappend(parse->rowMarks, newrc);
+			root->rowMarks = lappend(root->rowMarks, newrc);
 		}
 
 		/* Close child relations, but keep locks */
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 6aad2c79a23..5fb1b31688d 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.392 2009/10/12 18:10:48 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.393 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2007,7 +2007,11 @@ transformExplainStmt(ParseState *pstate, ExplainStmt *stmt)
 }
 
 
-/* exported so planner can check again after rewriting, query pullup, etc */
+/*
+ * Check for features that are not supported together with FOR UPDATE/SHARE.
+ *
+ * exported so planner can check again after rewriting, query pullup, etc
+ */
 void
 CheckSelectLocking(Query *qry)
 {
@@ -2035,6 +2039,10 @@ CheckSelectLocking(Query *qry)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("SELECT FOR UPDATE/SHARE is not allowed with window functions")));
+	if (expression_returns_set((Node *) qry->targetList))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("SELECT FOR UPDATE/SHARE is not allowed with set-returning functions in the target list")));
 }
 
 /*
@@ -2229,7 +2237,7 @@ applyLockingClause(Query *qry, Index rtindex, bool forUpdate, bool noWait)
 	RowMarkClause *rc;
 
 	/* Check for pre-existing entry for same rtindex */
-	if ((rc = get_rowmark(qry, rtindex)) != NULL)
+	if ((rc = get_parse_rowmark(qry, rtindex)) != NULL)
 	{
 		/*
 		 * If the same RTE is specified both FOR UPDATE and FOR SHARE, treat
@@ -2250,11 +2258,8 @@ applyLockingClause(Query *qry, Index rtindex, bool forUpdate, bool noWait)
 	/* Make a new RowMarkClause */
 	rc = makeNode(RowMarkClause);
 	rc->rti = rtindex;
-	rc->prti = rtindex;
-	rc->rowmarkId = 0;			/* not used until plan time */
 	rc->forUpdate = forUpdate;
 	rc->noWait = noWait;
-	rc->isParent = false;
 	qry->rowMarks = lappend(qry->rowMarks, rc);
 }
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 4a0a50504bb..1a5f77d272d 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_relation.c,v 1.144 2009/10/21 20:22:38 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_relation.c,v 1.145 2009/10/26 02:26:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2254,7 +2254,7 @@ get_tle_by_resno(List *tlist, AttrNumber resno)
  * Returns NULL if relation is not selected FOR UPDATE/SHARE
  */
 RowMarkClause *
-get_rowmark(Query *qry, Index rtindex)
+get_parse_rowmark(Query *qry, Index rtindex)
 {
 	ListCell   *l;
 
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 8a178c9d3c6..a4d9ae55604 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.187 2009/09/02 17:52:24 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.188 2009/10/26 02:26:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -129,7 +129,7 @@ AcquireRewriteLocks(Query *parsetree)
 				 */
 				if (rt_index == parsetree->resultRelation)
 					lockmode = RowExclusiveLock;
-				else if (get_rowmark(parsetree, rt_index))
+				else if (get_parse_rowmark(parsetree, rt_index) != NULL)
 					lockmode = RowShareLock;
 				else
 					lockmode = AccessShareLock;
@@ -1191,7 +1191,7 @@ ApplyRetrieveRule(Query *parsetree,
 	/*
 	 * FOR UPDATE/SHARE of view?
 	 */
-	if ((rc = get_rowmark(parsetree, rt_index)) != NULL)
+	if ((rc = get_parse_rowmark(parsetree, rt_index)) != NULL)
 	{
 		/*
 		 * Remove the view from the list of rels that will actually be marked
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index d211a88ba45..b450fec5027 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.123 2009/09/02 17:52:24 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.124 2009/10/26 02:26:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -428,7 +428,6 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up)
 				RowMarkClause *rc = (RowMarkClause *) lfirst(l);
 
 				rc->rti += offset;
-				rc->prti += offset;
 			}
 		}
 		query_tree_walker(qry, OffsetVarNodes_walker,
@@ -598,8 +597,6 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up)
 
 				if (rc->rti == rt_index)
 					rc->rti = new_index;
-				if (rc->prti == rt_index)
-					rc->prti = new_index;
 			}
 		}
 		query_tree_walker(qry, ChangeVarNodes_walker,
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 4d7b3c2e9de..025a03f19a9 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.315 2009/10/05 19:24:41 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.316 2009/10/26 02:26:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2012,7 +2012,8 @@ CreateCommandTag(Node *parsetree)
 							tag = "SELECT INTO";
 						else if (stmt->rowMarks != NIL)
 						{
-							if (((RowMarkClause *) linitial(stmt->rowMarks))->forUpdate)
+							/* not 100% but probably close enough */
+							if (((PlanRowMark *) linitial(stmt->rowMarks))->markType == ROW_MARK_EXCLUSIVE)
 								tag = "SELECT FOR UPDATE";
 							else
 								tag = "SELECT FOR SHARE";
@@ -2061,6 +2062,7 @@ CreateCommandTag(Node *parsetree)
 							tag = "SELECT INTO";
 						else if (stmt->rowMarks != NIL)
 						{
+							/* not 100% but probably close enough */
 							if (((RowMarkClause *) linitial(stmt->rowMarks))->forUpdate)
 								tag = "SELECT FOR UPDATE";
 							else
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index 6cd138b288c..eb196ebb56a 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -35,7 +35,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/plancache.c,v 1.29 2009/10/10 01:43:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/plancache.c,v 1.30 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,6 +48,8 @@
 #include "executor/spi.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/planmain.h"
+#include "optimizer/prep.h"
+#include "parser/parsetree.h"
 #include "storage/lmgr.h"
 #include "tcop/pquery.h"
 #include "tcop/tcopprot.h"
@@ -67,7 +69,6 @@ static void AcquireExecutorLocks(List *stmt_list, bool acquire);
 static void AcquirePlannerLocks(List *stmt_list, bool acquire);
 static void ScanQueryForLocks(Query *parsetree, bool acquire);
 static bool ScanQueryWalker(Node *node, bool *acquire);
-static bool rowmark_member(List *rowMarks, int rt_index);
 static bool plan_list_is_transient(List *stmt_list);
 static void PlanCacheRelCallback(Datum arg, Oid relid);
 static void PlanCacheFuncCallback(Datum arg, int cacheid, ItemPointer tuplePtr);
@@ -658,6 +659,7 @@ AcquireExecutorLocks(List *stmt_list, bool acquire)
 		{
 			RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2);
 			LOCKMODE	lockmode;
+			PlanRowMark *rc;
 
 			rt_index++;
 
@@ -672,7 +674,8 @@ AcquireExecutorLocks(List *stmt_list, bool acquire)
 			 */
 			if (list_member_int(plannedstmt->resultRelations, rt_index))
 				lockmode = RowExclusiveLock;
-			else if (rowmark_member(plannedstmt->rowMarks, rt_index))
+			else if ((rc = get_plan_rowmark(plannedstmt->rowMarks, rt_index)) != NULL &&
+					 RowMarkRequiresRowShareLock(rc->markType))
 				lockmode = RowShareLock;
 			else
 				lockmode = AccessShareLock;
@@ -732,7 +735,7 @@ ScanQueryForLocks(Query *parsetree, bool acquire)
 				/* Acquire or release the appropriate type of lock */
 				if (rt_index == parsetree->resultRelation)
 					lockmode = RowExclusiveLock;
-				else if (rowmark_member(parsetree->rowMarks, rt_index))
+				else if (get_parse_rowmark(parsetree, rt_index) != NULL)
 					lockmode = RowShareLock;
 				else
 					lockmode = AccessShareLock;
@@ -798,24 +801,6 @@ ScanQueryWalker(Node *node, bool *acquire)
 								  (void *) acquire);
 }
 
-/*
- * rowmark_member: check whether an RT index appears in a RowMarkClause list.
- */
-static bool
-rowmark_member(List *rowMarks, int rt_index)
-{
-	ListCell   *l;
-
-	foreach(l, rowMarks)
-	{
-		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
-
-		if (rc->rti == rt_index)
-			return true;
-	}
-	return false;
-}
-
 /*
  * plan_list_is_transient: check if any of the plans in the list are transient.
  */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index ea053041bf0..aa9348a336f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.546 2009/10/14 22:14:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.547 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200910141
+#define CATALOG_VERSION_NO	200910251
 
 #endif
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 94cb061959f..2bf40b76af4 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.76 2009/10/10 01:43:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.77 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -139,7 +139,7 @@ extern void ExecBSDeleteTriggers(EState *estate,
 extern void ExecASDeleteTriggers(EState *estate,
 					 ResultRelInfo *relinfo);
 extern bool ExecBRDeleteTriggers(EState *estate,
-					 PlanState *subplanstate,
+					 EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid);
 extern void ExecARDeleteTriggers(EState *estate,
@@ -150,7 +150,7 @@ extern void ExecBSUpdateTriggers(EState *estate,
 extern void ExecASUpdateTriggers(EState *estate,
 					 ResultRelInfo *relinfo);
 extern HeapTuple ExecBRUpdateTriggers(EState *estate,
-					 PlanState *subplanstate,
+					 EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid,
 					 HeapTuple newtuple);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 36e7d35467f..ba2f42d6862 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.162 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.163 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -166,16 +166,23 @@ extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
 extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
 				TupleTableSlot *slot, EState *estate);
-extern TupleTableSlot *EvalPlanQual(EState *estate, Index rti,
-			 PlanState *subplanstate,
+extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
+			 Relation relation, Index rti,
 			 ItemPointer tid, TransactionId priorXmax);
-extern HeapTuple EvalPlanQualFetch(EState *estate, Index rti,
-				  ItemPointer tid, TransactionId priorXmax);
-extern void EvalPlanQualPush(EState *estate, Index rti,
-							 PlanState *subplanstate);
-extern void EvalPlanQualSetTuple(EState *estate, Index rti, HeapTuple tuple);
-extern TupleTableSlot *EvalPlanQualNext(EState *estate);
-extern void EvalPlanQualPop(EState *estate, PlanState *subplanstate);
+extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation,
+				  int lockmode, ItemPointer tid, TransactionId priorXmax);
+extern void EvalPlanQualInit(EPQState *epqstate, EState *estate,
+							 Plan *subplan, int epqParam);
+extern void EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan);
+extern void EvalPlanQualAddRowMark(EPQState *epqstate, ExecRowMark *erm);
+extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
+								 HeapTuple tuple);
+extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
+#define EvalPlanQualSetSlot(epqstate, slot)  ((epqstate)->origslot = (slot))
+extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
+extern TupleTableSlot *EvalPlanQualNext(EPQState *epqstate);
+extern void EvalPlanQualBegin(EPQState *epqstate, EState *parentestate);
+extern void EvalPlanQualEnd(EPQState *epqstate);
 extern DestReceiver *CreateIntoRelDestReceiver(void);
 
 /*
@@ -211,9 +218,12 @@ extern TupleTableSlot *ExecProject(ProjectionInfo *projInfo,
  * prototypes from functions in execScan.c
  */
 typedef TupleTableSlot *(*ExecScanAccessMtd) (ScanState *node);
+typedef bool (*ExecScanRecheckMtd) (ScanState *node, TupleTableSlot *slot);
 
-extern TupleTableSlot *ExecScan(ScanState *node, ExecScanAccessMtd accessMtd);
+extern TupleTableSlot *ExecScan(ScanState *node, ExecScanAccessMtd accessMtd,
+								ExecScanRecheckMtd recheckMtd);
 extern void ExecAssignScanProjectionInfo(ScanState *node);
+extern void ExecScanReScan(ScanState *node);
 
 /*
  * prototypes from functions in execTuples.c
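The EvalPlanQual entry points above replace the old push/pop interface with a
per-node EPQState.  A hedged sketch of the expected calling sequence from a
node such as ModifyTable or LockRows, with illustrative local variable names
(update_ctid, update_xmax, and planSlot are stand-ins, not identifiers from
this patch):

	EPQState	epqstate;

	/* once, at node startup */
	EvalPlanQualInit(&epqstate, estate, subplan, epqParam);

	/* ... when a concurrently updated tuple is detected ... */
	EvalPlanQualSetSlot(&epqstate, planSlot);
	slot = EvalPlanQual(estate, &epqstate, relation, rti,
						&update_ctid, update_xmax);
	if (!TupIsNull(slot))
	{
		/* the updated row still passes the quals: use the rechecked slot */
	}

	/* once, at node shutdown */
	EvalPlanQualEnd(&epqstate);

The EPQState carries the subsidiary EState and the ready-to-execute planstate
tree across rechecks; see the struct definition in execnodes.h below.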
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 93d4aa00db3..bba9c0370bd 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.210 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.211 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -331,6 +331,7 @@ typedef struct EState
 	Snapshot	es_snapshot;	/* time qual to use */
 	Snapshot	es_crosscheck_snapshot; /* crosscheck time qual for RI */
 	List	   *es_range_table; /* List of RangeTblEntry */
+	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */
 
 	JunkFilter *es_junkFilter;	/* top-level junk filter, if any */
 
@@ -375,31 +376,45 @@ typedef struct EState
 	 */
 	ExprContext *es_per_tuple_exprcontext;
 
-	/* Below is to re-evaluate plan qual in READ COMMITTED mode */
-	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */
-	struct evalPlanQual *es_evalPlanQual;		/* chain of PlanQual states */
-	bool	   *es_evTupleNull; /* local array of EPQ status */
-	HeapTuple  *es_evTuple;		/* shared array of EPQ substitute tuples */
+	/*
+	 * These fields are for re-evaluating plan quals when an updated tuple is
+	 * substituted in READ COMMITTED mode.  es_epqTuple[] contains tuples
+	 * that scan plan nodes should return instead of whatever they'd normally
+	 * return, or NULL if nothing to return; es_epqTupleSet[] is true if a
+	 * particular array entry is valid; and es_epqScanDone[] is state to
+	 * remember if the tuple has been returned already.  Arrays are of size
+	 * list_length(es_range_table) and are indexed by scan node scanrelid - 1.
+	 */
+	HeapTuple  *es_epqTuple;		/* array of EPQ substitute tuples */
+	bool	   *es_epqTupleSet;		/* true if EPQ tuple is provided */
+	bool	   *es_epqScanDone;		/* true if EPQ tuple has been fetched */
 } EState;
 
 
 /*
+ * ExecRowMark -
+ *	   runtime representation of FOR UPDATE/SHARE clauses
+ *
+ * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an
+ * ExecRowMark for each non-target relation in the query (except inheritance
+ * parent RTEs, which can be ignored at runtime).  See PlanRowMark for details
+ * about most of the fields.
+ *
  * es_rowMarks is a list of these structs.  Each LockRows node has its own
  * list, which is the subset of locks that it is supposed to enforce; note
  * that the per-node lists point to the same structs that are in the global
- * list.  See RowMarkClause for details about rti, prti, and rowmarkId.
- * toidAttno is not used in a "plain" (non-inherited) rowmark.
+ * list.
  */
 typedef struct ExecRowMark
 {
-	Relation	relation;		/* opened and RowShareLock'd relation */
+	Relation	relation;		/* opened and suitably locked relation */
 	Index		rti;			/* its range table index */
 	Index		prti;			/* parent range table index, if child */
-	Index		rowmarkId;		/* unique identifier assigned by planner */
-	bool		forUpdate;		/* true = FOR UPDATE, false = FOR SHARE */
+	RowMarkType	markType;		/* see enum in nodes/plannodes.h */
 	bool		noWait;			/* NOWAIT option */
-	AttrNumber	ctidAttNo;		/* resno of its ctid junk attribute */
+	AttrNumber	ctidAttNo;		/* resno of ctid junk attribute, if any */
 	AttrNumber	toidAttNo;		/* resno of tableoid junk attribute, if any */
+	AttrNumber	wholeAttNo;		/* resno of whole-row junk attribute, if any */
 	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any */
 } ExecRowMark;
 
@@ -967,6 +982,21 @@ typedef struct PlanState
 #define innerPlanState(node)		(((PlanState *)(node))->righttree)
 #define outerPlanState(node)		(((PlanState *)(node))->lefttree)
 
+/*
+ * EPQState is state for executing an EvalPlanQual recheck on a candidate
+ * tuple in ModifyTable or LockRows.  The estate and planstate fields are
+ * NULL if inactive.
+ */
+typedef struct EPQState
+{
+	EState	   *estate;			/* subsidiary EState */
+	PlanState  *planstate;		/* plan state tree ready to be executed */
+	TupleTableSlot *origslot;	/* original output tuple to be rechecked */
+	Plan	   *plan;			/* plan tree to be executed */
+	List	   *rowMarks;		/* ExecRowMarks (non-locking only) */
+	int			epqParam;		/* ID of Param to force scan node re-eval */
+} EPQState;
+
 
 /* ----------------
  *	 ResultState information
@@ -991,6 +1021,7 @@ typedef struct ModifyTableState
 	PlanState	  **mt_plans;		/* subplans (one per target rel) */
 	int				mt_nplans;		/* number of plans in the array */
 	int				mt_whichplan;	/* which one is being executed (0..n-1) */
+	EPQState		mt_epqstate;	/* for evaluating EvalPlanQual rechecks */
 	bool			fireBSTriggers;	/* do we need to fire stmt triggers? */
 } ModifyTableState;
 
@@ -1651,8 +1682,7 @@ typedef struct LockRowsState
 {
 	PlanState	ps;				/* its first field is NodeTag */
 	List	   *lr_rowMarks;	/* List of ExecRowMarks */
-	JunkFilter *lr_junkFilter;	/* needed for getting ctid columns */
-	bool		lr_useEvalPlan;	/* evaluating EPQ tuples? */
+	EPQState	lr_epqstate;	/* for evaluating EvalPlanQual rechecks */
 } LockRowsState;
 
 /* ----------------
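A minimal sketch of how a scan node is expected to consult the es_epqTuple
machinery during a recheck; the real logic is in execScan.c (not shown in this
excerpt), and here the substitute tuple is simply returned rather than being
stored into the scan's result slot:

	Index		scanrelid = ((Scan *) node->ps.plan)->scanrelid;

	if (estate->es_epqTuple != NULL &&
		estate->es_epqTupleSet[scanrelid - 1])
	{
		if (estate->es_epqScanDone[scanrelid - 1])
			return NULL;	/* the one substitute tuple was already emitted */
		estate->es_epqScanDone[scanrelid - 1] = true;
		return estate->es_epqTuple[scanrelid - 1];
	}
	/* otherwise fall through to the normal scan access method */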
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c452d53505a..f26f5d8d112 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.230 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.231 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -73,7 +73,8 @@ typedef enum NodeTag
 	T_SetOp,
 	T_LockRows,
 	T_Limit,
-	/* this one isn't a subclass of Plan: */
+	/* these aren't subclasses of Plan: */
+	T_PlanRowMark,
 	T_PlanInvalItem,
 
 	/*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index adc870543b6..450a89fe85b 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.410 2009/10/14 22:14:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.411 2009/10/26 02:26:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -800,28 +800,17 @@ typedef struct WindowClause
 
 /*
  * RowMarkClause -
- *	   representation of FOR UPDATE/SHARE clauses
- *
- * We create a separate RowMarkClause node for each target relation.  In the
- * output of the parser and rewriter, all RowMarkClauses have rti == prti and
- * isParent == false.  When the planner discovers that a target relation
- * is the root of an inheritance tree, it sets isParent true, and adds an
- * additional RowMarkClause to the list for each child relation (including
- * the target rel itself in its role as a child).  The child entries have
- * rti == child rel's RT index, prti == parent's RT index, and can therefore
- * be recognized as children by the fact that prti != rti.
- * rowmarkId is a unique ID for the RowMarkClause across an entire query,
- * and is assigned during planning; it's always zero upstream of the planner.
+ *	   parser output representation of FOR UPDATE/SHARE clauses
+ *
+ * Query.rowMarks contains a separate RowMarkClause node for each relation
+ * identified as a FOR UPDATE/SHARE target.
  */
 typedef struct RowMarkClause
 {
 	NodeTag		type;
 	Index		rti;			/* range table index of target relation */
-	Index		prti;			/* range table index of parent relation */
-	Index		rowmarkId;		/* unique identifier assigned by planner */
 	bool		forUpdate;		/* true = FOR UPDATE, false = FOR SHARE */
 	bool		noWait;			/* NOWAIT option */
-	bool		isParent;		/* set by planner when expanding inheritance */
 } RowMarkClause;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 10177f3d7fc..e9c994e4f27 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.112 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.113 2009/10/26 02:26:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ typedef struct PlannedStmt
 
 	Bitmapset  *rewindPlanIDs;	/* indices of subplans that require REWIND */
 
-	List	   *rowMarks;		/* a list of RowMarkClause's */
+	List	   *rowMarks;		/* a list of PlanRowMark's */
 
 	List	   *relationOids;	/* OIDs of relations the plan depends on */
 
@@ -167,6 +167,8 @@ typedef struct ModifyTable
 	List	   *resultRelations;	/* integer list of RT indexes */
 	List	   *plans;				/* plan(s) producing source data */
 	List	   *returningLists;		/* per-target-table RETURNING tlists */
+	List	   *rowMarks;			/* PlanRowMarks (non-locking only) */
+	int			epqParam;			/* ID of Param for EvalPlanQual re-eval */
 } ModifyTable;
 
 /* ----------------
@@ -620,12 +622,15 @@ typedef struct SetOp
  *
  * rowMarks identifies the rels to be locked by this node; it should be
  * a subset of the rowMarks listed in the top-level PlannedStmt.
+ * epqParam is the ID of a Param that all scan nodes below this one must
+ * depend on; it is used to force plan re-evaluation during EvalPlanQual.
  * ----------------
  */
 typedef struct LockRows
 {
 	Plan		plan;
-	List	   *rowMarks;		/* a list of RowMarkClause's */
+	List	   *rowMarks;		/* a list of PlanRowMark's */
+	int			epqParam;		/* ID of Param for EvalPlanQual re-eval */
 } LockRows;
 
 /* ----------------
@@ -643,6 +648,63 @@ typedef struct Limit
 } Limit;
 
 
+/*
+ * RowMarkType -
+ *	  enums for types of row-marking operations
+ *
+ * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we have to uniquely
+ * identify all the source rows, not only those from the target relations, so
+ * that we can perform EvalPlanQual rechecking at need.  For plain tables we
+ * can just fetch the TID, the same as for a target relation.  Otherwise (for
+ * example for VALUES or FUNCTION scans) we have to copy the whole row value.
+ * The latter is pretty inefficient but fortunately the case is not
+ * performance-critical in practice.
+ */
+typedef enum RowMarkType
+{
+	ROW_MARK_EXCLUSIVE,			/* obtain exclusive tuple lock */
+	ROW_MARK_SHARE,				/* obtain shared tuple lock */
+	ROW_MARK_REFERENCE,			/* just fetch the TID */
+	ROW_MARK_COPY				/* physically copy the row value */
+} RowMarkType;
+
+#define RowMarkRequiresRowShareLock(marktype)  ((marktype) <= ROW_MARK_SHARE)
+
+/*
+ * PlanRowMark -
+ *	   plan-time representation of FOR UPDATE/SHARE clauses
+ *
+ * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we create a separate
+ * PlanRowMark node for each non-target relation in the query.  Relations that
+ * are not specified as FOR UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
+ * real tables) or ROW_MARK_COPY (if not).
+ *
+ * Initially all PlanRowMarks have rti == prti and isParent == false.
+ * When the planner discovers that a relation is the root of an inheritance
+ * tree, it sets isParent true, and adds an additional PlanRowMark to the
+ * list for each child relation (including the target rel itself in its role
+ * as a child).  The child entries have rti == child rel's RT index and
+ * prti == parent's RT index, and can therefore be recognized as children by
+ * the fact that prti != rti.
+ *
+ * The AttrNumbers are filled in during preprocess_targetlist.  We use
+ * different subsets of them for plain relations, inheritance children,
+ * and non-table relations.
+ */
+typedef struct PlanRowMark
+{
+	NodeTag		type;
+	Index		rti;			/* range table index of markable relation */
+	Index		prti;			/* range table index of parent relation */
+	RowMarkType	markType;		/* see enum above */
+	bool		noWait;			/* NOWAIT option */
+	bool		isParent;		/* true if this is a "dummy" parent entry */
+	AttrNumber	ctidAttNo;		/* resno of ctid junk attribute, if any */
+	AttrNumber	toidAttNo;		/* resno of tableoid junk attribute, if any */
+	AttrNumber	wholeAttNo;		/* resno of whole-row junk attribute, if any */
+} PlanRowMark;
+
+
 /*
  * Plan invalidation info
  *
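For reference only (not part of the patch): a small sketch of how the PlanRowMark conventions documented above could be consumed. The helper name summarize_row_marks is hypothetical; it walks a flattened rowMarks list such as the one carried in a PlannedStmt.

#include "postgres.h"
#include "nodes/plannodes.h"

/* Hypothetical helper: classify PlanRowMarks per the comments above */
static void
summarize_row_marks(List *rowMarks)
{
	ListCell   *lc;

	foreach(lc, rowMarks)
	{
		PlanRowMark *rc = (PlanRowMark *) lfirst(lc);

		/* dummy parent entries exist only to flag inheritance roots */
		if (rc->isParent)
			continue;

		/* child entries are recognizable by prti != rti */
		if (rc->prti != rc->rti)
			elog(DEBUG1, "rel %u is a child of rel %u", rc->rti, rc->prti);

		if (RowMarkRequiresRowShareLock(rc->markType))
			elog(DEBUG1, "rel %u: row lock (%s)", rc->rti,
				 rc->markType == ROW_MARK_EXCLUSIVE ? "FOR UPDATE" : "FOR SHARE");
		else if (rc->markType == ROW_MARK_COPY)
			elog(DEBUG1, "rel %u: whole-row copy carried along", rc->rti);
		else
			elog(DEBUG1, "rel %u: TID reference carried along", rc->rti);
	}
}
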
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 8edb58727c0..59f83e85628 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.177 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.178 2009/10/26 02:26:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,13 +68,13 @@ typedef struct PlannerGlobal
 
 	List	   *subrtables;		/* Rangetables for SubPlan nodes */
 
-	List	   *subrowmarks;	/* RowMarkClauses for SubPlan nodes */
+	List	   *subrowmarks;	/* PlanRowMarks for SubPlan nodes */
 
 	Bitmapset  *rewindPlanIDs;	/* indices of subplans that require REWIND */
 
 	List	   *finalrtable;	/* "flat" rangetable for executor */
 
-	List	   *finalrowmarks;	/* "flat" list of RowMarkClauses */
+	List	   *finalrowmarks;	/* "flat" list of PlanRowMarks */
 
 	List	   *relationOids;	/* OIDs of relations the plan depends on */
 
@@ -82,8 +82,6 @@ typedef struct PlannerGlobal
 
 	Index		lastPHId;		/* highest PlaceHolderVar ID assigned */
 
-	Index		lastRowmarkId;	/* highest RowMarkClause ID assigned */
-
 	bool		transientPlan;	/* redo plan when TransactionXmin changes? */
 } PlannerGlobal;
 
@@ -169,6 +167,8 @@ typedef struct PlannerInfo
 
 	List	   *append_rel_list;	/* list of AppendRelInfos */
 
+	List	   *rowMarks;		/* list of PlanRowMarks */
+
 	List	   *placeholder_list;		/* list of PlaceHolderInfos */
 
 	List	   *query_pathkeys; /* desired pathkeys for query_planner(), and
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index acf24bc17ed..9a661a5ee8c 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.120 2009/10/12 18:10:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.121 2009/10/26 02:26:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,7 +68,7 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual,
 		   Plan *lefttree);
 extern Plan *materialize_finished_plan(Plan *subplan);
 extern Unique *make_unique(Plan *lefttree, List *distinctList);
-extern LockRows *make_lockrows(Plan *lefttree, List *rowMarks);
+extern LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
 		   int64 offset_est, int64 count_est);
 extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
@@ -77,7 +77,8 @@ extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
 extern Result *make_result(PlannerInfo *root, List *tlist,
 			Node *resconstantqual, Plan *subplan);
 extern ModifyTable *make_modifytable(CmdType operation, List *resultRelations,
-									 List *subplans, List *returningLists);
+									 List *subplans, List *returningLists,
+									 List *rowMarks, int epqParam);
 extern bool is_projection_capable_plan(Plan *plan);
 
 /*
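As a rough illustration of how the revised constructor signatures fit together (the planner.c changes themselves are elsewhere in the patch and not reproduced here), a FOR UPDATE/SHARE query could end up wrapped like this, with SS_assign_special_param() supplying the epqParam that the LockRows node and the scans beneath it share. The wrapper function name is hypothetical:

#include "postgres.h"
#include "nodes/relation.h"
#include "optimizer/planmain.h"
#include "optimizer/subselect.h"

/* Hypothetical wrapper, for illustration: add row locking atop a subplan */
static Plan *
add_row_locking(PlannerInfo *root, Query *parse, Plan *result_plan)
{
	if (parse->rowMarks)
	{
		/* reserve the Param that LockRows and its child scans will share */
		int			epqParam = SS_assign_special_param(root);

		result_plan = (Plan *) make_lockrows(result_plan,
											 root->rowMarks,
											 epqParam);
	}
	return result_plan;
}
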
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index e8dfb1e28fb..982635a57d3 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.66 2009/05/12 00:56:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.67 2009/10/26 02:26:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,6 +40,8 @@ extern Expr *canonicalize_qual(Expr *qual);
  */
 extern List *preprocess_targetlist(PlannerInfo *root, List *tlist);
 
+extern PlanRowMark *get_plan_rowmark(List *rowmarks, Index rtindex);
+
 /*
  * prototypes for prepunion.c
  */
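The PlanRowMark comments in plannodes.h say that preprocess_targetlist fills in the junk-attribute resnos; in sketch form, the ctid case amounts to appending a resjunk target-list entry for the relation's ctid system column. The helper name and the resname below are illustrative only, not the names used by the patch:

#include "postgres.h"
#include "access/sysattr.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"

/*
 * Illustrative helper: append a resjunk ctid column for the relation at
 * range-table index "rti", roughly the step that must happen before a
 * PlanRowMark's ctidAttNo can be filled in.
 */
static List *
add_ctid_junk_column(List *tlist, Index rti)
{
	Var		   *var;
	TargetEntry *tle;

	/* reference the ctid system column of the marked relation */
	var = makeVar(rti,
				  SelfItemPointerAttributeNumber,
				  TIDOID,
				  -1,
				  0);

	/* append it as a junk column; the real code builds a per-rel resname */
	tle = makeTargetEntry((Expr *) var,
						  list_length(tlist) + 1,
						  pstrdup("ctid"),
						  true);

	return lappend(tlist, tle);
}
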
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index dcae8031c30..954cff69ac3 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -5,7 +5,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.37 2009/06/11 14:49:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.38 2009/10/26 02:26:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,6 +29,6 @@ extern void SS_finalize_plan(PlannerInfo *root, Plan *plan,
 				 bool attach_initplans);
 extern Param *SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan,
 						   Oid resulttype, int32 resulttypmod);
-extern int	SS_assign_worktable_param(PlannerInfo *root);
+extern int	SS_assign_special_param(PlannerInfo *root);
 
 #endif   /* SUBSELECT_H */
diff --git a/src/include/parser/parsetree.h b/src/include/parser/parsetree.h
index 1aeea5bba0b..2399dccf07d 100644
--- a/src/include/parser/parsetree.h
+++ b/src/include/parser/parsetree.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/parsetree.h,v 1.37 2009/01/01 17:24:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parsetree.h,v 1.38 2009/10/26 02:26:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -74,6 +74,6 @@ extern TargetEntry *get_tle_by_resno(List *tlist, AttrNumber resno);
  * ----------------
  */
 
-extern RowMarkClause *get_rowmark(Query *qry, Index rtindex);
+extern RowMarkClause *get_parse_rowmark(Query *qry, Index rtindex);
 
 #endif   /* PARSETREE_H */
-- 
GitLab