diff --git a/contrib/userlock/user_locks.c b/contrib/userlock/user_locks.c
index 0996970a9f485240d9b3058a0498bdb13973b5c6..f2cd7b2f76de4bf5430937ba2c94c13b6fe2ed57 100644
--- a/contrib/userlock/user_locks.c
+++ b/contrib/userlock/user_locks.c
@@ -75,7 +75,7 @@ user_write_unlock_oid(Oid oid)
 int
 user_unlock_all(void)
 {
-	return LockReleaseAll(USER_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL);
+	return LockReleaseAll(USER_LOCKMETHOD, MyProc, true);
 }
 
 /* end of file */
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index dc424a6773d45e2dec779686142690f423eba0df..1ac3e78dfe190ded8aed5ba609dfeeb7fbae0109 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.52 2004/07/01 00:49:27 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.53 2004/07/17 03:27:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,7 @@
 #include "access/genam.h"
 #include "access/gist.h"
 #include "access/gistscan.h"
+#include "utils/resowner.h"
 
 
 /* routines defined and used here */
@@ -41,7 +42,7 @@ static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
 typedef struct GISTScanListData
 {
 	IndexScanDesc gsl_scan;
-	TransactionId gsl_creatingXid;
+	ResourceOwner gsl_owner;
 	struct GISTScanListData *gsl_next;
 } GISTScanListData;
 
@@ -224,7 +225,7 @@ gistregscan(IndexScanDesc s)
 
 	l = (GISTScanList) palloc(sizeof(GISTScanListData));
 	l->gsl_scan = s;
-	l->gsl_creatingXid = GetCurrentTransactionId();
+	l->gsl_owner = CurrentResourceOwner;
 	l->gsl_next = GISTScans;
 	GISTScans = l;
 }
@@ -253,52 +254,28 @@ gistdropscan(IndexScanDesc s)
 }
 
 /*
- * AtEOXact_gist() --- clean up gist subsystem at xact abort or commit.
+ * ReleaseResources_gist() --- clean up gist subsystem resources.
  *
  * This is here because it needs to touch this module's static var GISTScans.
  */
 void
-AtEOXact_gist(void)
-{
-	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Reset the active-scans list to empty. We do not need to free the
-	 * list elements, because they're all palloc()'d, so they'll go away
-	 * at end of transaction anyway.
-	 */
-	GISTScans = NULL;
-}
-
-/*
- * AtEOSubXact_gist() --- clean up gist subsystem at subxact abort or commit.
- *
- * This is here because it needs to touch this module's static var GISTScans.
- */
-void
-AtEOSubXact_gist(TransactionId childXid)
+ReleaseResources_gist(void)
 {
 	GISTScanList l;
 	GISTScanList prev;
 	GISTScanList next;
 
 	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Forget active scans that were started in this subtransaction.
+	 * Note: this should be a no-op during normal query shutdown.
+	 * However, in an abort situation ExecutorEnd is not called and so
+	 * there may be open index scans to clean up.
 	 */
 	prev = NULL;
 
 	for (l = GISTScans; l != NULL; l = next)
 	{
 		next = l->gsl_next;
-		if (l->gsl_creatingXid == childXid)
+		if (l->gsl_owner == CurrentResourceOwner)
 		{
 			if (prev == NULL)
 				GISTScans = next;
diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c
index d107596c75063d541f53c657b4a6cf7006279ede..81acfcc4525647ea8ab47b58d625e1ce438e2c01 100644
--- a/src/backend/access/hash/hashscan.c
+++ b/src/backend/access/hash/hashscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.34 2004/07/01 00:49:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.35 2004/07/17 03:27:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,12 +16,13 @@
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "utils/resowner.h"
 
 
 typedef struct HashScanListData
 {
 	IndexScanDesc hashsl_scan;
-	TransactionId hashsl_creatingXid;
+	ResourceOwner hashsl_owner;
 	struct HashScanListData *hashsl_next;
 } HashScanListData;
 
@@ -31,52 +32,28 @@ static HashScanList HashScans = NULL;
 
 
 /*
- * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit.
+ * ReleaseResources_hash() --- clean up hash subsystem resources.
  *
  * This is here because it needs to touch this module's static var HashScans.
  */
 void
-AtEOXact_hash(void)
-{
-	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Reset the active-scans list to empty. We do not need to free the
-	 * list elements, because they're all palloc()'d, so they'll go away
-	 * at end of transaction anyway.
-	 */
-	HashScans = NULL;
-}
-
-/*
- * AtEOSubXact_hash() --- clean up hash subsystem at subxact abort or commit.
- *
- * This is here because it needs to touch this module's static var HashScans.
- */
-void
-AtEOSubXact_hash(TransactionId childXid)
+ReleaseResources_hash(void)
 {
 	HashScanList l;
 	HashScanList prev;
 	HashScanList next;
 
 	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Forget active scans that were started in this subtransaction.
+	 * Note: this should be a no-op during normal query shutdown.
+	 * However, in an abort situation ExecutorEnd is not called and so
+	 * there may be open index scans to clean up.
 	 */
 	prev = NULL;
 
 	for (l = HashScans; l != NULL; l = next)
 	{
 		next = l->hashsl_next;
-		if (l->hashsl_creatingXid == childXid)
+		if (l->hashsl_owner == CurrentResourceOwner)
 		{
 			if (prev == NULL)
 				HashScans = next;
@@ -101,7 +78,7 @@ _hash_regscan(IndexScanDesc scan)
 
 	new_el = (HashScanList) palloc(sizeof(HashScanListData));
 	new_el->hashsl_scan = scan;
-	new_el->hashsl_creatingXid = GetCurrentTransactionId();
+	new_el->hashsl_owner = CurrentResourceOwner;
 	new_el->hashsl_next = HashScans;
 	HashScans = new_el;
 }
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 01fe502071eefe1cd1a7bf5480a33223ae8295d8..07e4fe0a11ec7318bc661eda5269abfd3fc41b1e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.118 2004/06/05 19:48:07 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.119 2004/07/17 03:27:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,16 +58,6 @@ static void btbuildCallback(Relation index,
 				void *state);
 
 
-/*
- * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit.
- */
-void
-AtEOXact_nbtree(void)
-{
-	/* nothing to do at the moment */
-}
-
-
 /*
  *	btbuild() -- build a new btree index.
  *
diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c
index d3530966e6d1bad29d57aee82b3ed4f63e3e9f5d..5dfcffb1847361abb615043cfd8cededca203561 100644
--- a/src/backend/access/rtree/rtscan.c
+++ b/src/backend/access/rtree/rtscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.52 2004/07/01 00:49:31 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.53 2004/07/17 03:28:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,6 +18,7 @@
 #include "access/genam.h"
 #include "access/rtree.h"
 #include "utils/lsyscache.h"
+#include "utils/resowner.h"
 
 
 /* routines defined and used here */
@@ -42,7 +43,7 @@ static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
 typedef struct RTScanListData
 {
 	IndexScanDesc rtsl_scan;
-	TransactionId rtsl_creatingXid;
+	ResourceOwner rtsl_owner;
 	struct RTScanListData *rtsl_next;
 } RTScanListData;
 
@@ -241,7 +242,7 @@ rtregscan(IndexScanDesc s)
 
 	l = (RTScanList) palloc(sizeof(RTScanListData));
 	l->rtsl_scan = s;
-	l->rtsl_creatingXid = GetCurrentTransactionId();
+	l->rtsl_owner = CurrentResourceOwner;
 	l->rtsl_next = RTScans;
 	RTScans = l;
 }
@@ -272,52 +273,28 @@ rtdropscan(IndexScanDesc s)
 }
 
 /*
- * AtEOXact_rtree() --- clean up rtree subsystem at xact abort or commit.
+ * ReleaseResources_rtree() --- clean up rtree subsystem resources.
  *
  * This is here because it needs to touch this module's static var RTScans.
  */
 void
-AtEOXact_rtree(void)
-{
-	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Reset the active-scans list to empty. We do not need to free the
-	 * list elements, because they're all palloc()'d, so they'll go away
-	 * at end of transaction anyway.
-	 */
-	RTScans = NULL;
-}
-
-/*
- * AtEOSubXact_rtree() --- clean up rtree subsystem at subxact abort or commit.
- *
- * This is here because it needs to touch this module's static var RTScans.
- */
-void
-AtEOSubXact_rtree(TransactionId childXid)
+ReleaseResources_rtree(void)
 {
 	RTScanList l;
 	RTScanList prev;
 	RTScanList next;
 
 	/*
-	 * Note: these actions should only be necessary during xact abort; but
-	 * they can't hurt during a commit.
-	 */
-
-	/*
-	 * Forget active scans that were started in this subtransaction.
+	 * Note: this should be a no-op during normal query shutdown.
+	 * However, in an abort situation ExecutorEnd is not called and so
+	 * there may be open index scans to clean up.
 	 */
 	prev = NULL;
 
 	for (l = RTScans; l != NULL; l = next)
 	{
 		next = l->rtsl_next;
-		if (l->rtsl_creatingXid == childXid)
+		if (l->rtsl_owner == CurrentResourceOwner)
 		{
 			if (prev == NULL)
 				RTScans = next;
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b6efb3155816a4054f63b9a22920997e63647c34..d88f7164d34a5308379960ccb15f4742e02ba4f8 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.170 2004/07/01 20:11:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.171 2004/07/17 03:28:23 tgl Exp $
  *
  * NOTES
  *		Transaction aborts can now occur two ways:
@@ -144,10 +144,6 @@
 #include <time.h>
 #include <unistd.h>
 
-#include "access/gistscan.h"
-#include "access/hash.h"
-#include "access/nbtree.h"
-#include "access/rtree.h"
 #include "access/subtrans.h"
 #include "access/xact.h"
 #include "catalog/heap.h"
@@ -168,23 +164,73 @@
 #include "utils/inval.h"
 #include "utils/memutils.h"
 #include "utils/portal.h"
-#include "utils/catcache.h"
-#include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "pgstat.h"
 
 
+
+/*
+ *	transaction states - low-level transaction state from server perspective
+ */
+typedef enum TransState
+{
+	TRANS_DEFAULT,		/* set at end of sub-xact commit and cleanup */
+	TRANS_START,		/* set on entry to StartSubTransaction */
+	TRANS_INPROGRESS,	/* state CommitSubTransaction expects to find */
+	TRANS_COMMIT,		/* set once sub-xact pre-commit processing is done */
+	TRANS_ABORT			/* set by StartAbortedSubTransaction */
+} TransState;
+
+/*
+ *	transaction block states - high-level transaction state of client queries
+ */
+typedef enum TBlockState
+{
+	TBLOCK_DEFAULT,
+	TBLOCK_STARTED,
+	TBLOCK_BEGIN,
+	TBLOCK_INPROGRESS,
+	TBLOCK_END,
+	TBLOCK_ABORT,
+	TBLOCK_ENDABORT,
+
+	TBLOCK_SUBBEGIN,
+	TBLOCK_SUBBEGINABORT,
+	TBLOCK_SUBINPROGRESS,
+	TBLOCK_SUBEND,
+	TBLOCK_SUBABORT,
+	TBLOCK_SUBENDABORT_OK,
+	TBLOCK_SUBENDABORT_ERROR
+} TBlockState;
+
+/*
+ *	transaction state structure - one block per (sub)transaction, parent-linked
+ */
+typedef struct TransactionStateData
+{
+	TransactionId	transactionIdData;		/* my XID */
+	CommandId		commandId;				/* current CID */
+	TransState		state;					/* low-level state */
+	TBlockState		blockState;				/* high-level state */
+	int				nestingLevel;			/* nest depth */
+	MemoryContext	curTransactionContext;	/* my xact-lifetime context */
+	ResourceOwner	curTransactionOwner;	/* my query resources; NULL if none */
+	List		   *childXids;				/* subcommitted child XIDs */
+	AclId			currentUser;			/* subxact start current_user */
+	struct TransactionStateData *parent;	/* back link to parent; NULL at top */
+} TransactionStateData;
+
+typedef TransactionStateData *TransactionState;
+
+
 static void AbortTransaction(void);
-static void AtAbort_Cache(void);
-static void AtAbort_Locks(void);
 static void AtAbort_Memory(void);
 static void AtCleanup_Memory(void);
-static void AtCommit_Cache(void);
 static void AtCommit_LocalCache(void);
-static void AtCommit_Locks(void);
 static void AtCommit_Memory(void);
 static void AtStart_Cache(void);
-static void AtStart_Locks(void);
 static void AtStart_Memory(void);
+static void AtStart_ResourceOwner(void);
 static void CallEOXactCallbacks(bool isCommit);
 static void CleanupTransaction(void);
 static void CommitTransaction(void);
@@ -200,11 +246,11 @@ static void StartAbortedSubTransaction(void);
 static void PushTransaction(void);
 static void PopTransaction(void);
 
-static void AtSubAbort_Locks(void);
 static void AtSubAbort_Memory(void);
 static void AtSubCleanup_Memory(void);
 static void AtSubCommit_Memory(void);
 static void AtSubStart_Memory(void);
+static void AtSubStart_ResourceOwner(void);
 
 static void ShowTransactionState(const char *str);
 static void ShowTransactionStateRec(TransactionState state);
@@ -224,6 +270,7 @@ static TransactionStateData TopTransactionStateData = {
 								 * perspective */
 	0,							/* nesting level */
 	NULL,						/* cur transaction context */
+	NULL,						/* cur transaction resource owner */
 	NIL,						/* subcommitted child Xids */
 	0,							/* entry-time current userid */
 	NULL						/* link to parent state block */
@@ -462,8 +509,7 @@ CommandCounterIncrement(void)
 		SerializableSnapshot->curcid = s->commandId;
 
 	/*
-	 * make cache changes visible to me.  AtCommit_LocalCache() instead of
-	 * AtCommit_Cache() is called here.
+	 * make cache changes visible to me.
 	 */
 	AtCommit_LocalCache();
 	AtStart_Cache();
@@ -484,20 +530,6 @@ AtStart_Cache(void)
 	AcceptInvalidationMessages();
 }
 
-/*
- *		AtStart_Locks
- */
-static void
-AtStart_Locks(void)
-{
-	/*
-	 * at present, it is unknown to me what belongs here -cim 3/18/90
-	 *
-	 * There isn't anything to do at the start of a xact for locks. -mer
-	 * 5/24/92
-	 */
-}
-
 /*
  *	AtStart_Memory
  */
@@ -532,6 +564,29 @@ AtStart_Memory(void)
 	MemoryContextSwitchTo(CurTransactionContext);
 }
 
+/*
+ *	AtStart_ResourceOwner --- create the top-level ResourceOwner for a new xact
+ */
+static void
+AtStart_ResourceOwner(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	/*
+	 * We shouldn't have a transaction resource owner already (commit/cleanup reset it).
+	 */
+	Assert(TopTransactionResourceOwner == NULL);
+
+	/*
+	 * Create a toplevel resource owner for the transaction (it has no parent owner).
+	 */
+	s->curTransactionOwner = ResourceOwnerCreate(NULL, "TopTransaction");
+
+	TopTransactionResourceOwner = s->curTransactionOwner;	/* all three globals ... */
+	CurTransactionResourceOwner = s->curTransactionOwner;	/* ... start out naming ... */
+	CurrentResourceOwner = s->curTransactionOwner;			/* ... the new owner */
+}
+
 /* ----------------------------------------------------------------
  *						StartSubTransaction stuff
  * ----------------------------------------------------------------
@@ -563,6 +618,28 @@ AtSubStart_Memory(void)
 	MemoryContextSwitchTo(CurTransactionContext);
 }
 
+/*
+ * AtSubStart_ResourceOwner --- create a child ResourceOwner for a new subxact
+ */
+static void
+AtSubStart_ResourceOwner(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	Assert(s->parent != NULL);	/* only valid for a nested state block */
+
+	/*
+	 * Create a resource owner for the subtransaction.  We make it a
+	 * child of the immediate parent's resource owner.
+	 */
+	s->curTransactionOwner =
+		ResourceOwnerCreate(s->parent->curTransactionOwner,
+							"SubTransaction");
+
+	CurTransactionResourceOwner = s->curTransactionOwner;
+	CurrentResourceOwner = s->curTransactionOwner;	/* Top owner is not touched */
+}
+
 /* ----------------------------------------------------------------
  *						CommitTransaction stuff
  * ----------------------------------------------------------------
@@ -581,7 +658,7 @@ RecordTransactionCommit(void)
 
 	/* Get data needed for commit record */
 	nrels = smgrGetPendingDeletes(true, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	nchildren = xactGetCommittedChildren(&children);
 
 	/*
 	 * If we made neither any XLOG entries nor any temp-rel updates,
@@ -714,23 +791,6 @@ RecordTransactionCommit(void)
 }
 
 
-/*
- *	AtCommit_Cache
- */
-static void
-AtCommit_Cache(void)
-{
-	/*
-	 * Clean up the relation cache.
-	 */
-	AtEOXact_RelationCache(true);
-
-	/*
-	 * Make catalog changes visible to all backends.
-	 */
-	AtEOXact_Inval(true);
-}
-
 /*
  *	AtCommit_LocalCache
  */
@@ -743,20 +803,6 @@ AtCommit_LocalCache(void)
 	CommandEndInvalidationMessages();
 }
 
-/*
- *	AtCommit_Locks
- */
-static void
-AtCommit_Locks(void)
-{
-	/*
-	 * XXX What if ProcReleaseLocks fails?	(race condition?)
-	 *
-	 * Then you're up a creek! -mer 5/24/92
-	 */
-	ProcReleaseLocks(ReleaseAllExceptSession, 0, NULL);
-}
-
 /*
  *	AtCommit_Memory
  */
@@ -878,7 +924,7 @@ RecordTransactionAbort(void)
 
 	/* Get data needed for abort record */
 	nrels = smgrGetPendingDeletes(false, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	nchildren = xactGetCommittedChildren(&children);
 
 	/*
 	 * If we made neither any transaction-controlled XLOG entries nor any
@@ -979,31 +1025,6 @@ RecordTransactionAbort(void)
 		pfree(children);
 }
 
-/*
- *	AtAbort_Cache
- */
-static void
-AtAbort_Cache(void)
-{
-	AtEOXact_RelationCache(false);
-	AtEOXact_Inval(false);
-}
-
-/*
- *	AtAbort_Locks
- */
-static void
-AtAbort_Locks(void)
-{
-	/*
-	 * XXX What if ProcReleaseLocks() fails?  (race condition?)
-	 *
-	 * Then you're up a creek without a paddle! -mer
-	 */
-	ProcReleaseLocks(ReleaseAll, 0, NULL);
-}
-
-
 /*
  *	AtAbort_Memory
  */
@@ -1029,22 +1050,6 @@ AtAbort_Memory(void)
 		MemoryContextSwitchTo(TopMemoryContext);
 }
 
-/*
- * AtSubAbort_Locks
- */
-static void
-AtSubAbort_Locks(void)
-{
-	int nxids;
-	TransactionId *xids;
-
-	nxids = xactGetCommittedChildren(&xids, true);
-
-	ProcReleaseLocks(ReleaseGivenXids, nxids, xids);
-
-	pfree(xids);
-}
-
 
 /*
  * AtSubAbort_Memory
@@ -1070,7 +1075,7 @@ RecordSubTransactionAbort(void)
 
 	/* Get data needed for abort record */
 	nrels = smgrGetPendingDeletes(false, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	nchildren = xactGetCommittedChildren(&children);
 
 	/*
 	 * If we made neither any transaction-controlled XLOG entries nor any
@@ -1241,6 +1246,12 @@ StartTransaction(void)
 	XactIsoLevel = DefaultXactIsoLevel;
 	XactReadOnly = DefaultXactReadOnly;
 
+	/*
+	 * must initialize resource-management stuff first
+	 */
+	AtStart_Memory();
+	AtStart_ResourceOwner();
+
 	/*
 	 * generate a new transaction id
 	 */
@@ -1268,16 +1279,10 @@ StartTransaction(void)
 	 */
 
 	/*
-	 * initialize the various transaction subsystems
+	 * initialize other subsystems for new transaction
 	 */
-	AtStart_Memory();
 	AtStart_Inval();
 	AtStart_Cache();
-	AtStart_Locks();
-
-	/*
-	 * Tell the trigger manager we're starting a transaction
-	 */
 	DeferredTriggerBeginXact();
 
 	/*
@@ -1380,27 +1385,49 @@ CommitTransaction(void)
 	 * pins); then release locks; then release backend-local resources. We
 	 * want to release locks at the point where any backend waiting for us
 	 * will see our transaction as being fully cleaned up.
+	 *
+	 * Resources that can be associated with individual queries are
+	 * handled by the ResourceOwner mechanism.  The other calls here
+	 * are for backend-wide state.
 	 */
 
 	smgrDoPendingDeletes(true);
-	AtCommit_Cache();
-	AtEOXact_Buffers(true);
 	/* smgrcommit already done */
 
-	AtCommit_Locks();
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, true);
+
+	/*
+	 * Make catalog changes visible to all backends.  This has to happen
+	 * after relcache references are dropped (see comments for
+	 * AtEOXact_RelationCache), but before locks are released (if anyone
+	 * is waiting for lock on a relation we've modified, we want them to
+	 * know about the catalog change before they start using the relation).
+	 */
+	AtEOXact_Inval(true);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 true, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, true);
 
 	CallEOXactCallbacks(true);
 	AtEOXact_GUC(true, false);
 	AtEOXact_SPI(true);
-	AtEOXact_gist();
-	AtEOXact_hash();
-	AtEOXact_nbtree();
-	AtEOXact_rtree();
 	AtEOXact_on_commit_actions(true, s->transactionIdData);
 	AtEOXact_Namespace(true);
-	AtEOXact_CatCache(true);
 	AtEOXact_Files();
 	pgstat_count_xact_commit();
+
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
 	AtCommit_Memory();
 
 	s->nestingLevel = 0;
@@ -1504,22 +1531,24 @@ AbortTransaction(void)
 	 */
 
 	smgrDoPendingDeletes(false);
-	AtAbort_Cache();
-	AtEOXact_Buffers(false);
 	smgrabort();
 
-	AtAbort_Locks();
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 false, true);
+	AtEOXact_Inval(false);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 false, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 false, true);
 
 	CallEOXactCallbacks(false);
 	AtEOXact_GUC(false, false);
 	AtEOXact_SPI(false);
-	AtEOXact_gist();
-	AtEOXact_hash();
-	AtEOXact_nbtree();
-	AtEOXact_rtree();
 	AtEOXact_on_commit_actions(false, s->transactionIdData);
 	AtEOXact_Namespace(false);
-	AtEOXact_CatCache(false);
 	AtEOXact_Files();
 	SetReindexProcessing(InvalidOid, InvalidOid);
 	pgstat_count_xact_rollback();
@@ -1548,6 +1577,13 @@ CleanupTransaction(void)
 	 * do abort cleanup processing
 	 */
 	AtCleanup_Portals();		/* now safe to release portal memory */
+
+	CurrentResourceOwner = NULL; /* and resource owner */
+	ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
 	AtCleanup_Memory();			/* and transaction memory */
 
 	s->nestingLevel = 0;
@@ -2483,6 +2519,12 @@ StartSubTransaction(void)
 
 	s->state = TRANS_START;
 
+	/*
+	 * must initialize resource-management stuff first
+	 */
+	AtSubStart_Memory();
+	AtSubStart_ResourceOwner();
+
 	/*
 	 * Generate a new Xid and record it in pg_subtrans.
 	 */
@@ -2495,13 +2537,10 @@ StartSubTransaction(void)
 	 */
 	s->currentUser = GetUserId();
 	
-	/* Initialize the various transaction subsystems */
-	AtSubStart_Memory();
+	/*
+	 * Initialize other subsystems for new subtransaction
+	 */
 	AtSubStart_Inval();
-	AtSubStart_RelationCache();
-	AtSubStart_CatCache();
-	AtSubStart_Buffers();
-	AtSubStart_smgr();
 	AtSubStart_Notify();
 	DeferredTriggerBeginSubXact();
 
@@ -2524,7 +2563,8 @@ CommitSubTransaction(void)
 		elog(WARNING, "CommitSubTransaction and not in in-progress state");
 
 	/* Pre-commit processing */
-	AtSubCommit_Portals(s->parent->transactionIdData);
+	AtSubCommit_Portals(s->parent->transactionIdData,
+						s->parent->curTransactionOwner);
 	DeferredTriggerEndSubXact(true);
 
 	s->state = TRANS_COMMIT;
@@ -2539,17 +2579,31 @@ CommitSubTransaction(void)
 
 	AtSubEOXact_Inval(true);
 	AtEOSubXact_SPI(true, s->transactionIdData);
+
+	/*
+	 * Note that we just release the resource owner's resources and don't
+	 * delete it.  This is because locks are not actually released here.
+	 * The owner object continues to exist as a child of its parent owner
+	 * (namely my parent transaction's resource owner), and the locks
+	 * effectively become that owner object's responsibility.
+	 */
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, false);
+	/* we can skip the LOCKS phase */
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, false);
+
 	AtSubCommit_Notify();
 	AtEOXact_GUC(true, true);
-	AtEOSubXact_gist(s->transactionIdData);
-	AtEOSubXact_hash(s->transactionIdData);
-	AtEOSubXact_rtree(s->transactionIdData);
 	AtEOSubXact_on_commit_actions(true, s->transactionIdData,
 								  s->parent->transactionIdData);
 
-	AtEOSubXact_CatCache(true);
-	AtEOSubXact_RelationCache(true);
-	AtEOSubXact_Buffers(true);
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	s->curTransactionOwner = NULL;
+
 	AtSubCommit_Memory();
 
 	s->state = TRANS_DEFAULT;
@@ -2597,20 +2651,25 @@ AbortSubTransaction(void)
 	AtSubAbort_smgr();
 
 	DeferredTriggerEndSubXact(false);
-	AtSubAbort_Portals();
-	AtSubEOXact_Inval(false);
-	AtSubAbort_Locks();
 	AtEOSubXact_SPI(false, s->transactionIdData);
+	AtSubAbort_Portals(s->parent->transactionIdData,
+					   s->parent->curTransactionOwner);
+	AtSubEOXact_Inval(false);
+
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 false, false);
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 false, false);
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 false, false);
+
 	AtSubAbort_Notify();
 	AtEOXact_GUC(false, true);
-	AtEOSubXact_gist(s->transactionIdData);
-	AtEOSubXact_hash(s->transactionIdData);
-	AtEOSubXact_rtree(s->transactionIdData);
 	AtEOSubXact_on_commit_actions(false, s->transactionIdData,
 								  s->parent->transactionIdData);
-	AtEOSubXact_RelationCache(false);
-	AtEOSubXact_CatCache(false);
-	AtEOSubXact_Buffers(false);
 
 	/*
 	 * Reset user id which might have been changed transiently.  Here we
@@ -2645,6 +2704,12 @@ CleanupSubTransaction(void)
 		elog(WARNING, "CleanupSubTransaction and not in aborted state");
 
 	AtSubCleanup_Portals();
+
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	ResourceOwnerDelete(s->curTransactionOwner);
+	s->curTransactionOwner = NULL;
+
 	AtSubCleanup_Memory();
 
 	s->state = TRANS_DEFAULT;
@@ -2685,6 +2750,7 @@ StartAbortedSubTransaction(void)
 	 * Initialize only what has to be there for CleanupSubTransaction to work.
 	 */
 	AtSubStart_Memory();
+	AtSubStart_ResourceOwner();
 
 	s->state = TRANS_ABORT;
 
@@ -2723,6 +2789,7 @@ PushTransaction(void)
 	 */
 	s->transactionIdData = p->transactionIdData;
 	s->curTransactionContext = p->curTransactionContext;
+	s->curTransactionOwner = p->curTransactionOwner;
 	s->currentUser = p->currentUser;
 
 	CurrentTransactionState = s;
@@ -2752,6 +2819,10 @@ PopTransaction(void)
 	CurTransactionContext = s->parent->curTransactionContext;
 	MemoryContextSwitchTo(CurTransactionContext);
 
+	/* Ditto for ResourceOwner links */
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+
 	/* Free the old child structure */
 	pfree(s);
 }
@@ -2861,11 +2932,9 @@ TransStateAsString(TransState state)
  * value is the number of child transactions.  *children is set to point to a
  * palloc'd array of TransactionIds.  If there are no subxacts, *children is
  * set to NULL.
- *
- * If metoo is true, include the current TransactionId.
  */
 int
-xactGetCommittedChildren(TransactionId **ptr, bool metoo)
+xactGetCommittedChildren(TransactionId **ptr)
 {
 	TransactionState	s = CurrentTransactionState;
 	int					nchildren;
@@ -2873,8 +2942,6 @@ xactGetCommittedChildren(TransactionId **ptr, bool metoo)
 	ListCell		   *p;
 
 	nchildren = list_length(s->childXids);
-	if (metoo)
-		nchildren++;
 	if (nchildren == 0)
 	{
 		*ptr = NULL;
@@ -2887,10 +2954,9 @@ xactGetCommittedChildren(TransactionId **ptr, bool metoo)
 	foreach(p, s->childXids)
 	{
 		TransactionId child = lfirst_int(p);
-		*children++ = (TransactionId)child;
+
+		*children++ = child;
 	}
-	if (metoo)
-		*children = s->transactionIdData;
 
 	return nchildren;
 }
diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y
index 7b712bd555b51370d79985f61c786d762bb10b1a..f5448254f0aa6b3f820161a259eee3ebac3b6ff2 100644
--- a/src/backend/bootstrap/bootparse.y
+++ b/src/backend/bootstrap/bootparse.y
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.70 2004/06/18 06:13:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.71 2004/07/17 03:28:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -269,7 +269,12 @@ Boot_DeclareUniqueIndexStmt:
 		;
 
 Boot_BuildIndsStmt:
-		  XBUILD INDICES		{ build_indices(); }
+		  XBUILD INDICES
+				{
+					do_start();
+					build_indices();
+					do_end();
+				}
 		;
 
 
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 7fe2ea02a65bfaa28df47dc2fa34e964bdd45688..4e219df4f5b703d447171b5ef612ccbfc9d77b62 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.186 2004/07/11 00:18:43 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.187 2004/07/17 03:28:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -54,7 +54,6 @@ static void usage(void);
 static void bootstrap_signals(void);
 static hashnode *AddStr(char *str, int strlength, int mderef);
 static Form_pg_attribute AllocateAttribute(void);
-static bool BootstrapAlreadySeen(Oid id);
 static int	CompHash(char *str, int len);
 static hashnode *FindStr(char *str, int length, hashnode *mderef);
 static Oid	gettype(char *type);
@@ -880,34 +879,6 @@ InsertOneNull(int i)
 	Blanks[i] = 'n';
 }
 
-#define MORE_THAN_THE_NUMBER_OF_CATALOGS 256
-
-static bool
-BootstrapAlreadySeen(Oid id)
-{
-	static Oid	seenArray[MORE_THAN_THE_NUMBER_OF_CATALOGS];
-	static int	nseen = 0;
-	bool		seenthis;
-	int			i;
-
-	seenthis = false;
-
-	for (i = 0; i < nseen; i++)
-	{
-		if (seenArray[i] == id)
-		{
-			seenthis = true;
-			break;
-		}
-	}
-	if (!seenthis)
-	{
-		seenArray[nseen] = id;
-		nseen++;
-	}
-	return seenthis;
-}
-
 /* ----------------
  *		cleanup
  * ----------------
@@ -1270,25 +1241,6 @@ build_indices(void)
 		 * index, but in bootstrap mode it will not.
 		 */
 
-		/*
-		 * All of the rest of this routine is needed only because in
-		 * bootstrap processing we don't increment xact id's.  The normal
-		 * DefineIndex code replaces a pg_class tuple with updated info
-		 * including the relhasindex flag (which we need to have updated).
-		 * Unfortunately, there are always two indices defined on each
-		 * catalog causing us to update the same pg_class tuple twice for
-		 * each catalog getting an index during bootstrap resulting in the
-		 * ghost tuple problem (see heap_update).	To get around this we
-		 * change the relhasindex field ourselves in this routine keeping
-		 * track of what catalogs we already changed so that we don't
-		 * modify those tuples twice.  The normal mechanism for updating
-		 * pg_class is disabled during bootstrap.
-		 *
-		 * -mer
-		 */
-		if (!BootstrapAlreadySeen(RelationGetRelid(heap)))
-			UpdateStats(RelationGetRelid(heap), 0);
-
 		/* XXX Probably we ought to close the heap and index here? */
 	}
 }
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index 3a184182b8201cc8832316868969af10fba0e3b3..4123b5d9c16966ad5138b24b85499a71d31b9091 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.116 2004/05/26 04:41:08 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.117 2004/07/17 03:28:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -847,7 +847,7 @@ function_parse_error_transpose(const char *prosrc)
 	}
 
 	/* We can get the original query text from the active portal (hack...) */
-	Assert(ActivePortal && ActivePortal->portalActive);
+	Assert(ActivePortal && ActivePortal->status == PORTAL_ACTIVE);
 	queryText = ActivePortal->sourceText;
 
 	/* Try to locate the prosrc in the original text */
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 9cb8febd3117bfe54ce4b39156ce1566e5c28ea9..b176a6c0c7bafb0720e52acd7d351deabd352696 100644
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.28 2004/06/11 01:08:37 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/portalcmds.c,v 1.29 2004/07/17 03:28:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -233,7 +233,7 @@ PerformPortalClose(const char *name)
  * for portals.
  */
 void
-PortalCleanup(Portal portal, bool isError)
+PortalCleanup(Portal portal)
 {
 	QueryDesc  *queryDesc;
 
@@ -253,8 +253,16 @@ PortalCleanup(Portal portal, bool isError)
 	if (queryDesc)
 	{
 		portal->queryDesc = NULL;
-		if (!isError)
+		if (portal->status != PORTAL_FAILED)
+		{
+			ResourceOwner saveResourceOwner;
+
+			/* We must make the portal's resource owner current */
+			saveResourceOwner = CurrentResourceOwner;
+			CurrentResourceOwner = portal->resowner;
 			ExecutorEnd(queryDesc);
+			CurrentResourceOwner = saveResourceOwner;
+		}
 	}
 }
 
@@ -271,6 +279,7 @@ PersistHoldablePortal(Portal portal)
 {
 	QueryDesc  *queryDesc = PortalGetQueryDesc(portal);
 	Portal		saveActivePortal;
+	ResourceOwner saveResourceOwner;
 	MemoryContext savePortalContext;
 	MemoryContext saveQueryContext;
 	MemoryContext oldcxt;
@@ -281,8 +290,6 @@ PersistHoldablePortal(Portal portal)
 	 */
 	Assert(portal->createXact == GetCurrentTransactionId());
 	Assert(queryDesc != NULL);
-	Assert(portal->portalReady);
-	Assert(!portal->portalDone);
 
 	/*
 	 * Caller must have created the tuplestore already.
@@ -303,17 +310,19 @@ PersistHoldablePortal(Portal portal)
 	/*
 	 * Check for improper portal use, and mark portal active.
 	 */
-	if (portal->portalActive)
+	if (portal->status != PORTAL_READY)
 		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_IN_USE),
-				 errmsg("portal \"%s\" already active", portal->name)));
-	portal->portalActive = true;
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("portal \"%s\" cannot be run", portal->name)));
+	portal->status = PORTAL_ACTIVE;
 
 	/*
 	 * Set global portal context pointers.
 	 */
 	saveActivePortal = ActivePortal;
 	ActivePortal = portal;
+	saveResourceOwner = CurrentResourceOwner;
+	CurrentResourceOwner = portal->resowner;
 	savePortalContext = PortalContext;
 	PortalContext = PortalGetHeapMemory(portal);
 	saveQueryContext = QueryContext;
@@ -342,13 +351,6 @@ PersistHoldablePortal(Portal portal)
 	portal->queryDesc = NULL;	/* prevent double shutdown */
 	ExecutorEnd(queryDesc);
 
-	/* Mark portal not active */
-	portal->portalActive = false;
-
-	ActivePortal = saveActivePortal;
-	PortalContext = savePortalContext;
-	QueryContext = saveQueryContext;
-
 	/*
 	 * Reset the position in the result set: ideally, this could be
 	 * implemented by just skipping straight to the tuple # that we need
@@ -394,4 +396,12 @@ PersistHoldablePortal(Portal portal)
 	 * portal's heap via PortalContext.
 	 */
 	MemoryContextDeleteChildren(PortalGetHeapMemory(portal));
+
+	/* Mark portal not active */
+	portal->status = PORTAL_READY;
+
+	ActivePortal = saveActivePortal;
+	CurrentResourceOwner = saveResourceOwner;
+	PortalContext = savePortalContext;
+	QueryContext = saveQueryContext;
 }
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 4a9ddc32432cc82e36c6fa21a4c0afc716d548ae..d9447ac394d834c2a15846193491a3f7a453ea0c 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.172 2004/07/01 00:50:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.173 2004/07/17 03:28:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@
 #include "storage/bufpage.h"
 #include "storage/proc.h"
 #include "storage/smgr.h"
-#include "utils/memutils.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "pgstat.h"
 
 
@@ -65,13 +65,9 @@ long		NDirectFileRead;	/* some I/O's are direct file access.
 								 * bypass bufmgr */
 long		NDirectFileWrite;	/* e.g., I/O in psort and hashjoin. */
 
-/* List of upper-level-transaction buffer refcount arrays */
-static List *upperRefCounts = NIL;
 
-
-static void PinBuffer(BufferDesc *buf);
-static void UnpinBuffer(BufferDesc *buf);
-static void BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning);
+static void PinBuffer(BufferDesc *buf, bool fixOwner);
+static void UnpinBuffer(BufferDesc *buf, bool fixOwner);
 static void WaitIO(BufferDesc *buf);
 static void StartBufferIO(BufferDesc *buf, bool forInput);
 static void TerminateBufferIO(BufferDesc *buf, int err_flag);
@@ -103,6 +99,7 @@ static void write_buffer(Buffer buffer, bool unpin);
 Buffer
 ReadBuffer(Relation reln, BlockNumber blockNum)
 {
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 	return ReadBufferInternal(reln, blockNum, false);
 }
 
@@ -111,6 +108,8 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
  *
  * bufferLockHeld: if true, caller already acquired the bufmgr lock.
  * (This is assumed never to be true if dealing with a local buffer!)
+ *
+ * The caller must have done ResourceOwnerEnlargeBuffers(CurrentResourceOwner).
  */
 static Buffer
 ReadBufferInternal(Relation reln, BlockNumber blockNum,
@@ -287,7 +286,7 @@ BufferAlloc(Relation reln,
 		 */
 		*foundPtr = TRUE;
 
-		PinBuffer(buf);
+		PinBuffer(buf, true);
 
 		if (!(buf->flags & BM_VALID))
 		{
@@ -337,6 +336,9 @@ BufferAlloc(Relation reln,
 		buf->refcount = 1;
 		PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
 
+		ResourceOwnerRememberBuffer(CurrentResourceOwner,
+									BufferDescriptorGetBuffer(buf));
+
 		if ((buf->flags & BM_VALID) &&
 			(buf->flags & BM_DIRTY || buf->cntxDirty))
 		{
@@ -382,7 +384,7 @@ BufferAlloc(Relation reln,
 				 * buffer we were planning to use.
 				 */
 				TerminateBufferIO(buf, 0);
-				UnpinBuffer(buf);
+				UnpinBuffer(buf, true);
 
 				buf = buf2;
 
@@ -390,7 +392,7 @@ BufferAlloc(Relation reln,
 
 				*foundPtr = TRUE;
 
-				PinBuffer(buf);
+				PinBuffer(buf, true);
 
 				if (!(buf->flags & BM_VALID))
 				{
@@ -425,7 +427,7 @@ BufferAlloc(Relation reln,
 			if (buf->refcount > 1 || buf->flags & BM_DIRTY || buf->cntxDirty)
 			{
 				TerminateBufferIO(buf, 0);
-				UnpinBuffer(buf);
+				UnpinBuffer(buf, true);
 				inProgress = FALSE;
 				buf = NULL;
 			}
@@ -497,7 +499,7 @@ write_buffer(Buffer buffer, bool release)
 	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
 
 	if (release)
-		UnpinBuffer(bufHdr);
+		UnpinBuffer(bufHdr, true);
 	LWLockRelease(BufMgrLock);
 }
 
@@ -561,6 +563,8 @@ ReleaseAndReadBuffer(Buffer buffer,
 			if (bufHdr->tag.blockNum == blockNum &&
 				RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
 				return buffer;
+			ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+			/* owner now has a free slot, so no need for Enlarge() */
 			LocalRefCount[-buffer - 1]--;
 		}
 		else
@@ -570,16 +574,20 @@ ReleaseAndReadBuffer(Buffer buffer,
 			if (bufHdr->tag.blockNum == blockNum &&
 				RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
 				return buffer;
+			ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+			/* owner now has a free slot, so no need for Enlarge() */
 			if (PrivateRefCount[buffer - 1] > 1)
 				PrivateRefCount[buffer - 1]--;
 			else
 			{
 				LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-				UnpinBuffer(bufHdr);
+				UnpinBuffer(bufHdr, false);
 				return ReadBufferInternal(relation, blockNum, true);
 			}
 		}
 	}
+	else
+		ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
 	return ReadBufferInternal(relation, blockNum, false);
 }
@@ -589,9 +597,12 @@ ReleaseAndReadBuffer(Buffer buffer,
  *
  * This should be applied only to shared buffers, never local ones.
  * Bufmgr lock must be held by caller.
+ *
+ * If fixOwner is true (most but not all callers), CurrentResourceOwner is adjusted.
+ * Note that ResourceOwnerEnlargeBuffers must have been done already.
  */
 static void
-PinBuffer(BufferDesc *buf)
+PinBuffer(BufferDesc *buf, bool fixOwner)
 {
 	int			b = BufferDescriptorGetBuffer(buf) - 1;
 
@@ -599,6 +610,9 @@ PinBuffer(BufferDesc *buf)
 		buf->refcount++;
 	PrivateRefCount[b]++;
 	Assert(PrivateRefCount[b] > 0);
+	if (fixOwner)
+		ResourceOwnerRememberBuffer(CurrentResourceOwner,
+									BufferDescriptorGetBuffer(buf));
 }
 
 /*
@@ -606,12 +620,18 @@ PinBuffer(BufferDesc *buf)
  *
  * This should be applied only to shared buffers, never local ones.
  * Bufmgr lock must be held by caller.
+ *
+ * If fixOwner is true (most but not all callers), CurrentResourceOwner is adjusted.
  */
 static void
-UnpinBuffer(BufferDesc *buf)
+UnpinBuffer(BufferDesc *buf, bool fixOwner)
 {
 	int			b = BufferDescriptorGetBuffer(buf) - 1;
 
+	if (fixOwner)
+		ResourceOwnerForgetBuffer(CurrentResourceOwner,
+								  BufferDescriptorGetBuffer(buf));
+
 	Assert(buf->refcount > 0);
 	Assert(PrivateRefCount[b] > 0);
 	PrivateRefCount[b]--;
@@ -677,6 +697,9 @@ BufferSync(int percent, int maxpages)
 	if (maxpages > 0 && num_buffer_dirty > maxpages)
 		num_buffer_dirty = maxpages;
 
+	/* Make sure we can handle the pin inside the loop */
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
 	/*
 	 * Loop over buffers to be written.  Note the BufMgrLock is held at
 	 * loop top, but is released and reacquired within FlushBuffer,
@@ -724,13 +747,13 @@ BufferSync(int percent, int maxpages)
 		 * buffer now and set IO state for it *before* acquiring shlock to
 		 * avoid conflicts with FlushRelationBuffers.
 		 */
-		PinBuffer(bufHdr);
+		PinBuffer(bufHdr, true);
 		StartBufferIO(bufHdr, false);
 
 		FlushBuffer(bufHdr, NULL);
 
 		TerminateBufferIO(bufHdr, 0);
-		UnpinBuffer(bufHdr);
+		UnpinBuffer(bufHdr, true);
 	}
 
 	LWLockRelease(BufMgrLock);
@@ -831,102 +854,32 @@ AtEOXact_Buffers(bool isCommit)
 	for (i = 0; i < NBuffers; i++)
 	{
 		if (PrivateRefCount[i] != 0)
-			BufferFixLeak(i, 0, isCommit);
-	}
-
-	AtEOXact_LocalBuffers(isCommit);
-}
-
-/*
- * During subtransaction start, save buffer reference counts.
- */
-void
-AtSubStart_Buffers(void)
-{
-	int32		   *copyRefCounts;
-	Size			rcSize;
-	MemoryContext	old_cxt;
-
-	/* this is probably the active context already, but be safe */
-	old_cxt = MemoryContextSwitchTo(CurTransactionContext);
-
-	/*
-	 * We need to copy the current state of PrivateRefCount[].  In the typical
-	 * scenario, few if any of the entries will be nonzero, and we could save
-	 * space by storing only the nonzero ones.  However, copying the whole
-	 * thing is lots simpler and faster both here and in AtEOSubXact_Buffers,
-	 * so it seems best to waste the space.
-	 */
-	rcSize = NBuffers * sizeof(int32);
-	copyRefCounts = (int32 *) palloc(rcSize);
-	memcpy(copyRefCounts, PrivateRefCount, rcSize);
-
-	/* Attach to list */
-	upperRefCounts = lcons(copyRefCounts, upperRefCounts);
-
-	MemoryContextSwitchTo(old_cxt);
-}
-
-/*
- * AtEOSubXact_Buffers
- *
- * At subtransaction end, we restore the saved counts.  If committing, we
- * complain if the refcounts don't match; if aborting, just restore silently.
- */
-void
-AtEOSubXact_Buffers(bool isCommit)
-{
-	int32	   *oldRefCounts;
-	int			i;
-
-	oldRefCounts = (int32 *) linitial(upperRefCounts);
-	upperRefCounts = list_delete_first(upperRefCounts);
+		{
+			BufferDesc *buf = &(BufferDescriptors[i]);
+
+			if (isCommit)
+				elog(WARNING,
+					 "buffer refcount leak: [%03d] "
+					 "(rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d)",
+					 i,
+					 buf->tag.rnode.spcNode, buf->tag.rnode.dbNode,
+					 buf->tag.rnode.relNode,
+					 buf->tag.blockNum, buf->flags,
+					 buf->refcount, PrivateRefCount[i]);
 
-	for (i = 0; i < NBuffers; i++)
-	{
-		if (PrivateRefCount[i] != oldRefCounts[i])
-			BufferFixLeak(i, oldRefCounts[i], isCommit);
+			/*
+			 * We don't worry about updating the ResourceOwner structures;
+			 * resowner.c will clear them for itself.
+			 */
+			PrivateRefCount[i] = 1;		/* make sure we release shared pin */
+			LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+			UnpinBuffer(buf, false);
+			LWLockRelease(BufMgrLock);
+			Assert(PrivateRefCount[i] == 0);
+		}
 	}
 
-	pfree(oldRefCounts);
-}
-
-/*
- * Fix a buffer refcount leak.
- *
- * The caller does not hold the BufMgrLock.
- */
-static void
-BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning)
-{
-	BufferDesc	*buf = &(BufferDescriptors[bufnum]);
-
-	if (emitWarning)
-		elog(WARNING,
-			 "buffer refcount leak: [%03d] (rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d, should be=%d)",
-			 bufnum,
-			 buf->tag.rnode.spcNode, buf->tag.rnode.dbNode,
-			 buf->tag.rnode.relNode,
-			 buf->tag.blockNum, buf->flags,
-			 buf->refcount, PrivateRefCount[bufnum], shouldBe);
-
-	/* If it's less, we're in a heap o' trouble */
-	if (PrivateRefCount[bufnum] <= shouldBe)
-		elog(FATAL, "buffer refcount was decreased by subtransaction");
-
-	if (shouldBe > 0)
-	{
-		/* We still keep the shared-memory pin */
-		PrivateRefCount[bufnum] = shouldBe;
-	}
-	else
-	{
-		PrivateRefCount[bufnum] = 1; /* make sure we release shared pin */
-		LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-		UnpinBuffer(buf);
-		LWLockRelease(BufMgrLock);
-		Assert(PrivateRefCount[bufnum] == 0);
-	}
+	AtEOXact_LocalBuffers(isCommit);
 }
 
 /*
@@ -1172,9 +1125,15 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
 			if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
 				bufHdr->tag.blockNum >= firstDelBlock)
 			{
+				if (LocalRefCount[i] != 0)
+					elog(FATAL, "block %u of %u/%u/%u is still referenced (local %u)",
+						 bufHdr->tag.blockNum,
+						 bufHdr->tag.rnode.spcNode,
+						 bufHdr->tag.rnode.dbNode,
+						 bufHdr->tag.rnode.relNode,
+						 LocalRefCount[i]);
 				bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 				bufHdr->cntxDirty = false;
-				LocalRefCount[i] = 0;
 				bufHdr->tag.rnode.relNode = InvalidOid;
 			}
 		}
@@ -1205,28 +1164,21 @@ recheck:
 				 */
 				goto recheck;
 			}
-			/* Now we can do what we came for */
-			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
-			bufHdr->cntxDirty = false;
 
 			/*
-			 * Release any refcount we may have.  If someone else has a
-			 * pin on the buffer, we got trouble.
+			 * There should be no pin on the buffer.
 			 */
 			if (bufHdr->refcount != 0)
-			{
-				/* the sole pin should be ours */
-				if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0)
-					elog(FATAL, "block %u of %u/%u/%u is still referenced (private %d, global %u)",
-						 bufHdr->tag.blockNum,
-						 bufHdr->tag.rnode.spcNode,
-						 bufHdr->tag.rnode.dbNode,
-						 bufHdr->tag.rnode.relNode,
-						 PrivateRefCount[i - 1], bufHdr->refcount);
-				/* Make sure it will be released */
-				PrivateRefCount[i - 1] = 1;
-				UnpinBuffer(bufHdr);
-			}
+				elog(FATAL, "block %u of %u/%u/%u is still referenced (private %d, global %u)",
+					 bufHdr->tag.blockNum,
+					 bufHdr->tag.rnode.spcNode,
+					 bufHdr->tag.rnode.dbNode,
+					 bufHdr->tag.rnode.relNode,
+					 PrivateRefCount[i - 1], bufHdr->refcount);
+
+			/* Now we can do what we came for */
+			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
+			bufHdr->cntxDirty = false;
 
 			/*
 			 * And mark the buffer as no longer occupied by this rel.
@@ -1353,7 +1305,7 @@ PrintPinnedBufs(void)
 	for (i = 0; i < NBuffers; ++i, ++buf)
 	{
 		if (PrivateRefCount[i] > 0)
-			elog(WARNING,
+			elog(NOTICE,
 				 "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u/%u, "
 				 "blockNum=%u, flags=0x%x, refcount=%u %d)",
 				 i, buf->freeNext, buf->freePrev,
@@ -1456,6 +1408,9 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
 		return;
 	}
 
+	/* Make sure we can handle the pin inside the loop */
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
 	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
 
 	for (i = 0; i < NBuffers; i++)
@@ -1466,7 +1421,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
 			if ((bufHdr->flags & BM_VALID) &&
 				(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
 			{
-				PinBuffer(bufHdr);
+				PinBuffer(bufHdr, true);
 				/* Someone else might be flushing buffer */
 				if (bufHdr->flags & BM_IO_IN_PROGRESS)
 					WaitIO(bufHdr);
@@ -1479,7 +1434,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
 
 					TerminateBufferIO(bufHdr, 0);
 				}
-				UnpinBuffer(bufHdr);
+				UnpinBuffer(bufHdr, true);
 				if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
 					elog(ERROR, "FlushRelationBuffers(\"%s\", %u): block %u was re-dirtied",
 						 RelationGetRelationName(rel), firstDelBlock,
@@ -1507,6 +1462,8 @@ ReleaseBuffer(Buffer buffer)
 {
 	BufferDesc *bufHdr;
 
+	ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+
 	if (BufferIsLocal(buffer))
 	{
 		Assert(LocalRefCount[-buffer - 1] > 0);
@@ -1526,11 +1483,39 @@ ReleaseBuffer(Buffer buffer)
 	else
 	{
 		LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-		UnpinBuffer(bufHdr);
+		UnpinBuffer(bufHdr, false);
 		LWLockRelease(BufMgrLock);
 	}
 }
 
+/*
+ * IncrBufferRefCount
+ *		Increment the pin count on a buffer that we have *already* pinned
+ *		at least once.
+ *
+ *		This function cannot be used on a buffer we do not have pinned,
+ *		because it doesn't change the shared buffer state.  Therefore the
+ *		Assert checks are for refcount > 0.  Someone got this wrong once...
+ */
+void
+IncrBufferRefCount(Buffer buffer)
+{
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+	ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
+	if (BufferIsLocal(buffer))
+	{
+		Assert(buffer >= -NLocBuffer);
+		Assert(LocalRefCount[-buffer - 1] > 0);
+		LocalRefCount[-buffer - 1]++;
+	}
+	else
+	{
+		Assert(!BAD_BUFFER_ID(buffer));
+		Assert(PrivateRefCount[buffer - 1] > 0);
+		PrivateRefCount[buffer - 1]++;
+	}
+}
+
 #ifdef NOT_USED
 void
 IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index f4d1163f16ac061b06c55f7a1b8eb9730bda9eed..7103c46c11f9b6190fd6b1fe0ef891d9d736d964 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.56 2004/06/18 06:13:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.57 2004/07/17 03:28:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
 #include "storage/bufmgr.h"
 #include "storage/smgr.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 
 
 /*#define LBDEBUG*/
@@ -62,6 +63,8 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 #endif
 
 			LocalRefCount[i]++;
+			ResourceOwnerRememberBuffer(CurrentResourceOwner,
+										BufferDescriptorGetBuffer(bufHdr));
 			if (bufHdr->flags & BM_VALID)
 				*foundPtr = TRUE;
 			else
@@ -88,6 +91,8 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 		{
 			bufHdr = &LocalBufferDescriptors[b];
 			LocalRefCount[b]++;
+			ResourceOwnerRememberBuffer(CurrentResourceOwner,
+										BufferDescriptorGetBuffer(bufHdr));
 			nextFreeLocalBuf = (b + 1) % NLocBuffer;
 			break;
 		}
@@ -179,6 +184,7 @@ WriteLocalBuffer(Buffer buffer, bool release)
 	{
 		Assert(LocalRefCount[bufid] > 0);
 		LocalRefCount[bufid]--;
+		ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
 	}
 }
 
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 6b7f43440e66f8de414229ac2d0ba83dc02f3daf..f7978fca7ae9c0f810bd9daabb5734eac673e91e 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.134 2004/07/01 00:50:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.135 2004/07/17 03:28:51 tgl Exp $
  *
  * NOTES
  *	  Outside modules can create a lock table and acquire/release
@@ -30,14 +30,15 @@
  */
 #include "postgres.h"
 
-#include <unistd.h>
 #include <signal.h>
+#include <unistd.h>
 
 #include "access/xact.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
+#include "utils/resowner.h"
 
 
 /* This configuration variable is used to set the lock table size */
@@ -424,6 +425,9 @@ LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 	/* ???????? This must be changed when short term locks will be used */
 	locktag->lockmethodid = lockmethodid;
 
+	/* Prepare to record the lock in the current resource owner */
+	ResourceOwnerEnlargeLocks(CurrentResourceOwner);
+
 	Assert(lockmethodid < NumLockMethods);
 	lockMethodTable = LockMethods[lockmethodid];
 	if (!lockMethodTable)
@@ -567,6 +571,8 @@ LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 	if (proclock->holding[lockmode] > 0)
 	{
 		GrantLock(lock, proclock, lockmode);
+		ResourceOwnerRememberLock(CurrentResourceOwner, locktag, xid,
+								  lockmode);
 		PROCLOCK_PRINT("LockAcquire: owning", proclock);
 		LWLockRelease(masterLock);
 		return TRUE;
@@ -580,6 +586,8 @@ LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 	if (myHolding[lockmode] > 0)
 	{
 		GrantLock(lock, proclock, lockmode);
+		ResourceOwnerRememberLock(CurrentResourceOwner, locktag, xid,
+								  lockmode);
 		PROCLOCK_PRINT("LockAcquire: my other XID owning", proclock);
 		LWLockRelease(masterLock);
 		return TRUE;
@@ -601,6 +609,8 @@ LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 	{
 		/* No conflict with held or previously requested locks */
 		GrantLock(lock, proclock, lockmode);
+		ResourceOwnerRememberLock(CurrentResourceOwner, locktag, xid,
+								  lockmode);
 	}
 	else
 	{
@@ -803,6 +813,9 @@ LockCountMyLocks(SHMEM_OFFSET lockOffset, PGPROC *proc, int *myHolding)
  *
  * NOTE: if proc was blocked, it also needs to be removed from the wait list
  * and have its waitLock/waitHolder fields cleared.  That's not done here.
+ *
+ * NOTE: the lock also has to be recorded in the current ResourceOwner;
+ * but since we may be awaking some other process, we can't do that here.
  */
 void
 GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode)
@@ -964,6 +977,9 @@ LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 	/* ???????? This must be changed when short term locks will be used */
 	locktag->lockmethodid = lockmethodid;
 
+	/* Record release of the lock in the current resource owner */
+	ResourceOwnerForgetLock(CurrentResourceOwner, locktag, xid, lockmode);
+
 	Assert(lockmethodid < NumLockMethods);
 	lockMethodTable = LockMethods[lockmethodid];
 	if (!lockMethodTable)
@@ -1134,20 +1150,15 @@ LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
  *
  * Well, not necessarily *all* locks.  The available behaviors are:
  *
- * which == ReleaseAll: release all locks regardless of transaction
+ * allxids == true: release all locks regardless of transaction
  * affiliation.
  *
- * which == ReleaseAllExceptSession: release all locks with Xid != 0
+ * allxids == false: release all locks with Xid != 0
  * (zero is the Xid used for "session" locks).
- *
- * which == ReleaseGivenXids: release only locks whose Xids appear in
- * the xids[] array (of length nxids).
- *
- * xids/nxids are ignored when which != ReleaseGivenXids.
  */
 bool
 LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc,
-			   LockReleaseWhich which, int nxids, TransactionId *xids)
+			   bool allxids)
 {
 	SHM_QUEUE  *procHolders = &(proc->procHolders);
 	PROCLOCK   *proclock;
@@ -1196,25 +1207,9 @@ LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc,
 		if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
 			goto next_item;
 
-		if (which == ReleaseGivenXids)
-		{
-			/* Ignore locks with an Xid not in the list */
-			bool release = false;
-
-			for (i = 0; i < nxids; i++)
-			{
-				if (TransactionIdEquals(proclock->tag.xid, xids[i]))
-				{
-					release = true;
-					break;
-				}
-			}
-			if (!release)
-				goto next_item;
-		}
-		/* Ignore locks with Xid=0 unless we are asked to release All locks */
-		else if (TransactionIdEquals(proclock->tag.xid, InvalidTransactionId)
-				 && which != ReleaseAll)
+		/* Ignore locks with Xid=0 unless we are asked to release all locks */
+		if (TransactionIdEquals(proclock->tag.xid, InvalidTransactionId)
+			&& !allxids)
 			goto next_item;
 
 		PROCLOCK_PRINT("LockReleaseAll", proclock);
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index abe44e808adde8f64439a85a12939b246e502665..3c7249ffc13e8a0c280ee47691c49b0628c6b311 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.149 2004/07/01 00:50:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.150 2004/07/17 03:28:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,7 +40,6 @@
  */
 #include "postgres.h"
 
-#include <errno.h>
 #include <signal.h>
 #include <unistd.h>
 #include <sys/time.h>
@@ -51,6 +50,8 @@
 #include "storage/proc.h"
 #include "storage/sinval.h"
 #include "storage/spin.h"
+#include "utils/resowner.h"
+
 
 /* GUC variables */
 int			DeadlockTimeout = 1000;
@@ -75,6 +76,11 @@ static PGPROC *DummyProcs = NULL;
 static bool waitingForLock = false;
 static bool waitingForSignal = false;
 
+/* Auxiliary state, valid when waitingForLock is true */
+static LOCKTAG waitingForLockTag;
+static TransactionId waitingForLockXid;
+static LOCKMODE waitingForLockMode;
+
 /* Mark these volatile because they can be changed by signal handler */
 static volatile bool statement_timeout_active = false;
 static volatile bool deadlock_timeout_active = false;
@@ -234,7 +240,7 @@ InitProcess(void)
 	 * prepared for us by InitProcGlobal.
 	 */
 	SHMQueueElemInit(&(MyProc->links));
-	MyProc->errType = STATUS_OK;
+	MyProc->waitStatus = STATUS_OK;
 	MyProc->xid = InvalidTransactionId;
 	MyProc->xmin = InvalidTransactionId;
 	MyProc->pid = MyProcPid;
@@ -308,7 +314,7 @@ InitDummyProcess(int proctype)
 	 */
 	MyProc->pid = MyProcPid;	/* marks dummy proc as in use by me */
 	SHMQueueElemInit(&(MyProc->links));
-	MyProc->errType = STATUS_OK;
+	MyProc->waitStatus = STATUS_OK;
 	MyProc->xid = InvalidTransactionId;
 	MyProc->xmin = InvalidTransactionId;
 	MyProc->databaseId = MyDatabaseId;
@@ -348,15 +354,40 @@ LockWaitCancel(void)
 	if (!waitingForLock)
 		return false;
 
-	waitingForLock = false;
-
 	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
 	disable_sig_alarm(false);
 
 	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
 	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
+
 	if (MyProc->links.next != INVALID_OFFSET)
+	{
+		/* We could not have been granted the lock yet */
+		Assert(MyProc->waitStatus == STATUS_ERROR);
 		RemoveFromWaitQueue(MyProc);
+	}
+	else
+	{
+		/*
+		 * Somebody kicked us off the lock queue already.  Perhaps they
+		 * granted us the lock, or perhaps they detected a deadlock.
+		 * If they did grant us the lock, we'd better remember it in
+		 * CurrentResourceOwner.
+		 *
+		 * Exception: if CurrentResourceOwner is NULL then we can't do
+		 * anything.  This could only happen when we are invoked from ProcKill
+		 * or some similar place, where all our locks are about to be released
+		 * anyway.
+		 */
+		if (MyProc->waitStatus == STATUS_OK && CurrentResourceOwner != NULL)
+			ResourceOwnerRememberLock(CurrentResourceOwner,
+									  &waitingForLockTag,
+									  waitingForLockXid,
+									  waitingForLockMode);
+	}
+
+	waitingForLock = false;
+
 	LWLockRelease(LockMgrLock);
 
 	/*
@@ -380,34 +411,29 @@ LockWaitCancel(void)
 
 /*
  * ProcReleaseLocks() -- release locks associated with current transaction
- *			at main transaction and subtransaction commit or abort
- *
- * The options for which locks to release are the same as for the underlying
- * LockReleaseAll() function.
- *
- * Notes:
+ *			at main transaction commit or abort
  *
  * At main transaction commit, we release all locks except session locks.
  * At main transaction abort, we release all locks including session locks;
  * this lets us clean up after a VACUUM FULL failure.
  *
  * At subtransaction commit, we don't release any locks (so this func is not
- * called at all); we will defer the releasing to the parent transaction.
+ * needed at all); we will defer the releasing to the parent transaction.
  * At subtransaction abort, we release all locks held by the subtransaction;
- * this is implemented by passing in the Xids of the failed subxact and its
- * children in the xids[] array.
+ * this is implemented by retail releasing of the locks under control of
+ * the ResourceOwner mechanism.
  *
  * Note that user locks are not released in any case.
  */
 void
-ProcReleaseLocks(LockReleaseWhich which, int nxids, TransactionId *xids)
+ProcReleaseLocks(bool isCommit)
 {
 	if (!MyProc)
 		return;
 	/* If waiting, get off wait queue (should only be needed after error) */
 	LockWaitCancel();
 	/* Release locks */
-	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, which, nxids, xids);
+	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, !isCommit);
 }
 
 
@@ -440,11 +466,11 @@ ProcKill(int code, Datum arg)
 	LockWaitCancel();
 
 	/* Remove from the standard lock table */
-	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL);
+	LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true);
 
 #ifdef USER_LOCKS
 	/* Remove from the user lock table */
-	LockReleaseAll(USER_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL);
+	LockReleaseAll(USER_LOCKMETHOD, MyProc, true);
 #endif
 
 	SpinLockAcquire(ProcStructLock);
@@ -618,6 +644,10 @@ ProcSleep(LockMethod lockMethodTable,
 				{
 					/* Skip the wait and just grant myself the lock. */
 					GrantLock(lock, proclock, lockmode);
+					ResourceOwnerRememberLock(CurrentResourceOwner,
+											  &lock->tag,
+											  proclock->tag.xid,
+											  lockmode);
 					return STATUS_OK;
 				}
 				/* Break out of loop to put myself before him */
@@ -653,7 +683,7 @@ ProcSleep(LockMethod lockMethodTable,
 	MyProc->waitHolder = proclock;
 	MyProc->waitLockMode = lockmode;
 
-	MyProc->errType = STATUS_OK;	/* initialize result for success */
+	MyProc->waitStatus = STATUS_ERROR;	/* initialize result for error */
 
 	/*
 	 * If we detected deadlock, give up without waiting.  This must agree
@@ -663,11 +693,13 @@ ProcSleep(LockMethod lockMethodTable,
 	if (early_deadlock)
 	{
 		RemoveFromWaitQueue(MyProc);
-		MyProc->errType = STATUS_ERROR;
 		return STATUS_ERROR;
 	}
 
 	/* mark that we are waiting for a lock */
+	waitingForLockTag = lock->tag;
+	waitingForLockXid = proclock->tag.xid;
+	waitingForLockMode = lockmode;
 	waitingForLock = true;
 
 	/*
@@ -683,7 +715,7 @@ ProcSleep(LockMethod lockMethodTable,
 	/*
 	 * Set timer so we can wake up after awhile and check for a deadlock.
 	 * If a deadlock is detected, the handler releases the process's
-	 * semaphore and sets MyProc->errType = STATUS_ERROR, allowing us to
+	 * semaphore and sets MyProc->waitStatus = STATUS_ERROR, allowing us to
 	 * know that we must report failure rather than success.
 	 *
 	 * By delaying the check until we've waited for a bit, we can avoid
@@ -703,8 +735,10 @@ ProcSleep(LockMethod lockMethodTable,
 	 * We pass interruptOK = true, which eliminates a window in which
 	 * cancel/die interrupts would be held off undesirably.  This is a
 	 * promise that we don't mind losing control to a cancel/die interrupt
-	 * here.  We don't, because we have no state-change work to do after
-	 * being granted the lock (the grantor did it all).
+	 * here.  We don't, because we have no shared-state-change work to do
+	 * after being granted the lock (the grantor did it all).  We do have
+	 * to worry about updating the local CurrentResourceOwner, but if we
+	 * lose control to an error, LockWaitCancel will fix that up.
 	 */
 	PGSemaphoreLock(&MyProc->sem, true);
 
@@ -715,20 +749,32 @@ ProcSleep(LockMethod lockMethodTable,
 		elog(FATAL, "could not disable timer for process wakeup");
 
 	/*
-	 * Now there is nothing for LockWaitCancel to do.
+	 * Re-acquire the locktable's masterLock.  We have to do this to hold
+	 * off cancel/die interrupts before we can mess with waitingForLock
+	 * (else we might have a missed or duplicated CurrentResourceOwner
+	 * update).
+	 */
+	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+	/*
+	 * We no longer want LockWaitCancel to do anything.
 	 */
 	waitingForLock = false;
 
 	/*
-	 * Re-acquire the locktable's masterLock.
+	 * If we got the lock, be sure to remember it in CurrentResourceOwner.
 	 */
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	if (MyProc->waitStatus == STATUS_OK)
+		ResourceOwnerRememberLock(CurrentResourceOwner,
+								  &lock->tag,
+								  proclock->tag.xid,
+								  lockmode);
 
 	/*
 	 * We don't have to do anything else, because the awaker did all the
 	 * necessary update of the lock table and MyProc.
 	 */
-	return MyProc->errType;
+	return MyProc->waitStatus;
 }
 
 
@@ -743,7 +789,7 @@ ProcSleep(LockMethod lockMethodTable,
  * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
  */
 PGPROC *
-ProcWakeup(PGPROC *proc, int errType)
+ProcWakeup(PGPROC *proc, int waitStatus)
 {
 	PGPROC	   *retProc;
 
@@ -764,7 +810,7 @@ ProcWakeup(PGPROC *proc, int errType)
 	/* Clean up process' state and pass it the ok/fail signal */
 	proc->waitLock = NULL;
 	proc->waitHolder = NULL;
-	proc->errType = errType;
+	proc->waitStatus = waitStatus;
 
 	/* And awaken it */
 	PGSemaphoreUnlock(&proc->sem);
@@ -891,10 +937,10 @@ CheckDeadLock(void)
 	RemoveFromWaitQueue(MyProc);
 
 	/*
-	 * Set MyProc->errType to STATUS_ERROR so that ProcSleep will report
+	 * Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report
 	 * an error after we return from the signal handler.
 	 */
-	MyProc->errType = STATUS_ERROR;
+	MyProc->waitStatus = STATUS_ERROR;
 
 	/*
 	 * Unlock my semaphore so that the interrupted ProcSleep() call can
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 5ad43955e46e4cfd66b3f08bbe9da7f3979e01db..c9efd6341bcc77fd374ce3278e8310cf54cc3894 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,12 +11,13 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.76 2004/07/11 19:52:51 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.77 2004/07/17 03:28:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include "access/xact.h"
 #include "commands/tablespace.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
@@ -81,10 +82,15 @@ static HTAB *SMgrRelationHash = NULL;
  * executed immediately, but is just entered in the list.  When and if
  * the transaction commits, we can delete the physical file.
  *
- * The list is kept in CurTransactionContext.  In subtransactions, each
- * subtransaction has its own list in its own CurTransactionContext, but
- * successful subtransactions attach their lists to their parent's list.
- * Failed subtransactions can immediately execute the abort-time actions.
+ * To handle subtransactions, every entry is marked with its transaction
+ * nesting level.  At subtransaction commit, we reassign the subtransaction's
+ * entries to the parent nesting level.  At subtransaction abort, we can
+ * immediately execute the abort-time actions for all entries of the current
+ * nesting level.
+ *
+ * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
+ * prematurely.  It'd probably be OK to keep it in TopTransactionContext,
+ * but I'm being paranoid.
  */
 
 typedef struct PendingRelDelete
@@ -93,11 +99,11 @@ typedef struct PendingRelDelete
 	int			which;			/* which storage manager? */
 	bool		isTemp;			/* is it a temporary relation? */
 	bool		atCommit;		/* T=delete at commit; F=delete at abort */
+	int			nestLevel;		/* xact nesting level of request */
+	struct PendingRelDelete *next;		/* linked-list link */
 } PendingRelDelete;
 
-static List *pendingDeletes = NIL;		/* head of linked list */
-
-static List *upperPendingDeletes = NIL; /* list of upper-xact lists */
+static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
 
 
 /*
@@ -308,7 +314,6 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
 	XLogRecData		rdata;
 	xl_smgr_create	xlrec;
 	PendingRelDelete *pending;
-	MemoryContext	old_cxt;
 
 	/*
 	 * We may be using the target table space for the first time in this
@@ -349,17 +354,15 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
 	lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata);
 
 	/* Add the relation to the list of stuff to delete at abort */
-	old_cxt = MemoryContextSwitchTo(CurTransactionContext);
-
-	pending = (PendingRelDelete *) palloc(sizeof(PendingRelDelete));
+	pending = (PendingRelDelete *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
 	pending->relnode = reln->smgr_rnode;
 	pending->which = reln->smgr_which;
 	pending->isTemp = isTemp;
 	pending->atCommit = false;	/* delete if abort */
-
-	pendingDeletes = lcons(pending, pendingDeletes);
-
-	MemoryContextSwitchTo(old_cxt);
+	pending->nestLevel = GetCurrentTransactionNestLevel();
+	pending->next = pendingDeletes;
+	pendingDeletes = pending;
 }
 
 /*
@@ -374,20 +377,17 @@ void
 smgrscheduleunlink(SMgrRelation reln, bool isTemp)
 {
 	PendingRelDelete *pending;
-	MemoryContext	 old_cxt;
 
 	/* Add the relation to the list of stuff to delete at commit */
-	old_cxt = MemoryContextSwitchTo(CurTransactionContext);
-
-	pending = (PendingRelDelete *) palloc(sizeof(PendingRelDelete));
+	pending = (PendingRelDelete *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
 	pending->relnode = reln->smgr_rnode;
 	pending->which = reln->smgr_which;
 	pending->isTemp = isTemp;
 	pending->atCommit = true;	/* delete if commit */
-
-	pendingDeletes = lcons(pending, pendingDeletes);
-
-	MemoryContextSwitchTo(old_cxt);
+	pending->nestLevel = GetCurrentTransactionNestLevel();
+	pending->next = pendingDeletes;
+	pendingDeletes = pending;
 
 	/*
 	 * NOTE: if the relation was created in this transaction, it will now
@@ -647,25 +647,45 @@ smgrimmedsync(SMgrRelation reln)
 
 /*
  *	smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
+ *
+ * This also runs when aborting a subxact; we want to clean up a failed
+ * subxact immediately.
  */
 void
 smgrDoPendingDeletes(bool isCommit)
 {
-	ListCell *p;
+	int			nestLevel = GetCurrentTransactionNestLevel();
+	PendingRelDelete *pending;
+	PendingRelDelete *prev;
+	PendingRelDelete *next;
 
-	foreach(p, pendingDeletes)
+	prev = NULL;
+	for (pending = pendingDeletes; pending != NULL; pending = next)
 	{
-		PendingRelDelete *pending = lfirst(p);
-
-		if (pending->atCommit == isCommit)
-			smgr_internal_unlink(pending->relnode,
-								 pending->which,
-								 pending->isTemp,
-								 false);
+		next = pending->next;
+		if (pending->nestLevel < nestLevel)
+		{
+			/* outer-level entries should not be processed yet */
+			prev = pending;
+		}
+		else
+		{
+			/* unlink list entry first, so we don't retry on failure */
+			if (prev)
+				prev->next = next;
+			else
+				pendingDeletes = next;
+			/* do deletion if called for */
+			if (pending->atCommit == isCommit)
+				smgr_internal_unlink(pending->relnode,
+									 pending->which,
+									 pending->isTemp,
+									 false);
+			/* must explicitly free the list entry */
+			pfree(pending);
+			/* prev does not change */
+		}
 	}
-
-	/* We needn't free the cells since they are in CurTransactionContext */
-	pendingDeletes = NIL;
 }
 
 /*
@@ -681,16 +701,15 @@ smgrDoPendingDeletes(bool isCommit)
 int
 smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
 {
+	int			nestLevel = GetCurrentTransactionNestLevel();
 	int			nrels;
 	RelFileNode *rptr;
-	ListCell	*p;
+	PendingRelDelete *pending;
 
 	nrels = 0;
-	foreach(p, pendingDeletes)
+	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 	{
-		PendingRelDelete *pending = lfirst(p);
-
-		if (pending->atCommit == forCommit)
+		if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
 			nrels++;
 	}
 	if (nrels == 0)
@@ -700,50 +719,30 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
 	}
 	rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
 	*ptr = rptr;
-	foreach(p, pendingDeletes)
+	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 	{
-		PendingRelDelete *pending = lfirst(p);
-
-		if (pending->atCommit == forCommit)
+		if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
 			*rptr++ = pending->relnode;
 	}
 	return nrels;
 }
 
-/*
- * AtSubStart_smgr() --- Take care of subtransaction start.
- *
- * Push empty state for the new subtransaction.
- */
-void
-AtSubStart_smgr(void)
-{
-	MemoryContext	old_cxt;
-
-	/* Keep the list-of-lists in TopTransactionContext for simplicity */
-	old_cxt = MemoryContextSwitchTo(TopTransactionContext);
-
-	upperPendingDeletes = lcons(pendingDeletes, upperPendingDeletes);
-
-	pendingDeletes = NIL;
-
-	MemoryContextSwitchTo(old_cxt);
-}
-
 /*
  * AtSubCommit_smgr() --- Take care of subtransaction commit.
  *
- * Reassign all items in the pending deletes list to the parent transaction.
+ * Reassign this subtransaction's pending-deletes entries to the parent level.
  */
 void
 AtSubCommit_smgr(void)
 {
-	List	*parentPendingDeletes;
-
-	parentPendingDeletes = (List *) linitial(upperPendingDeletes);
-	upperPendingDeletes = list_delete_first(upperPendingDeletes);
+	int			nestLevel = GetCurrentTransactionNestLevel();
+	PendingRelDelete *pending;
 
-	pendingDeletes = list_concat(parentPendingDeletes, pendingDeletes);
+	for (pending = pendingDeletes; pending != NULL; pending = pending->next)
+	{
+		if (pending->nestLevel >= nestLevel)
+			pending->nestLevel = nestLevel - 1;
+	}
 }
 
 /*
@@ -757,10 +756,6 @@ void
 AtSubAbort_smgr(void)
 {
 	smgrDoPendingDeletes(false);
-
-	/* Must pop the stack, too */
-	pendingDeletes = (List *) linitial(upperPendingDeletes);
-	upperPendingDeletes = list_delete_first(upperPendingDeletes);
 }
 
 /*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 54ba0a1fe6fa72cb62f5d96e345b4b9fe9d43b5f..36fb347de3e810b7c12afab1423512c61f2ee114 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.423 2004/07/11 00:18:44 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.424 2004/07/17 03:29:00 tgl Exp $
  *
  * NOTES
  *	  this is the "main" module of the postgres backend and
@@ -2796,6 +2796,12 @@ PostgresMain(int argc, char *argv[], const char *username)
 		DisableCatchupInterrupt();
 		debug_query_string = NULL;
 
+		/*
+		 * If there's an active portal, mark it as failed
+		 */
+		if (ActivePortal)
+			ActivePortal->status = PORTAL_FAILED;
+
 		/*
 		 * Make sure we are in a valid memory context during recovery.
 		 *
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 85d05ed1b8bb79920a3673adfbff76cbae137371..49e2a4b008210b48d1faf26b68b56697279633b3 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/pquery.c,v 1.80 2004/06/05 19:48:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/pquery.c,v 1.81 2004/07/17 03:29:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -235,12 +235,25 @@ ChoosePortalStrategy(List *parseTrees)
 void
 PortalStart(Portal portal, ParamListInfo params)
 {
+	Portal		saveActivePortal;
+	ResourceOwner saveResourceOwner;
+	MemoryContext savePortalContext;
 	MemoryContext oldContext;
 	QueryDesc  *queryDesc;
 
 	AssertArg(PortalIsValid(portal));
 	AssertState(portal->queryContext != NULL);	/* query defined? */
-	AssertState(!portal->portalReady);	/* else extra PortalStart */
+	AssertState(portal->status == PORTAL_NEW);	/* else extra PortalStart */
+
+	/*
+	 * Set global portal context pointers.  (Should we set QueryContext?)
+	 */
+	saveActivePortal = ActivePortal;
+	ActivePortal = portal;
+	saveResourceOwner = CurrentResourceOwner;
+	CurrentResourceOwner = portal->resowner;
+	savePortalContext = PortalContext;
+	PortalContext = PortalGetHeapMemory(portal);
 
 	oldContext = MemoryContextSwitchTo(PortalGetHeapMemory(portal));
 
@@ -324,7 +337,11 @@ PortalStart(Portal portal, ParamListInfo params)
 
 	MemoryContextSwitchTo(oldContext);
 
-	portal->portalReady = true;
+	ActivePortal = saveActivePortal;
+	CurrentResourceOwner = saveResourceOwner;
+	PortalContext = savePortalContext;
+
+	portal->status = PORTAL_READY;
 }
 
 /*
@@ -403,12 +420,12 @@ PortalRun(Portal portal, long count,
 {
 	bool		result;
 	Portal		saveActivePortal;
+	ResourceOwner saveResourceOwner;
 	MemoryContext savePortalContext;
 	MemoryContext saveQueryContext;
 	MemoryContext oldContext;
 
 	AssertArg(PortalIsValid(portal));
-	AssertState(portal->portalReady);	/* else no PortalStart */
 
 	/* Initialize completion tag to empty string */
 	if (completionTag)
@@ -425,21 +442,19 @@ PortalRun(Portal portal, long count,
 	/*
 	 * Check for improper portal use, and mark portal active.
 	 */
-	if (portal->portalDone)
+	if (portal->status != PORTAL_READY)
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-		   errmsg("portal \"%s\" cannot be run anymore", portal->name)));
-	if (portal->portalActive)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("portal \"%s\" already active", portal->name)));
-	portal->portalActive = true;
+				 errmsg("portal \"%s\" cannot be run", portal->name)));
+	portal->status = PORTAL_ACTIVE;
 
 	/*
 	 * Set global portal context pointers.
 	 */
 	saveActivePortal = ActivePortal;
 	ActivePortal = portal;
+	saveResourceOwner = CurrentResourceOwner;
+	CurrentResourceOwner = portal->resowner;
 	savePortalContext = PortalContext;
 	PortalContext = PortalGetHeapMemory(portal);
 	saveQueryContext = QueryContext;
@@ -455,6 +470,9 @@ PortalRun(Portal portal, long count,
 			if (completionTag && portal->commandTag)
 				strcpy(completionTag, portal->commandTag);
 
+			/* Mark portal not active */
+			portal->status = PORTAL_READY;
+
 			/*
 			 * Since it's a forward fetch, say DONE iff atEnd is now true.
 			 */
@@ -491,6 +509,9 @@ PortalRun(Portal portal, long count,
 			if (completionTag && portal->commandTag)
 				strcpy(completionTag, portal->commandTag);
 
+			/* Mark portal not active */
+			portal->status = PORTAL_READY;
+
 			/*
 			 * Since it's a forward fetch, say DONE iff atEnd is now true.
 			 */
@@ -499,6 +520,10 @@ PortalRun(Portal portal, long count,
 
 		case PORTAL_MULTI_QUERY:
 			PortalRunMulti(portal, dest, altdest, completionTag);
+
+			/* Prevent portal's commands from being re-executed */
+			portal->status = PORTAL_DONE;
+
 			/* Always complete at end of RunMulti */
 			result = true;
 			break;
@@ -512,10 +537,8 @@ PortalRun(Portal portal, long count,
 
 	MemoryContextSwitchTo(oldContext);
 
-	/* Mark portal not active */
-	portal->portalActive = false;
-
 	ActivePortal = saveActivePortal;
+	CurrentResourceOwner = saveResourceOwner;
 	PortalContext = savePortalContext;
 	QueryContext = saveQueryContext;
 
@@ -914,9 +937,6 @@ PortalRunMulti(Portal portal,
 		else if (strcmp(completionTag, "DELETE") == 0)
 			strcpy(completionTag, "DELETE 0");
 	}
-
-	/* Prevent portal's commands from being re-executed */
-	portal->portalDone = true;
 }
 
 /*
@@ -933,31 +953,29 @@ PortalRunFetch(Portal portal,
 {
 	long		result;
 	Portal		saveActivePortal;
+	ResourceOwner saveResourceOwner;
 	MemoryContext savePortalContext;
 	MemoryContext saveQueryContext;
 	MemoryContext oldContext;
 
 	AssertArg(PortalIsValid(portal));
-	AssertState(portal->portalReady);	/* else no PortalStart */
 
 	/*
 	 * Check for improper portal use, and mark portal active.
 	 */
-	if (portal->portalDone)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-		   errmsg("portal \"%s\" cannot be run anymore", portal->name)));
-	if (portal->portalActive)
+	if (portal->status != PORTAL_READY)
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("portal \"%s\" already active", portal->name)));
-	portal->portalActive = true;
+				 errmsg("portal \"%s\" cannot be run", portal->name)));
+	portal->status = PORTAL_ACTIVE;
 
 	/*
 	 * Set global portal context pointers.
 	 */
 	saveActivePortal = ActivePortal;
 	ActivePortal = portal;
+	saveResourceOwner = CurrentResourceOwner;
+	CurrentResourceOwner = portal->resowner;
 	savePortalContext = PortalContext;
 	PortalContext = PortalGetHeapMemory(portal);
 	saveQueryContext = QueryContext;
@@ -980,9 +998,10 @@ PortalRunFetch(Portal portal,
 	MemoryContextSwitchTo(oldContext);
 
 	/* Mark portal not active */
-	portal->portalActive = false;
+	portal->status = PORTAL_READY;
 
 	ActivePortal = saveActivePortal;
+	CurrentResourceOwner = saveResourceOwner;
 	PortalContext = savePortalContext;
 	QueryContext = saveQueryContext;
 
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index 657144ccd8787d26d61442f7be149f38b154edea..d48db1e8188a5bea48f3c21fa294ded9c3b42800 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -1,14 +1,14 @@
 #
 # Makefile for utils
 #
-# $PostgreSQL: pgsql/src/backend/utils/Makefile,v 1.22 2004/01/04 05:57:21 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/Makefile,v 1.23 2004/07/17 03:29:15 tgl Exp $
 #
 
 subdir = src/backend/utils/
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-SUBDIRS     := adt cache error fmgr hash init misc mmgr sort time mb
+SUBDIRS     := adt cache error fmgr hash init mb misc mmgr resowner sort time
 SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)
 
 
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 8bfa3610bdb96e0b0d6246b63dd6f01e81b9aaef..c382d7497465201eb3e2da4d5ef3faca30101f56 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.113 2004/07/01 00:51:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.114 2004/07/17 03:29:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,6 +31,7 @@
 #include "utils/fmgroids.h"
 #include "utils/catcache.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "utils/syscache.h"
 
 
@@ -360,8 +361,6 @@ CatCacheRemoveCTup(CatCache *cache, CatCTup *ct)
 	/* free associated tuple data */
 	if (ct->tuple.t_data != NULL)
 		pfree(ct->tuple.t_data);
-	if (ct->prev_refcount != NULL)
-		pfree(ct->prev_refcount);
 	pfree(ct);
 
 	--cache->cc_ntup;
@@ -396,8 +395,6 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl)
 	/* free associated tuple data */
 	if (cl->tuple.t_data != NULL)
 		pfree(cl->tuple.t_data);
-	if (cl->prev_refcount != NULL)
-		pfree(cl->prev_refcount);
 	pfree(cl);
 }
 
@@ -531,7 +528,7 @@ CreateCacheMemoryContext(void)
 /*
  *		AtEOXact_CatCache
  *
- * Clean up catcaches at end of transaction (either commit or abort)
+ * Clean up catcaches at end of main transaction (either commit or abort)
  *
  * We scan the caches to reset refcounts to zero.  This is of course
  * necessary in the abort case, since elog() may have interrupted routines.
@@ -564,13 +561,6 @@ AtEOXact_CatCache(bool isCommit)
 				cl->refcount = 0;
 			}
 
-			/*
-			 * Reset the refcount stack.  Drop the item count to zero,
-			 * but don't deallocate the stack itself, so it can be used by
-			 * future subtransactions.
-			 */
-			cl->numpushes = 0;
-
 			/* Clean up any now-deletable dead entries */
 			if (cl->dead)
 				CatCacheRemoveCList(ccp, cl);
@@ -596,174 +586,12 @@ AtEOXact_CatCache(bool isCommit)
 			ct->refcount = 0;
 		}
 
-		/*
-		 * Reset the refcount stack.  Drop the item count to zero,
-		 * but don't deallocate the stack itself, so it can be used by
-		 * future subtransactions.
-		 */
-		ct->numpushes = 0;
-
 		/* Clean up any now-deletable dead entries */
 		if (ct->dead)
 			CatCacheRemoveCTup(ct->my_cache, ct);
 	}
 }
 
-/*
- * AtSubStart_CatCache
- *
- * Saves reference counts of each entry at subtransaction start so they
- * can be restored if the subtransaction later aborts.
- */
-void
-AtSubStart_CatCache(void)
-{
-	CatCache   *ccp;
-	Dlelem	   *elt,
-			   *nextelt;
-	MemoryContext old_cxt;
-   
-
-	old_cxt = MemoryContextSwitchTo(CacheMemoryContext);
-
-	/*
-	 * Prepare CLists
-	 */
-	for (ccp = CacheHdr->ch_caches; ccp; ccp = ccp->cc_next)
-	{
-		for (elt = DLGetHead(&ccp->cc_lists); elt; elt = nextelt)
-		{
-			CatCList   *cl = (CatCList *) DLE_VAL(elt);
-
-			nextelt = DLGetSucc(elt);
-
-			if (cl->numpushes == cl->numalloc)
-			{
-				if (cl->numalloc == 0)
-				{
-					cl->numalloc = 8;
-					cl->prev_refcount = palloc(sizeof(int) * cl->numalloc);
-				}
-				else
-				{
-					cl->numalloc *= 2;
-					cl->prev_refcount = repalloc(cl->prev_refcount, cl->numalloc * sizeof(int));
-				}
-			}
-
-			cl->prev_refcount[cl->numpushes++] = cl->refcount;
-		}
-	}
-
-	/*
-	 * Prepare CTuples
-	 */
-	for (elt = DLGetHead(&CacheHdr->ch_lrulist); elt; elt = nextelt)
-	{
-		CatCTup    *ct = (CatCTup *) DLE_VAL(elt);
-
-		nextelt = DLGetSucc(elt);
-
-		if (ct->numpushes == ct->numalloc)
-		{
-			if (ct->numalloc == 0)
-			{
-				ct->numalloc = 8;
-				ct->prev_refcount = palloc(sizeof(int) * ct->numalloc);
-			}
-			else
-			{
-				ct->numalloc *= 2;
-				ct->prev_refcount = repalloc(ct->prev_refcount, sizeof(int) * ct->numalloc);
-			}
-		}
-
-		ct->prev_refcount[ct->numpushes++] = ct->refcount;
-	}
-
-	MemoryContextSwitchTo(old_cxt);
-}
-
-void
-AtEOSubXact_CatCache(bool isCommit)
-{
-	CatCache   *ccp;
-	Dlelem	   *elt,
-			   *nextelt;
-	
-	/*
-	 * Restore CLists
-	 */
-	for (ccp = CacheHdr->ch_caches; ccp; ccp = ccp->cc_next)
-	{
-		for (elt = DLGetHead(&ccp->cc_lists); elt; elt = nextelt)
-		{
-			CatCList   *cl = (CatCList *) DLE_VAL(elt);
-
-			nextelt = DLGetSucc(elt);
-
-			/*
-			 * During commit, check whether the count is what
-			 * we expect.
-			 */
-			if (isCommit)
-			{
-				int expected_refcount;
-				if (cl->numpushes > 0)
-					expected_refcount = cl->prev_refcount[cl->numpushes - 1];
-				else
-					expected_refcount = 0;
-
-				if (cl->refcount != expected_refcount)
-					elog(WARNING, "catcache reference leak");
-			}
-
-			/*
-			 * During abort we have to restore the original count;
-			 * during commit, we have to restore in case of a leak,
-			 * and it won't harm if this is the expected count.
-			 */
-			if (cl->numpushes > 0)
-				cl->refcount = cl->prev_refcount[--cl->numpushes];
-			else
-				cl->refcount = 0;
-		}
-	}
-
-	/*
-	 * Prepare CTuples
-	 */
-	for (elt = DLGetHead(&CacheHdr->ch_lrulist); elt; elt = nextelt)
-	{
-		CatCTup    *ct = (CatCTup *) DLE_VAL(elt);
-
-		nextelt = DLGetSucc(elt);
-
-		if (isCommit)
-		{
-			int expected_refcount;
-
-			if (ct->numpushes > 0)
-				expected_refcount = ct->prev_refcount[ct->numpushes - 1];
-			else
-				expected_refcount = 0;
-
-			if (ct->refcount != expected_refcount)
-				elog(WARNING, "catcache reference leak");
-		}
-
-		/*
-		 * During abort we have to restore the original count;
-		 * during commit, we have to restore in case of a leak,
-		 * and it won't harm if this is the expected count.
-		 */
-		if (ct->numpushes > 0)
-			ct->refcount = ct->prev_refcount[--ct->numpushes];
-		else
-			ct->refcount = 0;
-	}
-}
-
 /*
  *		ResetCatalogCache
  *
@@ -1334,7 +1162,9 @@ SearchCatCache(CatCache *cache,
 		 */
 		if (!ct->negative)
 		{
+			ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner);
 			ct->refcount++;
+			ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple);
 
 			CACHE3_elog(DEBUG2, "SearchCatCache(%s): found in bucket %d",
 						cache->cc_relname, hashIndex);
@@ -1389,6 +1219,10 @@ SearchCatCache(CatCache *cache,
 		ct = CatalogCacheCreateEntry(cache, ntp,
 									 hashValue, hashIndex,
 									 false);
+		/* immediately set the refcount to 1 */
+		ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner);
+		ct->refcount++;
+		ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple);
 		break;					/* assume only one match */
 	}
 
@@ -1415,10 +1249,9 @@ SearchCatCache(CatCache *cache,
 					cache->cc_relname, hashIndex);
 
 		/*
-		 * We are not returning the new entry to the caller, so reset its
-		 * refcount.
+		 * We are not returning the negative entry to the caller, so leave
+		 * its refcount zero.
 		 */
-		ct->refcount = 0;		/* negative entries never have refs */
 
 		return NULL;
 	}
@@ -1457,6 +1290,7 @@ ReleaseCatCache(HeapTuple tuple)
 	Assert(ct->refcount > 0);
 
 	ct->refcount--;
+	ResourceOwnerForgetCatCacheRef(CurrentResourceOwner, &ct->tuple);
 
 	if (ct->refcount == 0
 #ifndef CATCACHE_FORCE_RELEASE
@@ -1564,7 +1398,10 @@ SearchCatCacheList(CatCache *cache,
 		 * do not move the members to the fronts of their hashbucket
 		 * lists, however, since there's no point in that unless they are
 		 * searched for individually.)	Also bump the members' refcounts.
+		 * (Member refcounts are NOT registered separately with the
+		 * resource owner.)
 		 */
+		ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner);
 		for (i = 0; i < cl->n_members; i++)
 		{
 			cl->members[i]->refcount++;
@@ -1574,6 +1411,7 @@ SearchCatCacheList(CatCache *cache,
 
 		/* Bump the list's refcount and return it */
 		cl->refcount++;
+		ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl);
 
 		CACHE2_elog(DEBUG2, "SearchCatCacheList(%s): found list",
 					cache->cc_relname);
@@ -1639,9 +1477,7 @@ SearchCatCacheList(CatCache *cache,
 			if (ct->c_list)
 				continue;
 
-			/* Found a match, so bump its refcount and move to front */
-			ct->refcount++;
-
+			/* Found a match, so move it to front */
 			DLMoveToFront(&ct->lrulist_elem);
 
 			break;
@@ -1655,6 +1491,16 @@ SearchCatCacheList(CatCache *cache,
 										 false);
 		}
 
+		/*
+		 * We have to bump the member refcounts immediately to ensure they
+		 * won't get dropped from the cache while loading other members.
+		 * If we get an error before we finish constructing the CatCList
+		 * then we will leak those reference counts.  This is annoying but
+		 * it has no real consequence beyond possibly generating some
+		 * warning messages at the next transaction commit, so it's not
+		 * worth fixing.
+		 */
+		ct->refcount++;
 		ctlist = lcons(ct, ctlist);
 		nmembers++;
 	}
@@ -1677,10 +1523,7 @@ SearchCatCacheList(CatCache *cache,
 	cl->cl_magic = CL_MAGIC;
 	cl->my_cache = cache;
 	DLInitElem(&cl->cache_elem, (void *) cl);
-	cl->refcount = 1;			/* count this first reference */
-	cl->prev_refcount = NULL;
-	cl->numpushes = 0;
-	cl->numalloc = 0;
+	cl->refcount = 0;			/* for the moment */
 	cl->dead = false;
 	cl->ordered = ordered;
 	cl->nkeys = nkeys;
@@ -1704,6 +1547,11 @@ SearchCatCacheList(CatCache *cache,
 	CACHE3_elog(DEBUG2, "SearchCatCacheList(%s): made list of %d members",
 				cache->cc_relname, nmembers);
 
+	/* Finally, bump the list's refcount and return it */
+	ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner);
+	cl->refcount++;
+	ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl);
+
 	return cl;
 }
 
@@ -1735,6 +1583,7 @@ ReleaseCatCacheList(CatCList *list)
 	}
 
 	list->refcount--;
+	ResourceOwnerForgetCatCacheListRef(CurrentResourceOwner, list);
 
 	if (list->refcount == 0
 #ifndef CATCACHE_FORCE_RELEASE
@@ -1748,7 +1597,7 @@ ReleaseCatCacheList(CatCList *list)
 /*
  * CatalogCacheCreateEntry
  *		Create a new CatCTup entry, copying the given HeapTuple and other
- *		supplied data into it.	The new entry is given refcount 1.
+ *		supplied data into it.	The new entry initially has refcount 0.
  */
 static CatCTup *
 CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
@@ -1775,13 +1624,10 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 	DLInitElem(&ct->lrulist_elem, (void *) ct);
 	DLInitElem(&ct->cache_elem, (void *) ct);
 	ct->c_list = NULL;
-	ct->refcount = 1;			/* count this first reference */
+	ct->refcount = 0;			/* for the moment */
 	ct->dead = false;
 	ct->negative = negative;
 	ct->hash_value = hashValue;
-	ct->prev_refcount = NULL;
-	ct->numpushes = 0;
-	ct->numalloc = 0;
 
 	DLAddHead(&CacheHdr->ch_lrulist, &ct->lrulist_elem);
 	DLAddHead(&cache->cc_bucket[hashIndex], &ct->cache_elem);
@@ -1791,8 +1637,8 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 
 	/*
 	 * If we've exceeded the desired size of the caches, try to throw away
-	 * the least recently used entry.  NB: the newly-built entry cannot
-	 * get thrown away here, because it has positive refcount.
+	 * the least recently used entry.  NB: be careful not to throw away
+	 * the newly-built entry...
 	 */
 	if (CacheHdr->ch_ntup > CacheHdr->ch_maxtup)
 	{
@@ -1805,7 +1651,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 
 			prevelt = DLGetPred(elt);
 
-			if (oldct->refcount == 0)
+			if (oldct->refcount == 0 && oldct != ct)
 			{
 				CACHE2_elog(DEBUG2, "CatCacheCreateEntry(%s): Overflow, LRU removal",
 							cache->cc_relname);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 23428992724c3ebb680a7059180d6dea7a14738b..c4787042c08d26c7913c741f4d7a52ba91c73b37 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.206 2004/07/01 00:51:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.207 2004/07/17 03:29:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,6 +62,7 @@
 #include "utils/inval.h"
 #include "utils/lsyscache.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
@@ -273,8 +274,6 @@ static void IndexSupportInitialize(Form_pg_index iform,
 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
 				  StrategyNumber numStrats,
 				  StrategyNumber numSupport);
-static inline void RelationPushReferenceCount(Relation rel);
-static inline void RelationPopReferenceCount(Relation rel);
 
 
 /*
@@ -829,17 +828,13 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
 	 */
 	RelationGetRelid(relation) = relid;
 
-	/*
-	 * initialize relation->rd_refcnt
-	 */
-	RelationSetReferenceCount(relation, 1);
-
 	/*
 	 * normal relations are not nailed into the cache; nor can a
 	 * pre-existing relation be new.  It could be temp though.	(Actually,
 	 * it could be new too, but it's okay to forget that fact if forced to
 	 * flush the entry.)
 	 */
+	relation->rd_refcnt = 0;
 	relation->rd_isnailed = 0;
 	relation->rd_isnew = false;
 	relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
@@ -1280,9 +1275,9 @@ formrdesc(const char *relationName,
 	relation->rd_smgr = NULL;
 
 	/*
-	 * initialize reference count
+	 * initialize reference count: 1 because it is nailed in cache
 	 */
-	RelationSetReferenceCount(relation, 1);
+	relation->rd_refcnt = 1;
 
 	/*
 	 * all entries built with this routine are nailed-in-cache; none are
@@ -1487,6 +1482,8 @@ RelationIdGetRelation(Oid relationId)
 	buildinfo.i.info_id = relationId;
 
 	rd = RelationBuildDesc(buildinfo, NULL);
+	if (RelationIsValid(rd))
+		RelationIncrementReferenceCount(rd);
 	return rd;
 }
 
@@ -1516,6 +1513,8 @@ RelationSysNameGetRelation(const char *relationName)
 	buildinfo.i.info_name = (char *) relationName;
 
 	rd = RelationBuildDesc(buildinfo, NULL);
+	if (RelationIsValid(rd))
+		RelationIncrementReferenceCount(rd);
 	return rd;
 }
 
@@ -1524,6 +1523,36 @@ RelationSysNameGetRelation(const char *relationName)
  * ----------------------------------------------------------------
  */
 
+/*
+ * RelationIncrementReferenceCount
+ *		Bump rel's refcount and record the reference in CurrentResourceOwner.
+ *
+ * Note: bootstrap mode has its own weird ideas about relation refcount
+ * behavior; we ought to fix it someday, but for now, just disable
+ * reference count ownership tracking in bootstrap mode.
+ */
+void
+RelationIncrementReferenceCount(Relation rel)
+{
+	ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
+	rel->rd_refcnt += 1;
+	if (!IsBootstrapProcessingMode())
+		ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
+}
+
+/*
+ * RelationDecrementReferenceCount
+ *		Drop a reference; outside bootstrap, CurrentResourceOwner forgets it.
+ */
+void
+RelationDecrementReferenceCount(Relation rel)
+{
+	Assert(rel->rd_refcnt > 0);
+	rel->rd_refcnt -= 1;
+	if (!IsBootstrapProcessingMode())
+		ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
+}
+
 /*
  * RelationClose - close an open relation
  *
@@ -1680,8 +1709,6 @@ RelationClearRelation(Relation relation, bool rebuild)
 	list_free(relation->rd_indexlist);
 	if (relation->rd_indexcxt)
 		MemoryContextDelete(relation->rd_indexcxt);
-	if (relation->rd_prevrefcnt)
-		pfree(relation->rd_prevrefcnt);
 
 	/*
 	 * If we're really done with the relcache entry, blow it away. But if
@@ -1704,6 +1731,10 @@ RelationClearRelation(Relation relation, bool rebuild)
 		 * When rebuilding an open relcache entry, must preserve ref count
 		 * and rd_isnew flag.  Also attempt to preserve the tupledesc and
 		 * rewrite-rule substructures in place.
+		 *
+		 * Note that this process does not touch CurrentResourceOwner,
+		 * which is good because whatever ref counts the entry may have
+		 * do not necessarily belong to that resource owner.
 		 */
 		int			old_refcnt = relation->rd_refcnt;
 		bool		old_isnew = relation->rd_isnew;
@@ -1726,7 +1757,7 @@ RelationClearRelation(Relation relation, bool rebuild)
 			elog(ERROR, "relation %u deleted while still in use",
 				 buildinfo.i.info_id);
 		}
-		RelationSetReferenceCount(relation, old_refcnt);
+		relation->rd_refcnt = old_refcnt;
 		relation->rd_isnew = old_isnew;
 		if (equalTupleDescs(old_att, relation->rd_att))
 		{
@@ -1964,7 +1995,7 @@ RelationCacheInvalidate(void)
 /*
  * AtEOXact_RelationCache
  *
- *	Clean up the relcache at transaction commit or abort.
+ *	Clean up the relcache at main-transaction commit or abort.
  *
  * Note: this must be called *before* processing invalidation messages.
  * In the case of abort, we don't want to try to rebuild any invalidated
@@ -2031,21 +2062,15 @@ AtEOXact_RelationCache(bool isCommit)
 				elog(WARNING, "relcache reference leak: relation \"%s\" has refcnt %d instead of %d",
 					 RelationGetRelationName(relation),
 					 relation->rd_refcnt, expected_refcnt);
-				RelationSetReferenceCount(relation, expected_refcnt);
+				relation->rd_refcnt = expected_refcnt;
 			}
 		}
 		else
 		{
 			/* abort case, just reset it quietly */
-			RelationSetReferenceCount(relation, expected_refcnt);
+			relation->rd_refcnt = expected_refcnt;
 		}
 
-		/*
-		 * Reset the refcount stack.  Just drop the item count; don't deallocate
-		 * the stack itself so it can be reused by future subtransactions.
-		 */
-		relation->rd_numpushed = 0;
-
 		/*
 		 * Flush any temporary index list.
 		 */
@@ -2058,131 +2083,6 @@ AtEOXact_RelationCache(bool isCommit)
 	}
 }
 
-/*
- * RelationPushReferenceCount
- *
- * Push the current reference count into the stack.  Don't modify the
- * reference count itself.
- */
-static inline void
-RelationPushReferenceCount(Relation rel)
-{
-	/* Enlarge the stack if we run out of space. */
-	if (rel->rd_numpushed == rel->rd_numalloc)
-	{
-		MemoryContext	old_cxt = MemoryContextSwitchTo(CacheMemoryContext);
-
-		if (rel->rd_numalloc == 0)
-		{
-			rel->rd_numalloc = 8;
-			rel->rd_prevrefcnt = palloc(rel->rd_numalloc * sizeof(int));
-		}
-		else
-		{
-			rel->rd_numalloc *= 2;
-			rel->rd_prevrefcnt = repalloc(rel->rd_prevrefcnt, rel->rd_numalloc * sizeof(int));
-		}
-
-		MemoryContextSwitchTo(old_cxt);
-	}
-
-	rel->rd_prevrefcnt[rel->rd_numpushed++] = rel->rd_refcnt;
-}
-
-/*
- * RelationPopReferenceCount
- *
- * Pop the latest stored reference count.  If there is none, drop it
- * to zero; the entry was created in the current subtransaction.
- */
-static inline void
-RelationPopReferenceCount(Relation rel)
-{
-	if (rel->rd_numpushed == 0)
-	{
-		rel->rd_refcnt = rel->rd_isnailed ? 1 : 0;
-		return;
-	}
-
-	rel->rd_refcnt = rel->rd_prevrefcnt[--rel->rd_numpushed];
-}
-
-/*
- * AtEOSubXact_RelationCache
- */
-void
-AtEOSubXact_RelationCache(bool isCommit)
-{
-	HASH_SEQ_STATUS status;
-	RelIdCacheEnt *idhentry;
-
-	/* We'd better not be bootstrapping. */
-	Assert(!IsBootstrapProcessingMode());
-
-	hash_seq_init(&status, RelationIdCache);
-
-	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
-	{
-		Relation	relation = idhentry->reldesc;
-
-		/*
-		 * During subtransaction commit, we first check whether the
-		 * current refcount is correct: if there is no item in the stack,
-		 * the relcache entry was created during this subtransaction, it should
-		 * be 0 (or 1 for nailed relations).  If the stack has at least one
-		 * item, the expected count is whatever that item is.
-		 */
-		if (isCommit)
-		{
-			int expected_refcnt;
-
-			if (relation->rd_numpushed == 0)
-				expected_refcnt = relation->rd_isnailed ? 1 : 0;
-			else
-				expected_refcnt = relation->rd_prevrefcnt[relation->rd_numpushed - 1];
-
-			if (relation->rd_refcnt != expected_refcnt)
-			{
-				elog(WARNING, "relcache reference leak: relation \"%s\" has refcnt %d instead of %d",
-						RelationGetRelationName(relation),
-						relation->rd_refcnt, expected_refcnt);
-			}
-		}
-
-		/*
-		 * On commit, the expected count is stored so there's no harm in
-		 * popping it (and we may need to fix if there was a leak); and during
-		 * abort, the correct refcount has to be restored.
-		 */
-		RelationPopReferenceCount(relation);
-	}
-}
-
-/*
- * AtSubStart_RelationCache
- *
- * At subtransaction start, we push the current reference count into
- * the refcount stack, so it can be restored if the subtransaction aborts.
- */
-void
-AtSubStart_RelationCache(void)
-{
-	HASH_SEQ_STATUS status;
-	RelIdCacheEnt *idhentry;
-
-	/* We'd better not be bootstrapping. */
-	Assert(!IsBootstrapProcessingMode());
-
-	hash_seq_init(&status, RelationIdCache);
-
-	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
-	{
-		Relation	relation = idhentry->reldesc;
-
-		RelationPushReferenceCount(relation);
-	}
-}
-
 /*
  *		RelationBuildLocalRelation
  *			Build a relcache entry for an about-to-be-created relation,
@@ -2223,7 +2123,7 @@ RelationBuildLocalRelation(const char *relname,
 	/* make sure relation is marked as having no open file yet */
 	rel->rd_smgr = NULL;
 
-	RelationSetReferenceCount(rel, 1);
+	rel->rd_refcnt = nailit ? 1 : 0;
 
 	/* it's being created in this transaction */
 	rel->rd_isnew = true;
@@ -2305,6 +2205,11 @@ RelationBuildLocalRelation(const char *relname,
 	 */
 	MemoryContextSwitchTo(oldcxt);
 
+	/*
+	 * Caller expects us to pin the returned entry.
+	 */
+	RelationIncrementReferenceCount(rel);
+
 	return rel;
 }
 
@@ -2422,7 +2327,7 @@ RelationCacheInitializePhase2(void)
 			buildinfo.i.info_name = (indname); \
 			ird = RelationBuildDesc(buildinfo, NULL); \
 			ird->rd_isnailed = 1; \
-			RelationSetReferenceCount(ird, 1); \
+			ird->rd_refcnt = 1; \
 		} while (0)
 
 		LOAD_CRIT_INDEX(ClassNameNspIndex);
@@ -3201,9 +3106,9 @@ load_relcache_init_file(void)
 		rel->rd_smgr = NULL;
 		rel->rd_targblock = InvalidBlockNumber;
 		if (rel->rd_isnailed)
-			RelationSetReferenceCount(rel, 1);
+			rel->rd_refcnt = 1;
 		else
-			RelationSetReferenceCount(rel, 0);
+			rel->rd_refcnt = 0;
 		rel->rd_indexvalid = 0;
 		rel->rd_indexlist = NIL;
 		MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 466b2fc97bf635cb638c147593b718b394eeb524..2093dc2a4636dff21834492c44be7eaab6945b92 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.66 2004/07/01 00:51:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.67 2004/07/17 03:29:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -186,6 +186,10 @@ CreatePortal(const char *name, bool allowDup, bool dupSilent)
 										 ALLOCSET_SMALL_INITSIZE,
 										 ALLOCSET_SMALL_MAXSIZE);
 
+	/* create a resource owner for the portal */
+	portal->resowner = ResourceOwnerCreate(CurTransactionResourceOwner,
+										   "Portal");
+
 	/* initialize portal fields that don't start off zero */
 	portal->cleanup = PortalCleanup;
 	portal->createXact = GetCurrentTransactionId();
@@ -291,17 +295,14 @@ PortalCreateHoldStore(Portal portal)
 /*
  * PortalDrop
  *		Destroy the portal.
- *
- *		isError: if true, we are destroying portals at the end of a failed
- *		transaction.  (This causes PortalCleanup to skip unneeded steps.)
  */
 void
-PortalDrop(Portal portal, bool isError)
+PortalDrop(Portal portal, bool isTopCommit)
 {
 	AssertArg(PortalIsValid(portal));
 
 	/* Not sure if this case can validly happen or not... */
-	if (portal->portalActive)
+	if (portal->status == PORTAL_ACTIVE)
 		elog(ERROR, "cannot drop active portal");
 
 	/*
@@ -314,7 +315,49 @@ PortalDrop(Portal portal, bool isError)
 
 	/* let portalcmds.c clean up the state it knows about */
 	if (PointerIsValid(portal->cleanup))
-		(*portal->cleanup) (portal, isError);
+		(*portal->cleanup) (portal);
+
+	/*
+	 * Release any resources still attached to the portal.  There are
+	 * several cases being covered here:
+	 *
+	 * Top transaction commit (indicated by isTopCommit): normally we should
+	 * do nothing here and let the regular end-of-transaction resource
+	 * releasing mechanism handle these resources too.  However, if we have
+	 * a FAILED portal (eg, a cursor that got an error), we'd better clean
+	 * up its resources to avoid resource-leakage warning messages.
+	 *
+	 * Sub transaction commit: never comes here at all, since we don't
+	 * kill any portals in AtSubCommit_Portals().
+	 *
+	 * Main or sub transaction abort: we will do nothing here because
+	 * portal->resowner was already set NULL; the resources were already
+	 * cleaned up in transaction abort.
+	 *
+	 * Ordinary portal drop: must release resources.  However, if the portal
+	 * is not FAILED then we do not release its locks.  The locks become
+	 * the responsibility of the transaction's ResourceOwner (since it is
+	 * the parent of the portal's owner) and will be released when the
+	 * transaction eventually ends.
+	 */
+	if (portal->resowner &&
+		(!isTopCommit || portal->status == PORTAL_FAILED))
+	{
+		bool	isCommit = (portal->status != PORTAL_FAILED);
+
+		ResourceOwnerRelease(portal->resowner,
+							 RESOURCE_RELEASE_BEFORE_LOCKS,
+							 isCommit, false);
+		ResourceOwnerRelease(portal->resowner,
+							 RESOURCE_RELEASE_LOCKS,
+							 isCommit, false);
+		ResourceOwnerRelease(portal->resowner,
+							 RESOURCE_RELEASE_AFTER_LOCKS,
+							 isCommit, false);
+		if (!isCommit)
+			ResourceOwnerDelete(portal->resowner);
+	}
+	portal->resowner = NULL;
 
 	/*
 	 * Delete tuplestore if present.  We should do this even under error
@@ -396,19 +439,29 @@ AtCommit_Portals(void)
 		/*
 		 * Do not touch active portals --- this can only happen in the
 		 * case of a multi-transaction utility command, such as VACUUM.
+		 *
+		 * Note however that any resource owner attached to such a portal
+		 * is still going to go away, so don't leave a dangling pointer.
 		 */
-		if (portal->portalActive)
+		if (portal->status == PORTAL_ACTIVE)
+		{
+			portal->resowner = NULL;
 			continue;
+		}
 
-		if (portal->cursorOptions & CURSOR_OPT_HOLD)
-		{
-			/*
-			 * Do nothing to cursors held over from a previous
-			 * transaction.
-			 */
-			if (portal->createXact != xact)
-				continue;
+		/*
+		 * Do nothing else to cursors held over from a previous
+		 * transaction. (This test must include checking CURSOR_OPT_HOLD,
+		 * else we will fail to clean up a VACUUM portal if it fails after
+		 * its first sub-transaction.)
+		 */
+		if (portal->createXact != xact &&
+			(portal->cursorOptions & CURSOR_OPT_HOLD))
+			continue;
 
+		if ((portal->cursorOptions & CURSOR_OPT_HOLD) &&
+			portal->status == PORTAL_READY)
+		{
 			/*
 			 * We are exiting the transaction that created a holdable
 			 * cursor.	Instead of dropping the portal, prepare it for
@@ -420,11 +473,18 @@ AtCommit_Portals(void)
 			 */
 			PortalCreateHoldStore(portal);
 			PersistHoldablePortal(portal);
+
+			/*
+			 * Any resources belonging to the portal will be released in the
+			 * upcoming transaction-wide cleanup; the portal will no
+			 * longer have its own resources.
+			 */
+			portal->resowner = NULL;
 		}
 		else
 		{
 			/* Zap all non-holdable portals */
-			PortalDrop(portal, false);
+			PortalDrop(portal, true);
 		}
 	}
 }
@@ -432,13 +492,11 @@ AtCommit_Portals(void)
 /*
  * Abort processing for portals.
  *
- * At this point we reset the "active" flags and run the cleanup hook if
+ * At this point we reset "active" status and run the cleanup hook if
  * present, but we can't release memory until the cleanup call.
  *
  * The reason we need to reset active is so that we can replace the unnamed
- * portal, else we'll fail to execute ROLLBACK when it arrives.  Also, we
- * want to run the cleanup hook now to be certain it knows that we had an
- * error abort and not successful conclusion.
+ * portal, else we'll fail to execute ROLLBACK when it arrives.
  */
 void
 AtAbort_Portals(void)
@@ -453,7 +511,8 @@ AtAbort_Portals(void)
 	{
 		Portal		portal = hentry->portal;
 
-		portal->portalActive = false;
+		if (portal->status == PORTAL_ACTIVE)
+			portal->status = PORTAL_FAILED;
 
 		/*
 		 * Do nothing else to cursors held over from a previous
@@ -468,17 +527,22 @@ AtAbort_Portals(void)
 		/* let portalcmds.c clean up the state it knows about */
 		if (PointerIsValid(portal->cleanup))
 		{
-			(*portal->cleanup) (portal, true);
+			(*portal->cleanup) (portal);
 			portal->cleanup = NULL;
 		}
+		/*
+		 * Any resources belonging to the portal will be released in the
+		 * upcoming transaction-wide cleanup; they will be gone before
+		 * we run PortalDrop.
+		 */
+		portal->resowner = NULL;
 	}
 }
 
 /*
  * Post-abort cleanup for portals.
  *
- * Delete all portals not held over from prior transactions.
- */
+ * Delete all portals not held over from prior transactions.  */
 void
 AtCleanup_Portals(void)
 {
@@ -492,10 +556,9 @@ AtCleanup_Portals(void)
 	{
 		Portal		portal = hentry->portal;
 
-		/*
-		 * Let's just make sure no one's active...
-		 */
-		portal->portalActive = false;
+		/* AtAbort_Portals should have fixed these: */
+		Assert(portal->status != PORTAL_ACTIVE);
+		Assert(portal->resowner == NULL);
 
 		/*
 		 * Do nothing else to cursors held over from a previous
@@ -507,8 +570,8 @@ AtCleanup_Portals(void)
 			(portal->cursorOptions & CURSOR_OPT_HOLD))
 			continue;
 
-		/* Else zap it with prejudice. */
-		PortalDrop(portal, true);
+		/* Else zap it. */
+		PortalDrop(portal, false);
 	}
 }
 
@@ -516,11 +579,11 @@ AtCleanup_Portals(void)
  * Pre-subcommit processing for portals.
  *
  * Reassign the portals created in the current subtransaction to the parent
- * transaction.  (XXX perhaps we should reassign only holdable cursors,
- * and drop the rest?)
+ * transaction.
  */
 void
-AtSubCommit_Portals(TransactionId parentXid)
+AtSubCommit_Portals(TransactionId parentXid,
+					ResourceOwner parentXactOwner)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
@@ -533,19 +596,24 @@ AtSubCommit_Portals(TransactionId parentXid)
 		Portal	portal = hentry->portal;
 
 		if (portal->createXact == curXid)
+		{
 			portal->createXact = parentXid;
+			if (portal->resowner)
+				ResourceOwnerNewParent(portal->resowner, parentXactOwner);
+		}
 	}
 }
 
 /*
  * Subtransaction abort handling for portals.
  *
- * Deactivate all portals created during the failed subtransaction.
+ * Deactivate failed portals created during the failed subtransaction.
  * Note that per AtSubCommit_Portals, this will catch portals created
  * in descendants of the subtransaction too.
  */
 void
-AtSubAbort_Portals(void)
+AtSubAbort_Portals(TransactionId parentXid,
+				   ResourceOwner parentXactOwner)
 {
 	HASH_SEQ_STATUS status;
 	PortalHashEnt *hentry;
@@ -560,13 +628,39 @@ AtSubAbort_Portals(void)
 		if (portal->createXact != curXid)
 			continue;
 
-		portal->portalActive = false;
+		/*
+		 * Force any active portals of my own transaction into FAILED state.
+		 * This is mostly to ensure that a portal running a FETCH will go
+		 * FAILED if the underlying cursor fails.  (Note we do NOT want to
+		 * do this to upper-level portals, since they may be able to continue.)
+		 */
+ 		if (portal->status == PORTAL_ACTIVE)
+			portal->status = PORTAL_FAILED;
 
-		/* let portalcmds.c clean up the state it knows about */
-		if (PointerIsValid(portal->cleanup))
+		/*
+		 * If the portal is READY then allow it to survive into the
+		 * parent transaction; otherwise shut it down.
+		 */
+		if (portal->status == PORTAL_READY)
 		{
-			(*portal->cleanup) (portal, true);
-			portal->cleanup = NULL;
+			portal->createXact = parentXid;
+			if (portal->resowner)
+				ResourceOwnerNewParent(portal->resowner, parentXactOwner);
+		}
+		else
+		{
+			/* let portalcmds.c clean up the state it knows about */
+			if (PointerIsValid(portal->cleanup))
+			{
+				(*portal->cleanup) (portal);
+				portal->cleanup = NULL;
+			}
+			/*
+			 * Any resources belonging to the portal will be released in the
+			 * upcoming transaction-wide cleanup; they will be gone before
+			 * we run PortalDrop.
+			 */
+			portal->resowner = NULL;
 		}
 	}
 }
@@ -574,8 +668,8 @@ AtSubAbort_Portals(void)
 /*
  * Post-subabort cleanup for portals.
  *
- * Drop all portals created in the finishing subtransaction and all
- * its descendants.
+ * Drop all portals created in the failed subtransaction (but note that
+ * we will not drop any that were reassigned to the parent above).
  */
 void
 AtSubCleanup_Portals(void)
@@ -593,12 +687,11 @@ AtSubCleanup_Portals(void)
 		if (portal->createXact != curXid)
 			continue;
 
-		/*
-		 * Let's just make sure no one's active...
-		 */
-		portal->portalActive = false;
+		/* AtSubAbort_Portals should have fixed these: */
+		Assert(portal->status != PORTAL_ACTIVE);
+		Assert(portal->resowner == NULL);
 
-		/* Zap it with prejudice. */
-		PortalDrop(portal, true);
+		/* Zap it. */
+		PortalDrop(portal, false);
 	}
 }
diff --git a/src/backend/utils/resowner/Makefile b/src/backend/utils/resowner/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..691d316f162b2fd94d7a9606ff2af41970de8381
--- /dev/null
+++ b/src/backend/utils/resowner/Makefile
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for utils/resowner
+#
+# IDENTIFICATION
+#    $PostgreSQL: pgsql/src/backend/utils/resowner/Makefile,v 1.1 2004/07/17 03:30:10 tgl Exp $
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/utils/resowner
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = resowner.o
+
+all: SUBSYS.o
+
+SUBSYS.o: $(OBJS)
+	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
+
+depend dep:
+	$(CC) -MM $(CFLAGS) *.c >depend
+
+clean: 
+	rm -f SUBSYS.o $(OBJS)
+
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff --git a/src/backend/utils/resowner/README b/src/backend/utils/resowner/README
new file mode 100644
index 0000000000000000000000000000000000000000..27180f6aff70005bd783f0b5930cdc448aa91183
--- /dev/null
+++ b/src/backend/utils/resowner/README
@@ -0,0 +1,74 @@
+$PostgreSQL: pgsql/src/backend/utils/resowner/README,v 1.1 2004/07/17 03:30:10 tgl Exp $
+
+Notes about resource owners
+---------------------------
+
+ResourceOwner objects are a concept invented to simplify management of
+query-related resources, such as buffer pins and table locks.  These
+resources need to be tracked in a reliable way to ensure that they will
+be released at query end, even if the query fails due to an error.
+Rather than expecting the entire executor to have bulletproof data
+structures, we localize the tracking of such resources into a single
+module.
+
+The design of the ResourceOwner API is modeled on our MemoryContext API,
+which has proven very flexible and successful in preventing memory leaks.
+In particular we allow ResourceOwners to have child ResourceOwner objects
+so that there can be forests of the things; releasing a parent
+ResourceOwner acts on all its direct and indirect children as well.
+
+(It is tempting to consider unifying ResourceOwners and MemoryContexts
+into a single object type, but their usage patterns are sufficiently
+different that this is probably not really a helpful thing to do.)
+
+We create a ResourceOwner for each transaction or subtransaction as
+well as one for each Portal.  During execution of a Portal, the global
+variable CurrentResourceOwner points to the Portal's ResourceOwner.
+This causes operations such as ReadBuffer and LockAcquire to record
+ownership of the acquired resources in that ResourceOwner object.
+
+When a Portal is closed, any remaining resources (typically only locks)
+become the responsibility of the current transaction.  This is represented
+by making the Portal's ResourceOwner a child of the current transaction's
+ResourceOwner.  Similarly, subtransaction ResourceOwners are children of
+their immediate parent.
+
+We need transaction-related ResourceOwners as well as Portal-related ones
+because transactions may initiate operations that require resources (such
+as query parsing) when no associated Portal exists yet.
+
+
+API overview
+------------
+
+The basic operations on a ResourceOwner are:
+
+* create a ResourceOwner
+
+* associate some resource with a ResourceOwner, or disassociate it again
+
+* release a ResourceOwner's assets (free all owned resources, but not the
+  owner object itself)
+
+* delete a ResourceOwner (including child owner objects); all resources
+  must have been released beforehand
+
+Currently, ResourceOwners contain direct support for recording ownership
+of buffer pins, lmgr locks, and catcache and relcache references.  Other
+objects can be associated with a ResourceOwner by recording the address of
+the owning ResourceOwner in such an object.  There is an API for other
+modules to get control during ResourceOwner release, so that they can scan
+their own data structures to find the objects that need to be deleted.
+
+Whenever we are inside a transaction, the global variable
+CurrentResourceOwner shows which resource owner should be assigned
+ownership of acquired resources.  Note however that CurrentResourceOwner
+is NULL when not inside any transaction (or when inside a failed
+transaction).  In this case it is not valid to acquire query-lifespan
+resources.
+
+When unpinning a buffer or releasing a lock or cache reference,
+CurrentResourceOwner must point to the same resource owner that was current
+when the buffer, lock, or cache reference was acquired.  It would be possible
+to relax this restriction given additional bookkeeping effort, but at present
+there seems no need.
diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c
new file mode 100644
index 0000000000000000000000000000000000000000..e2eb1183ef41bb5d8feb87cd7fe8ba4b6f064bfa
--- /dev/null
+++ b/src/backend/utils/resowner/resowner.c
@@ -0,0 +1,840 @@
+/*-------------------------------------------------------------------------
+ *
+ * resowner.c
+ *	  POSTGRES resource owner management code.
+ *
+ * Query-lifespan resources are tracked by associating them with
+ * ResourceOwner objects.  This provides a simple mechanism for ensuring
+ * that such resources are freed at the right time.
+ * See utils/resowner/README for more info.
+ *
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/utils/resowner/resowner.c,v 1.1 2004/07/17 03:30:10 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/resowner.h"
+#include "access/gistscan.h"
+#include "access/hash.h"
+#include "access/rtree.h"
+#include "storage/bufmgr.h"
+#include "storage/proc.h"
+#include "utils/memutils.h"
+#include "utils/relcache.h"
+
+
+/*
+ * Info needed to identify/release a lock
+ */
+typedef struct LockIdData
+{
+	/* we assume lockmethodid is part of locktag */
+	LOCKTAG		locktag;		/* lock tag; lockmethodid is read from it */
+	TransactionId xid;			/* xid passed back to LockRelease */
+	LOCKMODE	lockmode;		/* lock mode passed back to LockRelease */
+} LockIdData;
+
+
+/*
+ * ResourceOwner objects look like this: tree links plus per-kind arrays
+ */
+typedef struct ResourceOwnerData
+{
+	ResourceOwner parent;		/* NULL if no parent (toplevel owner) */
+	ResourceOwner firstchild;	/* head of linked list of children */
+	ResourceOwner nextchild;	/* next child of same parent */
+	const char   *name;			/* name (just for debugging) */
+
+	/* We have built-in support for remembering owned buffers */
+	int			nbuffers;		/* number of owned buffer pins */
+	Buffer	   *buffers;		/* dynamically allocated array */
+	int			maxbuffers;		/* currently allocated array size */
+
+	/* We have built-in support for remembering owned locks */
+	int			nlocks;			/* number of owned locks */
+	LockIdData *locks;			/* dynamically allocated array */
+	int			maxlocks;		/* currently allocated array size */
+
+	/* We have built-in support for remembering catcache references */
+	int			ncatrefs;		/* number of owned catcache pins */
+	HeapTuple  *catrefs;		/* dynamically allocated array */
+	int			maxcatrefs;		/* currently allocated array size */
+
+	int			ncatlistrefs;	/* number of owned catcache-list pins */
+	CatCList  **catlistrefs;	/* dynamically allocated array */
+	int			maxcatlistrefs;	/* currently allocated array size */
+
+	/* We have built-in support for remembering relcache references */
+	int			nrelrefs;		/* number of owned relcache pins */
+	Relation   *relrefs;		/* dynamically allocated array */
+	int			maxrelrefs;		/* currently allocated array size */
+} ResourceOwnerData;
+
+
+/*****************************************************************************
+ *	  GLOBAL MEMORY															 *
+ *****************************************************************************/
+
+ResourceOwner CurrentResourceOwner = NULL;
+ResourceOwner CurTransactionResourceOwner = NULL;
+ResourceOwner TopTransactionResourceOwner = NULL;
+
+/*
+ * List of add-on callbacks for resource releasing
+ */
+typedef struct ResourceReleaseCallbackItem
+{
+	struct ResourceReleaseCallbackItem *next;	/* next item in list */
+	ResourceReleaseCallback callback;	/* add-on function for this item */
+	void	   *arg;			/* opaque datum stored with callback */
+} ResourceReleaseCallbackItem;
+
+static ResourceReleaseCallbackItem *ResourceRelease_callbacks = NULL;
+
+
+/*****************************************************************************
+ *	  EXPORTED ROUTINES														 *
+ *****************************************************************************/
+
+
+/*
+ * ResourceOwnerCreate
+ *		Create an empty ResourceOwner (first child of parent, when given).
+ *
+ * The name pointer is stored, not copied.  All ResourceOwner objects are
+ * kept in TopMemoryContext, since they should only be freed explicitly.
+ */
+ResourceOwner
+ResourceOwnerCreate(ResourceOwner parent, const char *name)
+{
+	ResourceOwner owner;
+
+	owner = (ResourceOwner) MemoryContextAllocZero(TopMemoryContext,
+												   sizeof(ResourceOwnerData));
+	owner->name = name;
+
+	if (parent)
+	{
+		owner->parent = parent;
+		owner->nextchild = parent->firstchild;
+		parent->firstchild = owner;
+	}
+
+	return owner;
+}
+
+/*
+ * ResourceOwnerRelease
+ *		Release all resources owned by a ResourceOwner and its descendants,
+ *		but don't delete the owner objects themselves.
+ *
+ * Note that this executes just one phase of release, and so typically
+ * must be called three times.  We do it this way because (a) we want to
+ * do all the recursion separately for each phase, thereby preserving
+ * the needed order of operations; and (b) xact.c may have other operations
+ * to do between the phases.
+ *
+ * phase: release phase to execute
+ * isCommit: true for successful completion of a query or transaction,
+ *			false for unsuccessful
+ * isTopLevel: true if completing a main transaction, else false
+ *
+ * isCommit is passed because some modules may expect that their resources
+ * were all released already if the transaction or portal finished normally.
+ * If so it is reasonable to give a warning (NOT an error) should any
+ * unreleased resources be present.  When isCommit is false, such warnings
+ * are generally inappropriate.
+ *
+ * isTopLevel is passed when we are releasing TopTransactionResourceOwner
+ * at completion of a main transaction.  This generally means that *all*
+ * resources will be released, and so we can optimize things a bit.
+ */
+void
+ResourceOwnerRelease(ResourceOwner owner,
+					 ResourceReleasePhase phase,
+					 bool isCommit,
+					 bool isTopLevel)
+{
+	ResourceOwner child;
+	ResourceOwner save;
+	ResourceReleaseCallbackItem *item;
+
+	/* Recurse first, so that descendants are processed before this owner */
+	for (child = owner->firstchild; child != NULL; child = child->nextchild)
+		ResourceOwnerRelease(child, phase, isCommit, isTopLevel);
+
+	/*
+	 * Make CurrentResourceOwner point to me, so that ReleaseBuffer etc
+	 * don't get confused.  The previous value is restored before returning.
+	 */
+	save = CurrentResourceOwner;
+	CurrentResourceOwner = owner;
+
+	if (phase == RESOURCE_RELEASE_BEFORE_LOCKS)
+	{
+		/* Release buffer pins */
+		if (isTopLevel)
+		{
+			/*
+			 * For a top-level xact we are going to release all buffers,
+			 * so just do a single bufmgr call at the top of the recursion.
+			 */
+			if (owner == TopTransactionResourceOwner)
+				AtEOXact_Buffers(isCommit);
+			/* Mark object as owning no buffers, just for sanity */
+			owner->nbuffers = 0;
+		}
+		else
+		{
+			/*
+			 * Release buffers retail.  Note that ReleaseBuffer will remove
+			 * the buffer entry from my list, so I just have to iterate till
+			 * there are none.
+			 *
+			 * XXX this is fairly inefficient due to multiple BufMgrLock grabs
+			 * if there are lots of buffers to be released, but we don't
+			 * expect many (indeed none in the success case) so it's probably
+			 * not worth optimizing.
+			 *
+			 * We are however careful to release back-to-front, so as to
+			 * avoid O(N^2) behavior in ResourceOwnerForgetBuffer().
+			 */
+			while (owner->nbuffers > 0)
+				ReleaseBuffer(owner->buffers[owner->nbuffers - 1]);
+		}
+		/* Release relcache references */
+		if (isTopLevel)
+		{
+			/*
+			 * For a top-level xact we are going to release all references,
+			 * so just do a single relcache call at the top of the recursion.
+			 */
+			if (owner == TopTransactionResourceOwner)
+				AtEOXact_RelationCache(isCommit);
+			/* Mark object as owning no relrefs, just for sanity */
+			owner->nrelrefs = 0;
+		}
+		else
+		{
+			/*
+			 * Release relcache refs retail.  Note that RelationClose will
+			 * remove the relref entry from my list, so I just have to iterate
+			 * till there are none.
+			 */
+			while (owner->nrelrefs > 0)
+				RelationClose(owner->relrefs[owner->nrelrefs - 1]);
+		}
+	}
+	else if (phase == RESOURCE_RELEASE_LOCKS)
+	{
+		if (isTopLevel)
+		{
+			/*
+			 * For a top-level xact we are going to release all locks (or at
+			 * least all non-session locks), so just do a single lmgr call
+			 * at the top of the recursion.
+			 */
+			if (owner == TopTransactionResourceOwner)
+				ProcReleaseLocks(isCommit);
+			/* Mark object as holding no locks, just for sanity */
+			owner->nlocks = 0;
+		}
+		else if (!isCommit)
+		{
+			/*
+			 * Release locks retail.  Note that LockRelease will remove
+			 * the lock entry from my list, so I just have to iterate till
+			 * there are none.  Also note that if we are committing a
+			 * subtransaction, we do NOT release its locks yet.
+			 *
+			 * XXX as above, this is a bit inefficient but probably not worth
+			 * the trouble to optimize more.
+			 */
+			while (owner->nlocks > 0)
+			{
+				LockIdData *lockid = &owner->locks[owner->nlocks - 1];
+
+				LockRelease(lockid->locktag.lockmethodid,
+							&lockid->locktag,
+							lockid->xid,
+							lockid->lockmode);
+			}
+		}
+	}
+	else if (phase == RESOURCE_RELEASE_AFTER_LOCKS)
+	{
+		/* Release catcache references */
+		if (isTopLevel)
+		{
+			/*
+			 * For a top-level xact we are going to release all references,
+			 * so just do a single catcache call at the top of the recursion.
+			 */
+			if (owner == TopTransactionResourceOwner)
+				AtEOXact_CatCache(isCommit);
+			/* Mark object as owning no catrefs, just for sanity */
+			owner->ncatrefs = 0;
+			owner->ncatlistrefs = 0;
+		}
+		else
+		{
+			/*
+			 * Release catcache refs retail.  Note that ReleaseCatCache will
+			 * remove the catref entry from my list, so I just have to iterate
+			 * till there are none.  Ditto for catcache lists.
+			 */
+			while (owner->ncatrefs > 0)
+				ReleaseCatCache(owner->catrefs[owner->ncatrefs - 1]);
+			while (owner->ncatlistrefs > 0)
+				ReleaseCatCacheList(owner->catlistrefs[owner->ncatlistrefs - 1]);
+		}
+		/* Clean up index scans too (for every owner, top-level or not) */
+		ReleaseResources_gist();
+		ReleaseResources_hash();
+		ReleaseResources_rtree();
+	}
+
+	/* Let add-on modules get a chance too (run for each owner and phase) */
+	for (item = ResourceRelease_callbacks; item; item = item->next)
+		(*item->callback) (phase, isCommit, isTopLevel, item->arg);
+
+	CurrentResourceOwner = save;
+}
+
+/*
+ * ResourceOwnerDelete
+ *		Destroy an owner object together with all of its descendants.
+ *
+ * Every resource in the object tree must already have been released.
+ */
+void
+ResourceOwnerDelete(ResourceOwner owner)
+{
+	ResourceOwner child;
+
+	/* Freeing the active owner would leave CurrentResourceOwner dangling */
+	Assert(owner != CurrentResourceOwner);
+
+	/* Nor may any resource still be registered with this owner */
+	Assert(owner->nbuffers == 0);
+	Assert(owner->nlocks == 0);
+	Assert(owner->ncatrefs == 0);
+	Assert(owner->ncatlistrefs == 0);
+	Assert(owner->nrelrefs == 0);
+
+	/*
+	 * Each recursive call unlinks that child from me, so loop until none.
+	 */
+	while ((child = owner->firstchild) != NULL)
+		ResourceOwnerDelete(child);
+
+	/*
+	 * Detach from the parent before freeing anything, so that an error
+	 * cannot leave a destroyed owner in the tree (a leak beats a crash).
+	 */
+	ResourceOwnerNewParent(owner, NULL);
+
+	/* Free whichever arrays were allocated, then the object itself */
+	if (owner->buffers != NULL)
+		pfree(owner->buffers);
+	if (owner->locks != NULL)
+		pfree(owner->locks);
+	if (owner->catrefs != NULL)
+		pfree(owner->catrefs);
+	if (owner->catlistrefs != NULL)
+		pfree(owner->catlistrefs);
+	if (owner->relrefs != NULL)
+		pfree(owner->relrefs);
+
+	pfree(owner);
+}
+
+/*
+ * Move a ResourceOwner underneath a different parent, or leave it
+ * without any parent when newparent is NULL
+ */
+void
+ResourceOwnerNewParent(ResourceOwner owner,
+					   ResourceOwner newparent)
+{
+	ResourceOwner oldparent = owner->parent;
+
+	/* Unhook from the old parent's child list, if we are in one */
+	if (oldparent != NULL)
+	{
+		ResourceOwner *link = &oldparent->firstchild;
+
+		/* Walk the link pointers until one of them refers to this owner */
+		while (*link != NULL)
+		{
+			if (*link == owner)
+			{
+				*link = owner->nextchild;
+				break;
+			}
+			link = &(*link)->nextchild;
+		}
+	}
+
+	/* With no new parent, the owner keeps neither parent nor sibling */
+	if (newparent == NULL)
+	{
+		owner->parent = NULL;
+		owner->nextchild = NULL;
+		return;
+	}
+
+	/* Otherwise push it onto the front of the new parent's child list */
+	Assert(owner != newparent);
+	owner->parent = newparent;
+	owner->nextchild = newparent->firstchild;
+	newparent->firstchild = owner;
+}
+
+/*
+ * Registration and removal of callback functions for resource cleanup
+ *
+ * Dynamically loaded modules are the intended users of these functions;
+ * built-in modules generally just get the appropriate calls hardwired.
+ *
+ * The callbacks run post-commit or post-abort, so they must restrict
+ * themselves to noncritical cleanup.
+ */
+void
+RegisterResourceReleaseCallback(ResourceReleaseCallback callback, void *arg)
+{
+	ResourceReleaseCallbackItem *entry;
+
+	/* Build the entry in TopMemoryContext and push it on the list head */
+	entry = (ResourceReleaseCallbackItem *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(*entry));
+	entry->callback = callback;
+	entry->arg = arg;
+	entry->next = ResourceRelease_callbacks;
+	ResourceRelease_callbacks = entry;
+}
+
+void
+UnregisterResourceReleaseCallback(ResourceReleaseCallback callback, void *arg)
+{
+	ResourceReleaseCallbackItem **link = &ResourceRelease_callbacks;
+	ResourceReleaseCallbackItem *entry;
+
+	/* Only the first entry matching both callback and arg is removed */
+	while ((entry = *link) != NULL)
+	{
+		if (entry->callback != callback || entry->arg != arg)
+		{
+			link = &entry->next;
+			continue;
+		}
+		/* Splice the entry out of the list and release its storage */
+		*link = entry->next;
+		pfree(entry);
+		return;
+	}
+}
+
+
+/*
+ * Guarantee that a ResourceOwner's buffer array has space for at least
+ * one additional entry.
+ *
+ * Enlarging is kept apart from insertion because an out-of-memory failure
+ * absolutely has to happen *before* the resource itself is acquired.
+ *
+ * owner == NULL is tolerated, since the bufmgr is sometimes invoked
+ * outside any transaction (for example, in the bgwriter).
+ */
+void
+ResourceOwnerEnlargeBuffers(ResourceOwner owner)
+{
+	int			capacity;
+
+	if (owner == NULL || owner->nbuffers < owner->maxbuffers)
+		return;					/* no owner, or a free slot exists */
+
+	if (owner->buffers != NULL)
+	{
+		/* Already allocated: double the array */
+		capacity = owner->maxbuffers * 2;
+		owner->buffers = (Buffer *)
+			repalloc(owner->buffers, capacity * sizeof(Buffer));
+	}
+	else
+	{
+		/* First use: allocate the initial array in TopMemoryContext */
+		capacity = 16;
+		owner->buffers = (Buffer *)
+			MemoryContextAlloc(TopMemoryContext, capacity * sizeof(Buffer));
+	}
+	owner->maxbuffers = capacity;
+}
+
+/*
+ * Record a buffer pin as being owned by a ResourceOwner
+ *
+ * ResourceOwnerEnlargeBuffers() must have been called beforehand.
+ *
+ * owner == NULL is tolerated, since the bufmgr is sometimes invoked
+ * outside any transaction (for example, in the bgwriter).
+ */
+void
+ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
+{
+	/* Nothing is tracked when there is no owner */
+	if (owner == NULL)
+		return;
+
+	Assert(owner->nbuffers < owner->maxbuffers);
+	owner->buffers[owner->nbuffers++] = buffer;
+}
+
+/*
+ * Drop the record of a buffer pin owned by a ResourceOwner
+ *
+ * owner == NULL is tolerated, since the bufmgr is sometimes invoked
+ * outside any transaction (for example, in the bgwriter).
+ */
+void
+ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
+{
+	Buffer	   *slots;
+	int			last;
+	int			pos;
+
+	/* Nothing is tracked when there is no owner */
+	if (owner == NULL)
+		return;
+
+	slots = owner->buffers;
+	last = owner->nbuffers - 1;
+
+	/*
+	 * Search from the newest entry backwards: the buffer being released
+	 * is most likely one that was pinned recently.
+	 */
+	for (pos = last; pos >= 0; pos--)
+	{
+		if (slots[pos] != buffer)
+			continue;
+		/* Found it: slide the later entries down one slot to close the gap */
+		for (; pos < last; pos++)
+			slots[pos] = slots[pos + 1];
+		owner->nbuffers = last;
+		return;
+	}
+	elog(ERROR, "buffer %d is not owned by resource owner %s",
+		 buffer, owner->name);
+}
+
+/*
+ * Guarantee that a ResourceOwner's lock array has space for at least
+ * one additional entry.
+ *
+ * Enlarging is kept apart from insertion because an out-of-memory failure
+ * absolutely has to happen *before* the resource itself is acquired.
+ */
+void
+ResourceOwnerEnlargeLocks(ResourceOwner owner)
+{
+	int			capacity;
+
+	if (owner->nlocks < owner->maxlocks)
+		return;					/* a free slot already exists */
+
+	/* Start with 16 slots on first use; afterwards double each time */
+	if (owner->locks != NULL)
+	{
+		capacity = owner->maxlocks * 2;
+		owner->locks = (LockIdData *)
+			repalloc(owner->locks, capacity * sizeof(LockIdData));
+	}
+	else
+	{
+		capacity = 16;
+		owner->locks = (LockIdData *)
+			MemoryContextAlloc(TopMemoryContext, capacity * sizeof(LockIdData));
+	}
+	owner->maxlocks = capacity;
+}
+
+/*
+ * Record a lock as being owned by a ResourceOwner
+ *
+ * ResourceOwnerEnlargeLocks() must have been called beforehand.
+ */
+void
+ResourceOwnerRememberLock(ResourceOwner owner,
+						  LOCKTAG *locktag,
+						  TransactionId xid,
+						  LOCKMODE lockmode)
+{
+	/* Session locks and user locks are not transactional: skip them */
+	if (xid == InvalidTransactionId ||
+		locktag->lockmethodid != DEFAULT_LOCKMETHOD)
+		return;
+
+	Assert(owner->nlocks < owner->maxlocks);
+	owner->locks[owner->nlocks].locktag = *locktag;
+	owner->locks[owner->nlocks].xid = xid;
+	owner->locks[owner->nlocks].lockmode = lockmode;
+	owner->nlocks++;
+}
+
+/*
+ * Drop the record of a lock owned by a ResourceOwner
+ */
+void
+ResourceOwnerForgetLock(ResourceOwner owner,
+						LOCKTAG *locktag,
+						TransactionId xid,
+						LOCKMODE lockmode)
+{
+	LockIdData *held;
+	int			last;
+	int			pos;
+
+	/* Session locks and user locks are not transactional: nothing to do */
+	if (xid == InvalidTransactionId ||
+		locktag->lockmethodid != DEFAULT_LOCKMETHOD)
+		return;
+
+	held = owner->locks;
+	last = owner->nlocks - 1;
+
+	/* Search newest-first; on a match, close the gap and stop */
+	for (pos = last; pos >= 0; pos--)
+	{
+		if (memcmp(&held[pos].locktag, locktag, sizeof(LOCKTAG)) != 0 ||
+			held[pos].xid != xid ||
+			held[pos].lockmode != lockmode)
+			continue;
+		for (; pos < last; pos++)
+			held[pos] = held[pos + 1];
+		owner->nlocks = last;
+		return;
+	}
+	elog(ERROR, "lock %u/%u/%u is not owned by resource owner %s",
+		 locktag->relId, locktag->dbId, locktag->objId.xid, owner->name);
+}
+
+/*
+ * Guarantee that a ResourceOwner's catcache reference array has space
+ * for at least one additional entry.
+ *
+ * Enlarging is kept apart from insertion because an out-of-memory failure
+ * absolutely has to happen *before* the resource itself is acquired.
+ */
+void
+ResourceOwnerEnlargeCatCacheRefs(ResourceOwner owner)
+{
+	int			capacity;
+
+	if (owner->ncatrefs < owner->maxcatrefs)
+		return;					/* a free slot already exists */
+
+	/* Start with 16 slots on first use; afterwards double each time */
+	if (owner->catrefs != NULL)
+	{
+		capacity = owner->maxcatrefs * 2;
+		owner->catrefs = (HeapTuple *)
+			repalloc(owner->catrefs, capacity * sizeof(HeapTuple));
+	}
+	else
+	{
+		capacity = 16;
+		owner->catrefs = (HeapTuple *)
+			MemoryContextAlloc(TopMemoryContext, capacity * sizeof(HeapTuple));
+	}
+	owner->maxcatrefs = capacity;
+}
+
+/*
+ * Record a catcache reference as being owned by a ResourceOwner
+ *
+ * ResourceOwnerEnlargeCatCacheRefs() must have been called beforehand.
+ */
+void
+ResourceOwnerRememberCatCacheRef(ResourceOwner owner, HeapTuple tuple)
+{
+	/* The caller's prior Enlarge call guarantees a free slot */
+	Assert(owner->ncatrefs < owner->maxcatrefs);
+	owner->catrefs[owner->ncatrefs++] = tuple;
+}
+
+/*
+ * Drop the record of a catcache reference owned by a ResourceOwner
+ */
+void
+ResourceOwnerForgetCatCacheRef(ResourceOwner owner, HeapTuple tuple)
+{
+	HeapTuple  *refs = owner->catrefs;
+	int			last = owner->ncatrefs - 1;
+	int			pos;
+
+	/* Search from the newest entry backwards */
+	for (pos = last; pos >= 0; pos--)
+	{
+		if (refs[pos] != tuple)
+			continue;
+
+		/* Found it: slide the later entries down one slot to close the gap */
+		for (; pos < last; pos++)
+			refs[pos] = refs[pos + 1];
+		owner->ncatrefs = last;
+		return;
+	}
+	/* Falling out of the loop means no entry matched */
+	elog(ERROR, "catcache reference %p is not owned by resource owner %s",
+		 tuple, owner->name);
+}
+
+/*
+ * Guarantee that a ResourceOwner's catcache-list reference array has
+ * space for at least one additional entry.
+ *
+ * Enlarging is kept apart from insertion because an out-of-memory failure
+ * absolutely has to happen *before* the resource itself is acquired.
+ */
+void
+ResourceOwnerEnlargeCatCacheListRefs(ResourceOwner owner)
+{
+	int			capacity;
+
+	if (owner->ncatlistrefs < owner->maxcatlistrefs)
+		return;					/* a free slot already exists */
+
+	/* Start with 16 slots on first use; afterwards double each time */
+	if (owner->catlistrefs != NULL)
+	{
+		capacity = owner->maxcatlistrefs * 2;
+		owner->catlistrefs = (CatCList **)
+			repalloc(owner->catlistrefs, capacity * sizeof(CatCList *));
+	}
+	else
+	{
+		capacity = 16;
+		owner->catlistrefs = (CatCList **)
+			MemoryContextAlloc(TopMemoryContext, capacity * sizeof(CatCList *));
+	}
+	owner->maxcatlistrefs = capacity;
+}
+
+/*
+ * Record a catcache-list reference as being owned by a ResourceOwner
+ *
+ * ResourceOwnerEnlargeCatCacheListRefs() must have been called beforehand.
+ */
+void
+ResourceOwnerRememberCatCacheListRef(ResourceOwner owner, CatCList *list)
+{
+	/* The caller's prior Enlarge call guarantees a free slot */
+	Assert(owner->ncatlistrefs < owner->maxcatlistrefs);
+	owner->catlistrefs[owner->ncatlistrefs++] = list;
+}
+
+/*
+ * Drop the record of a catcache-list reference owned by a ResourceOwner
+ */
+void
+ResourceOwnerForgetCatCacheListRef(ResourceOwner owner, CatCList *list)
+{
+	CatCList  **refs = owner->catlistrefs;
+	int			last = owner->ncatlistrefs - 1;
+	int			pos;
+
+	/* Search from the newest entry backwards */
+	for (pos = last; pos >= 0; pos--)
+	{
+		if (refs[pos] != list)
+			continue;
+
+		/* Found it: slide the later entries down one slot to close the gap */
+		for (; pos < last; pos++)
+			refs[pos] = refs[pos + 1];
+		owner->ncatlistrefs = last;
+		return;
+	}
+	/* Falling out of the loop means no entry matched */
+	elog(ERROR, "catcache list reference %p is not owned by resource owner %s",
+		 list, owner->name);
+}
+
+/*
+ * Guarantee that a ResourceOwner's relcache reference array has space
+ * for at least one additional entry.
+ *
+ * Enlarging is kept apart from insertion because an out-of-memory failure
+ * absolutely has to happen *before* the resource itself is acquired.
+ */
+void
+ResourceOwnerEnlargeRelationRefs(ResourceOwner owner)
+{
+	int			capacity;
+
+	if (owner->nrelrefs < owner->maxrelrefs)
+		return;					/* a free slot already exists */
+
+	/* Start with 16 slots on first use; afterwards double each time */
+	if (owner->relrefs != NULL)
+	{
+		capacity = owner->maxrelrefs * 2;
+		owner->relrefs = (Relation *)
+			repalloc(owner->relrefs, capacity * sizeof(Relation));
+	}
+	else
+	{
+		capacity = 16;
+		owner->relrefs = (Relation *)
+			MemoryContextAlloc(TopMemoryContext, capacity * sizeof(Relation));
+	}
+	owner->maxrelrefs = capacity;
+}
+
+/*
+ * Record a relcache reference as being owned by a ResourceOwner
+ *
+ * ResourceOwnerEnlargeRelationRefs() must have been called beforehand.
+ */
+void
+ResourceOwnerRememberRelationRef(ResourceOwner owner, Relation rel)
+{
+	/* The caller's prior Enlarge call guarantees a free slot */
+	Assert(owner->nrelrefs < owner->maxrelrefs);
+	owner->relrefs[owner->nrelrefs++] = rel;
+}
+
+/*
+ * Drop the record of a relcache reference owned by a ResourceOwner
+ */
+void
+ResourceOwnerForgetRelationRef(ResourceOwner owner, Relation rel)
+{
+	Relation   *refs = owner->relrefs;
+	int			last = owner->nrelrefs - 1;
+	int			pos;
+
+	/* Search from the newest entry backwards */
+	for (pos = last; pos >= 0; pos--)
+	{
+		if (refs[pos] != rel)
+			continue;
+
+		/* Found it: slide the later entries down one slot to close the gap */
+		for (; pos < last; pos++)
+			refs[pos] = refs[pos + 1];
+		owner->nrelrefs = last;
+		return;
+	}
+	/* Falling out of the loop means no entry matched */
+	elog(ERROR, "relcache reference %s is not owned by resource owner %s",
+		 RelationGetRelationName(rel), owner->name);
+}
diff --git a/src/include/access/gistscan.h b/src/include/access/gistscan.h
index 4022f542752fa83e6d50015804cec65fe3bf96bc..d0afed75b71f4eee2509c7657752ed1ed1aa52a8 100644
--- a/src/include/access/gistscan.h
+++ b/src/include/access/gistscan.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/gistscan.h,v 1.23 2004/07/01 00:51:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/gistscan.h,v 1.24 2004/07/17 03:30:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,6 @@ extern Datum gistmarkpos(PG_FUNCTION_ARGS);
 extern Datum gistrestrpos(PG_FUNCTION_ARGS);
 extern Datum gistendscan(PG_FUNCTION_ARGS);
 extern void gistadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum);
-extern void AtEOXact_gist(void);
-extern void AtEOSubXact_gist(TransactionId childXid);
+extern void ReleaseResources_gist(void);
 
 #endif   /* GISTSCAN_H */
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 2088cc2f5a6c25d1c0ba6f7a3cff939ce088c46d..41afe630296a3e424495a48d2b9bfe7af363d4f5 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.55 2004/07/01 00:51:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.56 2004/07/17 03:30:38 tgl Exp $
  *
  * NOTES
  *		modeled after Margo Seltzer's hash implementation for unix.
@@ -292,8 +292,7 @@ extern void _hash_expandtable(Relation rel, Buffer metabuf);
 extern void _hash_regscan(IndexScanDesc scan);
 extern void _hash_dropscan(IndexScanDesc scan);
 extern bool _hash_has_active_scan(Relation rel, Bucket bucket);
-extern void AtEOXact_hash(void);
-extern void AtEOSubXact_hash(TransactionId childXid);
+extern void ReleaseResources_hash(void);
 
 /* hashsearch.c */
 extern bool _hash_next(IndexScanDesc scan, ScanDirection dir);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 6e2491532adcb6c045e40ab64777f586f6995a49..bf95d8ba23da9df7662135af44d31e623bf82b66 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.79 2004/07/11 18:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.80 2004/07/17 03:30:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -402,8 +402,6 @@ typedef BTScanOpaqueData *BTScanOpaque;
 /*
  * prototypes for functions in nbtree.c (external entry points for btree)
  */
-extern void AtEOXact_nbtree(void);
-
 extern Datum btbuild(PG_FUNCTION_ARGS);
 extern Datum btinsert(PG_FUNCTION_ARGS);
 extern Datum btgettuple(PG_FUNCTION_ARGS);
diff --git a/src/include/access/rtree.h b/src/include/access/rtree.h
index 5b5347e9a08a080d7cd1bb0476368fc7e0396230..a23b00283fbe915df56ffd863af75e9015f77a47 100644
--- a/src/include/access/rtree.h
+++ b/src/include/access/rtree.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.33 2004/07/01 00:51:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.34 2004/07/17 03:30:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -129,8 +129,7 @@ extern void rtree_desc(char *buf, uint8 xl_info, char *rec);
 /* rtscan.c */
 extern void rtadjscans(Relation r, int op, BlockNumber blkno,
 		   OffsetNumber offnum);
-extern void AtEOXact_rtree(void);
-extern void AtEOSubXact_rtree(TransactionId childXid);
+extern void ReleaseResources_rtree(void);
 
 /* rtstrat.c */
 extern StrategyNumber RTMapToInternalOperator(StrategyNumber strat);
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index c5b66afd0df9fb19b9dab2488909dc7fda7411eb..714518d308cba2b894925787d604471368a0b4e3 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,17 +7,16 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.64 2004/07/01 00:51:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.65 2004/07/17 03:30:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef XACT_H
 #define XACT_H
 
-#include "access/transam.h"
 #include "access/xlog.h"
 #include "utils/nabstime.h"
-#include "utils/timestamp.h"
+
 
 /*
  * Xact isolation levels
@@ -40,63 +39,11 @@ extern int	XactIsoLevel;
 extern bool DefaultXactReadOnly;
 extern bool XactReadOnly;
 
-/*
- *	transaction states - transaction state from server perspective
- */
-typedef enum TransState
-{
-	TRANS_DEFAULT,
-	TRANS_START,
-	TRANS_INPROGRESS,
-	TRANS_COMMIT,
-	TRANS_ABORT
-} TransState;
-
-/*
- *	transaction block states - transaction state of client queries
- */
-typedef enum TBlockState
-{
-	TBLOCK_DEFAULT,
-	TBLOCK_STARTED,
-	TBLOCK_BEGIN,
-	TBLOCK_INPROGRESS,
-	TBLOCK_END,
-	TBLOCK_ABORT,
-	TBLOCK_ENDABORT,
-
-	TBLOCK_SUBBEGIN,
-	TBLOCK_SUBBEGINABORT,
-	TBLOCK_SUBINPROGRESS,
-	TBLOCK_SUBEND,
-	TBLOCK_SUBABORT,
-	TBLOCK_SUBENDABORT_OK,
-	TBLOCK_SUBENDABORT_ERROR
-} TBlockState;
-
 /*
  *	end-of-transaction cleanup callbacks for dynamically loaded modules
  */
 typedef void (*EOXactCallback) (bool isCommit, void *arg);
 
-/*
- *	transaction state structure
- */
-typedef struct TransactionStateData
-{
-	TransactionId	transactionIdData;		/* my XID */
-	CommandId		commandId;				/* current CID */
-	TransState		state;					/* low-level state */
-	TBlockState		blockState;				/* high-level state */
-	int				nestingLevel;			/* nest depth */
-	MemoryContext	curTransactionContext;	/* my xact-lifetime context */
-	List		   *childXids;				/* subcommitted child XIDs */
-	AclId			currentUser;			/* subxact start current_user */
-	struct TransactionStateData *parent;	/* back link to parent */
-} TransactionStateData;
-
-typedef TransactionStateData *TransactionState;
-
 
 /* ----------------
  *		transaction-related XLOG entries
@@ -168,7 +115,7 @@ extern void UnregisterEOXactCallback(EOXactCallback callback, void *arg);
 
 extern void RecordTransactionCommit(void);
 
-extern int	xactGetCommittedChildren(TransactionId **ptr, bool metoo);
+extern int	xactGetCommittedChildren(TransactionId **ptr);
 
 extern void XactPushRollback(void (*func) (void *), void *data);
 extern void XactPopRollback(void);
diff --git a/src/include/commands/portalcmds.h b/src/include/commands/portalcmds.h
index 0a2bc9ad19d082ea242d8153f1d220516923754c..60e7d524d29c5c88e890c8b60b2c1d7688f7b9de 100644
--- a/src/include/commands/portalcmds.h
+++ b/src/include/commands/portalcmds.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/portalcmds.h,v 1.14 2003/11/29 22:40:59 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/commands/portalcmds.h,v 1.15 2004/07/17 03:30:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,7 +24,7 @@ extern void PerformPortalFetch(FetchStmt *stmt, DestReceiver *dest,
 
 extern void PerformPortalClose(const char *name);
 
-extern void PortalCleanup(Portal portal, bool isError);
+extern void PortalCleanup(Portal portal);
 
 extern void PersistHoldablePortal(Portal portal);
 
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index e992751f856b01751e79825e982a37a414e16db8..273e63a521597a2647955f26835729462775c307 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.83 2004/07/01 00:51:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.84 2004/07/17 03:31:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,31 +92,6 @@ extern int32 *LocalRefCount;
 	) \
 )
 
-/*
- * IncrBufferRefCount
- *		Increment the pin count on a buffer that we have *already* pinned
- *		at least once.
- *
- *		This macro cannot be used on a buffer we do not have pinned,
- *		because it doesn't change the shared buffer state.  Therefore the
- *		Assert checks are for refcount > 0.  Someone got this wrong once...
- */
-#define IncrBufferRefCount(buffer) \
-( \
-	BufferIsLocal(buffer) ? \
-	( \
-		(void) AssertMacro((buffer) >= -NLocBuffer), \
-		(void) AssertMacro(LocalRefCount[-(buffer) - 1] > 0), \
-		(void) LocalRefCount[-(buffer) - 1]++ \
-	) \
-	: \
-	( \
-		(void) AssertMacro(!BAD_BUFFER_ID(buffer)), \
-		(void) AssertMacro(PrivateRefCount[(buffer) - 1] > 0), \
-		(void) PrivateRefCount[(buffer) - 1]++ \
-	) \
-)
-
 /*
  * BufferGetBlock
  *		Returns a reference to a disk page image associated with a buffer.
@@ -138,6 +113,7 @@ extern int32 *LocalRefCount;
  */
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
 extern void ReleaseBuffer(Buffer buffer);
+extern void IncrBufferRefCount(Buffer buffer);
 extern void WriteBuffer(Buffer buffer);
 extern void WriteNoReleaseBuffer(Buffer buffer);
 extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
@@ -148,8 +124,6 @@ extern void InitBufferPoolAccess(void);
 extern char *ShowBufferUsage(void);
 extern void ResetBufferUsage(void);
 extern void AtEOXact_Buffers(bool isCommit);
-extern void AtSubStart_Buffers(void);
-extern void AtEOSubXact_Buffers(bool isCommit);
 extern void FlushBufferPool(void);
 extern BlockNumber BufferGetBlockNumber(Buffer buffer);
 extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 650b326949733fd0a596ba224227324039f7bc6a..1289fee7eca41463e3fb92bae6db7b1d4c71f030 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.78 2004/07/01 00:51:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.79 2004/07/17 03:31:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,14 +26,6 @@ typedef struct PROC_QUEUE
 	int			size;			/* number of entries in list */
 } PROC_QUEUE;
 
-/* Release options for LockReleaseAll */
-typedef enum
-{
-	ReleaseAll,					/* All my locks */
-	ReleaseAllExceptSession,	/* All except session locks (Xid = 0) */
-	ReleaseGivenXids			/* Only locks with Xids in given array */
-} LockReleaseWhich;
-
 /* struct PGPROC is declared in storage/proc.h, but must forward-reference it */
 typedef struct PGPROC PGPROC;
 
@@ -248,7 +240,7 @@ extern bool LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 extern bool LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 			TransactionId xid, LOCKMODE lockmode);
 extern bool LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc,
-			   LockReleaseWhich which, int nxids, TransactionId *xids);
+						   bool allxids);
 extern int LockCheckConflicts(LockMethod lockMethodTable,
 				   LOCKMODE lockmode,
 				   LOCK *lock, PROCLOCK *proclock, PGPROC *proc,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 1551d7568c597718069529077a583b3fd950ef7c..8645fb1fdb55e701a29c558392bd5dc5ed64ad1f 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.68 2004/07/01 00:51:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.69 2004/07/17 03:31:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,7 +34,7 @@ struct PGPROC
 	SHM_QUEUE	links;			/* list link if process is in a list */
 
 	PGSemaphoreData sem;		/* ONE semaphore to sleep on */
-	int			errType;		/* STATUS_OK or STATUS_ERROR after wakeup */
+	int			waitStatus;		/* STATUS_OK or STATUS_ERROR after wakeup */
 
 	TransactionId xid;			/* transaction currently being executed by
 								 * this proc */
@@ -103,13 +103,12 @@ extern int	ProcGlobalSemas(int maxBackends);
 extern void InitProcGlobal(int maxBackends);
 extern void InitProcess(void);
 extern void InitDummyProcess(int proctype);
-extern void ProcReleaseLocks(LockReleaseWhich which,
-							 int nxids, TransactionId *xids);
+extern void ProcReleaseLocks(bool isCommit);
 
 extern void ProcQueueInit(PROC_QUEUE *queue);
 extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode,
 		  LOCK *lock, PROCLOCK *proclock);
-extern PGPROC *ProcWakeup(PGPROC *proc, int errType);
+extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
 extern bool LockWaitCancel(void);
 
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index e4f0930ef7ab60f94889f8205b7afdadf3bf27ff..f808120682da5d8b3cb6c59311c55135130d2bbf 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.45 2004/07/01 00:51:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.46 2004/07/17 03:31:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,7 +66,6 @@ extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
 extern void smgrimmedsync(SMgrRelation reln);
 extern void smgrDoPendingDeletes(bool isCommit);
 extern int	smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
-extern void AtSubStart_smgr(void);
 extern void AtSubCommit_smgr(void);
 extern void AtSubAbort_smgr(void);
 extern void smgrcommit(void);
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 3ce54b99a253880b91fe06d1b5479c491c6cd9ec..627c172793bdbde7c1228cafa215a4bdf9d3f1f6 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/catcache.h,v 1.49 2004/07/01 00:51:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/catcache.h,v 1.50 2004/07/17 03:31:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -101,9 +101,6 @@ typedef struct catctup
 	 * and negative entries is identical.
 	 */
 	int			refcount;		/* number of active references */
-	int		   *prev_refcount;	/* refcounts for upper subtransactions */
-	int			numpushes;		/* number of used refcounts in the array */
-	int			numalloc;		/* allocated size of array */
 	bool		dead;			/* dead but not yet removed? */
 	bool		negative;		/* negative cache entry? */
 	uint32		hash_value;		/* hash value for this tuple's keys */
@@ -142,9 +139,6 @@ typedef struct catclist
 	 */
 	Dlelem		cache_elem;		/* list member of per-catcache list */
 	int			refcount;		/* number of active references */
-	int		   *prev_refcount;	/* refcounts for upper subtransactions */
-	int			numpushes;		/* number of used refcounts in the array */
-	int			numalloc;		/* allocated size of array */
 	bool		dead;			/* dead but not yet removed? */
 	bool		ordered;		/* members listed in index order? */
 	short		nkeys;			/* number of lookup keys specified */
@@ -169,8 +163,6 @@ extern DLLIMPORT MemoryContext CacheMemoryContext;
 
 extern void CreateCacheMemoryContext(void);
 extern void AtEOXact_CatCache(bool isCommit);
-extern void AtSubStart_CatCache(void);
-extern void AtEOSubXact_CatCache(bool isCommit);
 
 extern CatCache *InitCatCache(int id, const char *relname, const char *indname,
 			 int reloidattr,
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 3437dc448a3d7bcbbc5614a9514d4aa67e84e3be..d5a12520d0e33cd4f35623f8a2aa61ce0e941558 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -39,7 +39,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.49 2004/07/01 00:51:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.50 2004/07/17 03:31:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,6 +48,7 @@
 
 #include "executor/execdesc.h"
 #include "nodes/memnodes.h"
+#include "utils/resowner.h"
 #include "utils/tuplestore.h"
 
 
@@ -79,6 +80,20 @@ typedef enum PortalStrategy
 	PORTAL_MULTI_QUERY
 } PortalStrategy;
 
+/*
+ * A portal is always in one of these states.  It is possible to transition
+ * from ACTIVE back to READY if the query is not run to completion;
+ * otherwise we never back up in status.
+ */
+typedef enum PortalStatus
+{
+	PORTAL_NEW,					/* in process of creation */
+	PORTAL_READY,				/* PortalStart complete, can run it */
+	PORTAL_ACTIVE,				/* portal is running (can't delete it) */
+	PORTAL_DONE,				/* portal is finished (don't re-run it) */
+	PORTAL_FAILED				/* portal got error (can't re-run it) */
+} PortalStatus;
+
 /*
  * Note: typedef Portal is declared in tcop/dest.h as
  *		typedef struct PortalData *Portal;
@@ -89,7 +104,8 @@ typedef struct PortalData
 	/* Bookkeeping data */
 	const char *name;			/* portal's name */
 	MemoryContext heap;			/* subsidiary memory for portal */
-	void		(*cleanup) (Portal portal, bool isError);		/* cleanup hook */
+	ResourceOwner resowner;		/* resources owned by portal */
+	void		(*cleanup) (Portal portal);			/* cleanup hook */
 	TransactionId createXact;	/* the xid of the creating xact */
 
 	/* The query or queries the portal will execute */
@@ -113,10 +129,8 @@ typedef struct PortalData
 	int			cursorOptions;	/* DECLARE CURSOR option bits */
 
 	/* Status data */
-	bool		portalReady;	/* PortalStart complete? */
+	PortalStatus status;		/* see above */
 	bool		portalUtilReady;	/* PortalRunUtility complete? */
-	bool		portalActive;	/* portal is running (can't delete it) */
-	bool		portalDone;		/* portal is finished (don't re-run it) */
 
 	/* If not NULL, Executor is active; call ExecutorEnd eventually: */
 	QueryDesc  *queryDesc;		/* info needed for executor invocation */
@@ -167,12 +181,14 @@ extern void EnablePortalManager(void);
 extern void AtCommit_Portals(void);
 extern void AtAbort_Portals(void);
 extern void AtCleanup_Portals(void);
-extern void AtSubCommit_Portals(TransactionId parentXid);
-extern void AtSubAbort_Portals(void);
+extern void AtSubCommit_Portals(TransactionId parentXid,
+								ResourceOwner parentXactOwner);
+extern void AtSubAbort_Portals(TransactionId parentXid,
+							   ResourceOwner parentXactOwner);
 extern void AtSubCleanup_Portals(void);
 extern Portal CreatePortal(const char *name, bool allowDup, bool dupSilent);
 extern Portal CreateNewPortal(void);
-extern void PortalDrop(Portal portal, bool isError);
+extern void PortalDrop(Portal portal, bool isTopCommit);
 extern void DropDependentPortals(MemoryContext queryContext);
 extern Portal GetPortalByName(const char *name);
 extern void PortalDefineQuery(Portal portal,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index b7f85eda68e07464caae020c0468d76e4134ac82..481cdb2465fd625410641c4996cc64863dd3d7c3 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.75 2004/07/01 00:51:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.76 2004/07/17 03:31:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -110,9 +110,6 @@ typedef struct RelationData
 	BlockNumber rd_targblock;	/* current insertion target block, or
 								 * InvalidBlockNumber */
 	int			rd_refcnt;		/* reference count */
-	int		   *rd_prevrefcnt;	/* reference count stack */
-	int			rd_numalloc;	/* stack allocated size */
-	int			rd_numpushed;	/* stack used size */
 	bool		rd_isnew;		/* rel was created in current xact */
 
 	/*
@@ -190,28 +187,6 @@ typedef Relation *RelationPtr;
 #define RelationHasReferenceCountZero(relation) \
 		((bool)((relation)->rd_refcnt == 0))
 
-/*
- * RelationSetReferenceCount
- *		Sets relation reference count.
- */
-#define RelationSetReferenceCount(relation,count) \
-	((relation)->rd_refcnt = (count))
-
-/*
- * RelationIncrementReferenceCount
- *		Increments relation reference count.
- */
-#define RelationIncrementReferenceCount(relation) \
-	((relation)->rd_refcnt += 1)
-
-/*
- * RelationDecrementReferenceCount
- *		Decrements relation reference count.
- */
-#define RelationDecrementReferenceCount(relation) \
-	(AssertMacro((relation)->rd_refcnt > 0), \
-	 (relation)->rd_refcnt -= 1)
-
 /*
  * RelationGetForm
  *		Returns pg_class tuple for a relation.
@@ -255,4 +230,8 @@ typedef Relation *RelationPtr;
 #define RelationGetNamespace(relation) \
 	((relation)->rd_rel->relnamespace)
 
+/* routines in utils/cache/relcache.c */
+extern void RelationIncrementReferenceCount(Relation rel);
+extern void RelationDecrementReferenceCount(Relation rel);
+
 #endif   /* REL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 47f46190df79c4d2c7f44b3f201af539fb2b5343..227ef591a2c8818fe8c38b9ca9536fb995970e0c 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.41 2004/07/01 00:51:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.42 2004/07/17 03:31:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,8 +66,6 @@ extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode);
 extern void RelationCacheInvalidate(void);
 
 extern void AtEOXact_RelationCache(bool isCommit);
-extern void AtSubStart_RelationCache(void);
-extern void AtEOSubXact_RelationCache(bool isCommit);
 
 /*
  * Routines to help manage rebuilding of relcache init file
diff --git a/src/include/utils/resowner.h b/src/include/utils/resowner.h
new file mode 100644
index 0000000000000000000000000000000000000000..6de270d2b0efeb9e2a3b8de883ae6e61e4fa409e
--- /dev/null
+++ b/src/include/utils/resowner.h
@@ -0,0 +1,121 @@
+/*-------------------------------------------------------------------------
+ *
+ * resowner.h
+ *	  POSTGRES resource owner definitions.
+ *
+ * Query-lifespan resources are tracked by associating them with
+ * ResourceOwner objects.  This provides a simple mechanism for ensuring
+ * that such resources are freed at the right time.
+ * See utils/resowner/README for more info.
+ *
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/utils/resowner.h,v 1.1 2004/07/17 03:31:47 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RESOWNER_H
+#define RESOWNER_H
+
+#include "storage/buf.h"
+#include "storage/lock.h"
+#include "utils/catcache.h"
+#include "utils/rel.h"
+
+
+/*
+ * ResourceOwner objects are an opaque data structure known only within
+ * resowner.c.
+ */
+typedef struct ResourceOwnerData *ResourceOwner;
+
+
+/*
+ * Globally known ResourceOwners
+ */
+extern DLLIMPORT ResourceOwner CurrentResourceOwner;
+extern DLLIMPORT ResourceOwner CurTransactionResourceOwner;
+extern DLLIMPORT ResourceOwner TopTransactionResourceOwner;
+
+/*
+ * Resource releasing is done in three phases: pre-locks, locks, and
+ * post-locks.  The pre-lock phase must release any resources that are
+ * visible to other backends (such as pinned buffers); this ensures that
+ * when we release a lock that another backend may be waiting on, it will
+ * see us as being fully out of our transaction.  The post-lock phase
+ * should be used for backend-internal cleanup.
+ */
+typedef enum
+{
+	RESOURCE_RELEASE_BEFORE_LOCKS,
+	RESOURCE_RELEASE_LOCKS,
+	RESOURCE_RELEASE_AFTER_LOCKS
+} ResourceReleasePhase;
+
+/*
+ *	Dynamically loaded modules can get control during ResourceOwnerRelease
+ *	by providing a callback of this form.
+ */
+typedef void (*ResourceReleaseCallback) (ResourceReleasePhase phase,
+										 bool isCommit,
+										 bool isTopLevel,
+										 void *arg);
+
+
+/*
+ * Functions in resowner.c
+ */
+
+/* generic routines */
+extern ResourceOwner ResourceOwnerCreate(ResourceOwner parent,
+										 const char *name);
+extern void ResourceOwnerRelease(ResourceOwner owner,
+								 ResourceReleasePhase phase,
+								 bool isCommit,
+								 bool isTopLevel);
+extern void ResourceOwnerDelete(ResourceOwner owner);
+extern void ResourceOwnerNewParent(ResourceOwner owner,
+								   ResourceOwner newparent);
+extern void RegisterResourceReleaseCallback(ResourceReleaseCallback callback,
+											void *arg);
+extern void UnregisterResourceReleaseCallback(ResourceReleaseCallback callback,
+											  void *arg);
+
+/* support for buffer refcount management */
+extern void ResourceOwnerEnlargeBuffers(ResourceOwner owner);
+extern void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer);
+extern void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer);
+
+/* support for lock management */
+extern void ResourceOwnerEnlargeLocks(ResourceOwner owner);
+extern void ResourceOwnerRememberLock(ResourceOwner owner,
+									  LOCKTAG *locktag,
+									  TransactionId xid,
+									  LOCKMODE lockmode);
+extern void ResourceOwnerForgetLock(ResourceOwner owner,
+									LOCKTAG *locktag,
+									TransactionId xid,
+									LOCKMODE lockmode);
+
+/* support for catcache refcount management */
+extern void ResourceOwnerEnlargeCatCacheRefs(ResourceOwner owner);
+extern void ResourceOwnerRememberCatCacheRef(ResourceOwner owner,
+											 HeapTuple tuple);
+extern void ResourceOwnerForgetCatCacheRef(ResourceOwner owner,
+										   HeapTuple tuple);
+extern void ResourceOwnerEnlargeCatCacheListRefs(ResourceOwner owner);
+extern void ResourceOwnerRememberCatCacheListRef(ResourceOwner owner,
+												 CatCList *list);
+extern void ResourceOwnerForgetCatCacheListRef(ResourceOwner owner,
+											   CatCList *list);
+
+/* support for relcache refcount management */
+extern void ResourceOwnerEnlargeRelationRefs(ResourceOwner owner);
+extern void ResourceOwnerRememberRelationRef(ResourceOwner owner,
+											 Relation rel);
+extern void ResourceOwnerForgetRelationRef(ResourceOwner owner,
+										   Relation rel);
+
+#endif   /* RESOWNER_H */
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index c2fdc23103981ff498dff72b8d2303bf56c33694..cc3004dbb28d505371654fec3f78a24785ffbc5d 100644
--- a/src/test/regress/expected/transactions.out
+++ b/src/test/regress/expected/transactions.out
@@ -191,6 +191,72 @@ SELECT 1;			-- this should work
         1
 (1 row)
 
+-- check non-transactional behavior of cursors
+BEGIN;
+	DECLARE c CURSOR FOR SELECT unique2 FROM tenk1;
+	BEGIN;
+		FETCH 10 FROM c;
+ unique2 
+---------
+       0
+       1
+       2
+       3
+       4
+       5
+       6
+       7
+       8
+       9
+(10 rows)
+
+	ROLLBACK;
+	BEGIN;
+		FETCH 10 FROM c;
+ unique2 
+---------
+      10
+      11
+      12
+      13
+      14
+      15
+      16
+      17
+      18
+      19
+(10 rows)
+
+	COMMIT;
+	FETCH 10 FROM c;
+ unique2 
+---------
+      20
+      21
+      22
+      23
+      24
+      25
+      26
+      27
+      28
+      29
+(10 rows)
+
+	CLOSE c;
+	DECLARE c CURSOR FOR SELECT unique2/0 FROM tenk1;
+	BEGIN;
+		FETCH 10 FROM c;
+ERROR:  division by zero
+	ROLLBACK;
+	-- c is now dead to the world ...
+	BEGIN;
+		FETCH 10 FROM c;
+ERROR:  portal "c" cannot be run
+	ROLLBACK;
+	FETCH 10 FROM c;
+ERROR:  portal "c" cannot be run
+COMMIT;
 DROP TABLE foo;
 DROP TABLE baz;
 DROP TABLE barbaz;
diff --git a/src/test/regress/sql/transactions.sql b/src/test/regress/sql/transactions.sql
index 5af024fdfe6a30566311f42e2df6e2f10319e637..f2a206979fe076ab9400075e851384be87a929a5 100644
--- a/src/test/regress/sql/transactions.sql
+++ b/src/test/regress/sql/transactions.sql
@@ -127,6 +127,28 @@ BEGIN;
 COMMIT;
 SELECT 1;			-- this should work
 
+-- check non-transactional behavior of cursors
+BEGIN;
+	DECLARE c CURSOR FOR SELECT unique2 FROM tenk1;
+	BEGIN;
+		FETCH 10 FROM c;
+	ROLLBACK;
+	BEGIN;
+		FETCH 10 FROM c;
+	COMMIT;
+	FETCH 10 FROM c;
+	CLOSE c;
+	DECLARE c CURSOR FOR SELECT unique2/0 FROM tenk1;
+	BEGIN;
+		FETCH 10 FROM c;
+	ROLLBACK;
+	-- c is now dead to the world ...
+	BEGIN;
+		FETCH 10 FROM c;
+	ROLLBACK;
+	FETCH 10 FROM c;
+COMMIT;
+
 
 DROP TABLE foo;
 DROP TABLE baz;