diff --git a/contrib/userlock/user_locks.c b/contrib/userlock/user_locks.c index e1ee603f80e3d19e661a4a594762d4384a6e28e3..0996970a9f485240d9b3058a0498bdb13973b5c6 100644 --- a/contrib/userlock/user_locks.c +++ b/contrib/userlock/user_locks.c @@ -75,8 +75,7 @@ user_write_unlock_oid(Oid oid) int user_unlock_all(void) { - return LockReleaseAll(USER_LOCKMETHOD, MyProc, false, - InvalidTransactionId); + return LockReleaseAll(USER_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL); } /* end of file */ diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 30bb9b810afe0837e0e23e15470ff4e0f0ee7dda..dc424a6773d45e2dec779686142690f423eba0df 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.51 2004/01/07 18:56:23 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.52 2004/07/01 00:49:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,6 +41,7 @@ static void adjustiptr(IndexScanDesc s, ItemPointer iptr, typedef struct GISTScanListData { IndexScanDesc gsl_scan; + TransactionId gsl_creatingXid; struct GISTScanListData *gsl_next; } GISTScanListData; @@ -223,6 +224,7 @@ gistregscan(IndexScanDesc s) l = (GISTScanList) palloc(sizeof(GISTScanListData)); l->gsl_scan = s; + l->gsl_creatingXid = GetCurrentTransactionId(); l->gsl_next = GISTScans; GISTScans = l; } @@ -271,6 +273,46 @@ AtEOXact_gist(void) GISTScans = NULL; } +/* + * AtEOSubXact_gist() --- clean up gist subsystem at subxact abort or commit. + * + * This is here because it needs to touch this module's static var GISTScans. 
+ */ +void +AtEOSubXact_gist(TransactionId childXid) +{ + GISTScanList l; + GISTScanList prev; + GISTScanList next; + + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* + * Forget active scans that were started in this subtransaction. + */ + prev = NULL; + + for (l = GISTScans; l != NULL; l = next) + { + next = l->gsl_next; + if (l->gsl_creatingXid == childXid) + { + if (prev == NULL) + GISTScans = next; + else + prev->gsl_next = next; + + pfree(l); + /* prev does not change */ + } + else + prev = l; + } +} + void gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum) { diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index fcf2a01cddb4f0a150ba73d3ebe426f1c410f659..d107596c75063d541f53c657b4a6cf7006279ede 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.33 2004/01/07 18:56:23 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.34 2004/07/01 00:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ typedef struct HashScanListData { IndexScanDesc hashsl_scan; + TransactionId hashsl_creatingXid; struct HashScanListData *hashsl_next; } HashScanListData; @@ -50,6 +51,46 @@ AtEOXact_hash(void) HashScans = NULL; } +/* + * AtEOSubXact_hash() --- clean up hash subsystem at subxact abort or commit. + * + * This is here because it needs to touch this module's static var HashScans. + */ +void +AtEOSubXact_hash(TransactionId childXid) +{ + HashScanList l; + HashScanList prev; + HashScanList next; + + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* + * Forget active scans that were started in this subtransaction. 
+ */ + prev = NULL; + + for (l = HashScans; l != NULL; l = next) + { + next = l->hashsl_next; + if (l->hashsl_creatingXid == childXid) + { + if (prev == NULL) + HashScans = next; + else + prev->hashsl_next = next; + + pfree(l); + /* prev does not change */ + } + else + prev = l; + } +} + /* * _Hash_regscan() -- register a new scan. */ @@ -60,6 +101,7 @@ _hash_regscan(IndexScanDesc scan) new_el = (HashScanList) palloc(sizeof(HashScanListData)); new_el->hashsl_scan = scan; + new_el->hashsl_creatingXid = GetCurrentTransactionId(); new_el->hashsl_next = HashScans; HashScans = new_el; } diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c index 9dac2a15c06bda71be5a1387702a0ffd52243d1d..d3530966e6d1bad29d57aee82b3ed4f63e3e9f5d 100644 --- a/src/backend/access/rtree/rtscan.c +++ b/src/backend/access/rtree/rtscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.51 2004/01/07 18:56:24 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.52 2004/07/01 00:49:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -42,6 +42,7 @@ static void adjustiptr(IndexScanDesc s, ItemPointer iptr, typedef struct RTScanListData { IndexScanDesc rtsl_scan; + TransactionId rtsl_creatingXid; struct RTScanListData *rtsl_next; } RTScanListData; @@ -240,6 +241,7 @@ rtregscan(IndexScanDesc s) l = (RTScanList) palloc(sizeof(RTScanListData)); l->rtsl_scan = s; + l->rtsl_creatingXid = GetCurrentTransactionId(); l->rtsl_next = RTScans; RTScans = l; } @@ -290,6 +292,46 @@ AtEOXact_rtree(void) RTScans = NULL; } +/* + * AtEOSubXact_rtree() --- clean up rtree subsystem at subxact abort or commit. + * + * This is here because it needs to touch this module's static var RTScans. 
+ */ +void +AtEOSubXact_rtree(TransactionId childXid) +{ + RTScanList l; + RTScanList prev; + RTScanList next; + + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* + * Forget active scans that were started in this subtransaction. + */ + prev = NULL; + + for (l = RTScans; l != NULL; l = next) + { + next = l->rtsl_next; + if (l->rtsl_creatingXid == childXid) + { + if (prev == NULL) + RTScans = next; + else + prev->rtsl_next = next; + + pfree(l); + /* prev does not change */ + } + else + prev = l; + } +} + void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum) { diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index 762ecf0ab7fcff97af391a3164948f60b64795ee..fe740a045f8ce55bef533ea90bd576a48521cdab 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -4,7 +4,7 @@ # Makefile for access/transam # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.18 2003/11/29 19:51:40 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.19 2004/07/01 00:49:42 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/access/transam top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o +OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o all: SUBSYS.o diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 97f887d0a06ce234e256dda5bf204333ec7033b0..54514a24e71ccd727374c8c5f5afe46f0db0e07c 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.20 2004/05/31 03:47:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.21 2004/07/01 00:49:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,14 +21,13 @@ #include <fcntl.h> #include <dirent.h> -#include <errno.h> #include <sys/stat.h> #include <unistd.h> #include "access/clog.h" #include "access/slru.h" -#include "storage/lwlock.h" #include "miscadmin.h" +#include "storage/lwlock.h" /* @@ -65,7 +64,7 @@ * is guaranteed flushed through the XLOG commit record before we are called * to log a commit, so the WAL rule "write xlog before data" is satisfied * automatically for commits, and we don't really care for aborts. Therefore, - * we don't need to mark XLOG pages with LSN information; we have enough + * we don't need to mark CLOG pages with LSN information; we have enough * synchronization already. 
*---------- */ @@ -95,20 +94,22 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) char *byteptr; Assert(status == TRANSACTION_STATUS_COMMITTED || - status == TRANSACTION_STATUS_ABORTED); + status == TRANSACTION_STATUS_ABORTED || + status == TRANSACTION_STATUS_SUB_COMMITTED); LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); byteptr = SimpleLruReadPage(ClogCtl, pageno, xid, true); byteptr += byteno; - /* Current state should be 0 or target state */ + /* Current state should be 0, subcommitted or target state */ Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 || + ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED || ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status); *byteptr |= (status << bshift); - /* ...->page_status[slotno] = CLOG_PAGE_DIRTY; already done */ + /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */ LWLockRelease(ClogCtl->ControlLock); } @@ -117,7 +118,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) * Interrogate the state of a transaction in the commit log. * * NB: this is a low-level routine and is NOT the preferred entry point - * for most uses; TransactionLogTest() in transam.c is the intended caller. + * for most uses; TransactionLogFetch() in transam.c is the intended caller. */ XidStatus TransactionIdGetStatus(TransactionId xid) @@ -176,7 +177,7 @@ BootStrapCLOG(void) /* Make sure it's written out */ SimpleLruWritePage(ClogCtl, slotno, NULL); - /* Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); */ + /* Assert(ClogCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ LWLockRelease(ClogCtl->ControlLock); } @@ -211,7 +212,8 @@ StartupCLOG(void) /* * Initialize our idea of the latest page number. 
*/ - SimpleLruSetLatestPage(ClogCtl, TransactionIdToPage(ShmemVariableCache->nextXid)); + SimpleLruSetLatestPage(ClogCtl, + TransactionIdToPage(ShmemVariableCache->nextXid)); } /* @@ -333,51 +335,20 @@ WriteZeroPageXlogRec(int pageno) rdata.data = (char *) (&pageno); rdata.len = sizeof(int); rdata.next = NULL; - (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); -} - -/* - * CLOG resource manager's routines - */ -void -clog_redo(XLogRecPtr lsn, XLogRecord *record) -{ - uint8 info = record->xl_info & ~XLR_INFO_MASK; - - if (info == CLOG_ZEROPAGE) - { - int pageno; - int slotno; - - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); - - slotno = ZeroCLOGPage(pageno, false); - SimpleLruWritePage(ClogCtl, slotno, NULL); - /* Assert(ClogCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ - - LWLockRelease(ClogCtl->ControlLock); - } + (void) XLogInsert(RM_SLRU_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); } +/* Redo a ZEROPAGE action during WAL replay */ void -clog_undo(XLogRecPtr lsn, XLogRecord *record) +clog_zeropage_redo(int pageno) { -} + int slotno; -void -clog_desc(char *buf, uint8 xl_info, char *rec) -{ - uint8 info = xl_info & ~XLR_INFO_MASK; + LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); - if (info == CLOG_ZEROPAGE) - { - int pageno; + slotno = ZeroCLOGPage(pageno, false); + SimpleLruWritePage(ClogCtl, slotno, NULL); + /* Assert(ClogCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ - memcpy(&pageno, rec, sizeof(int)); - sprintf(buf + strlen(buf), "zeropage: %d", pageno); - } - else - strcat(buf, "UNKNOWN"); + LWLockRelease(ClogCtl->ControlLock); } diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 112363bf2914252f47fb5dcc4c428a07cecbfb66..d6c8c93ca6ebfe3389571e363f410f50d4c2e888 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -3,16 +3,16 @@ * * Resource managers definition * - * $PostgreSQL: 
pgsql/src/backend/access/transam/rmgr.c,v 1.12 2003/11/29 19:51:40 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.13 2004/07/01 00:49:42 tgl Exp $ */ #include "postgres.h" -#include "access/clog.h" #include "access/gist.h" #include "access/hash.h" #include "access/heapam.h" #include "access/nbtree.h" #include "access/rtree.h" +#include "access/slru.h" #include "access/xact.h" #include "access/xlog.h" #include "storage/smgr.h" @@ -23,7 +23,7 @@ RmgrData RmgrTable[RM_MAX_ID + 1] = { {"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL}, {"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL}, {"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL}, - {"CLOG", clog_redo, clog_undo, clog_desc, NULL, NULL}, + {"SLRU", slru_redo, slru_undo, slru_desc, NULL, NULL}, {"Reserved 4", NULL, NULL, NULL, NULL, NULL}, {"Reserved 5", NULL, NULL, NULL, NULL, NULL}, {"Reserved 6", NULL, NULL, NULL, NULL, NULL}, diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 58798d0f07fcb56d230f886258c94bc07e5a4f4a..0181e2d626048a476aaf0f60f183cc65814c0225 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.16 2004/05/31 03:47:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.17 2004/07/01 00:49:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,8 +16,9 @@ #include <sys/stat.h> #include <unistd.h> +#include "access/clog.h" #include "access/slru.h" -#include "access/clog.h" /* only for NUM_CLOG_BUFFERS */ +#include "access/subtrans.h" #include "postmaster/bgwriter.h" #include "storage/fd.h" #include "storage/lwlock.h" @@ -1025,3 +1026,55 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) 
return found; } + +/* + * SLRU resource manager's routines + */ +void +slru_redo(XLogRecPtr lsn, XLogRecord *record) +{ + uint8 info = record->xl_info & ~XLR_INFO_MASK; + int pageno; + + memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + + switch (info) + { + case CLOG_ZEROPAGE: + clog_zeropage_redo(pageno); + break; + case SUBTRANS_ZEROPAGE: + subtrans_zeropage_redo(pageno); + break; + default: + elog(PANIC, "slru_redo: unknown op code %u", info); + } +} + +void +slru_undo(XLogRecPtr lsn, XLogRecord *record) +{ +} + +void +slru_desc(char *buf, uint8 xl_info, char *rec) +{ + uint8 info = xl_info & ~XLR_INFO_MASK; + + if (info == CLOG_ZEROPAGE) + { + int pageno; + + memcpy(&pageno, rec, sizeof(int)); + sprintf(buf + strlen(buf), "clog zeropage: %d", pageno); + } + else if (info == SUBTRANS_ZEROPAGE) + { + int pageno; + + memcpy(&pageno, rec, sizeof(int)); + sprintf(buf + strlen(buf), "subtrans zeropage: %d", pageno); + } + else + strcat(buf, "UNKNOWN"); +} diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c new file mode 100644 index 0000000000000000000000000000000000000000..1babedbe5900ba127ae2ad56be26d5f5497a5191 --- /dev/null +++ b/src/backend/access/transam/subtrans.c @@ -0,0 +1,388 @@ +/*------------------------------------------------------------------------- + * + * subtrans.c + * PostgreSQL subtrans-log manager + * + * The pg_subtrans manager is a pg_clog-like manager which stores the parent + * transaction Id for each transaction. It is a fundamental part of the + * nested transactions implementation. A main transaction has a parent + * of InvalidTransactionId, and each subtransaction has its immediate parent. + * The tree can easily be walked from child to parent, but not in the + * opposite direction. + * + * This code is mostly derived from clog.c. 
+ * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.1 2004/07/01 00:49:42 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <fcntl.h> +#include <dirent.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "access/slru.h" +#include "access/subtrans.h" +#include "miscadmin.h" +#include "storage/lwlock.h" + + +/* + * Defines for SubTrans page and segment sizes. A page is the same BLCKSZ + * as is used everywhere else in Postgres. + * + * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, + * SubTrans page numbering also wraps around at + * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at + * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no + * explicit notice of that fact in this module, except when comparing segment + * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes). + */ + +/* We need four bytes per xact */ +#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId)) + +#define TransactionIdToPage(xid) ((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE) +#define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE) + + +/*---------- + * Shared-memory data structures for SUBTRANS control + * + * XLOG interactions: this module generates an XLOG record whenever a new + * SUBTRANS page is initialized to zeroes. Other writes of SUBTRANS come from + * recording of transaction commit or abort in xact.c, which generates its + * own XLOG records for these events and will re-perform the status update + * on redo; so we need make no additional XLOG entry here. 
Also, the XLOG + * is guaranteed flushed through the XLOG commit record before we are called + * to log a commit, so the WAL rule "write xlog before data" is satisfied + * automatically for commits, and we don't really care for aborts. Therefore, + * we don't need to mark SUBTRANS pages with LSN information; we have enough + * synchronization already. + *---------- + */ + + +static SlruCtlData SubTransCtlData; +static SlruCtl SubTransCtl = &SubTransCtlData; + + +static int ZeroSUBTRANSPage(int pageno, bool writeXlog); +static bool SubTransPagePrecedes(int page1, int page2); +static void WriteZeroPageXlogRec(int pageno); + + +/* + * Record the parent of a subtransaction in the subtrans log. + */ +void +SubTransSetParent(TransactionId xid, TransactionId parent) +{ + int pageno = TransactionIdToPage(xid); + int entryno = TransactionIdToEntry(xid); + TransactionId *ptr; + + LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + + ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, true); + ptr += entryno; + + /* Current state should be 0 or target state */ + Assert(*ptr == InvalidTransactionId || *ptr == parent); + + *ptr = parent; + + /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */ + + LWLockRelease(SubTransCtl->ControlLock); +} + +/* + * Interrogate the parent of a transaction in the subtrans log. 
+ */ +TransactionId +SubTransGetParent(TransactionId xid) +{ + int pageno = TransactionIdToPage(xid); + int entryno = TransactionIdToEntry(xid); + TransactionId *ptr; + TransactionId parent; + + /* Bootstrap and frozen XIDs have no parent */ + if (!TransactionIdIsNormal(xid)) + return InvalidTransactionId; + + LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + + ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, false); + ptr += entryno; + + parent = *ptr; + + LWLockRelease(SubTransCtl->ControlLock); + + return parent; +} + +/* + * SubTransGetTopmostTransaction + * + * Returns the topmost transaction of the given transaction id. + */ +TransactionId +SubTransGetTopmostTransaction(TransactionId xid) +{ + TransactionId parentXid = xid, + previousXid = xid; + + while (TransactionIdIsValid(parentXid)) + { + previousXid = parentXid; + parentXid = SubTransGetParent(parentXid); + } + + Assert(TransactionIdIsValid(previousXid)); + + return previousXid; +} + +/* + * SubTransXidsHaveCommonAncestor + * + * Returns true iff the Xids have a common ancestor + */ +bool +SubTransXidsHaveCommonAncestor(TransactionId xid1, TransactionId xid2) +{ + if (TransactionIdEquals(xid1, xid2)) + return true; + + while (TransactionIdIsValid(xid1) && TransactionIdIsValid(xid2)) + { + if (TransactionIdPrecedes(xid2, xid1)) + xid1 = SubTransGetParent(xid1); + else + xid2 = SubTransGetParent(xid2); + + if (TransactionIdEquals(xid1, xid2)) + return true; + } + + return false; +} + +/* + * Initialization of shared memory for Subtrans + */ + +int +SUBTRANSShmemSize(void) +{ + return SimpleLruShmemSize(); +} + +void +SUBTRANSShmemInit(void) +{ + SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", "pg_subtrans"); + SubTransCtl->PagePrecedes = SubTransPagePrecedes; +} + +/* + * This func must be called ONCE on system install. It creates + * the initial SubTrans segment. 
(The SubTrans directory is assumed to + * have been created by initdb, and SUBTRANSShmemInit must have been called + * already.) + */ +void +BootStrapSUBTRANS(void) +{ + int slotno; + + LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + + /* Create and zero the first page of the subtrans log */ + slotno = ZeroSUBTRANSPage(0, false); + + /* Make sure it's written out */ + SimpleLruWritePage(SubTransCtl, slotno, NULL); + /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + + LWLockRelease(SubTransCtl->ControlLock); +} + +/* + * Initialize (or reinitialize) a page of SubTrans to zeroes. + * If writeXlog is TRUE, also emit an XLOG record saying we did this. + * + * The page is not actually written, just set up in shared memory. + * The slot number of the new page is returned. + * + * Control lock must be held at entry, and will be held at exit. + */ +static int +ZeroSUBTRANSPage(int pageno, bool writeXlog) +{ + int slotno = SimpleLruZeroPage(SubTransCtl, pageno); + + if (writeXlog) + WriteZeroPageXlogRec(pageno); + + return slotno; +} + +/* + * This must be called ONCE during postmaster or standalone-backend startup, + * after StartupXLOG has initialized ShmemVariableCache->nextXid. + */ +void +StartupSUBTRANS(void) +{ + /* + * Initialize our idea of the latest page number. + */ + SimpleLruSetLatestPage(SubTransCtl, + TransactionIdToPage(ShmemVariableCache->nextXid)); +} + +/* + * This must be called ONCE during postmaster or standalone-backend shutdown + */ +void +ShutdownSUBTRANS(void) +{ + SimpleLruFlush(SubTransCtl, false); +} + +/* + * Perform a checkpoint --- either during shutdown, or on-the-fly + */ +void +CheckPointSUBTRANS(void) +{ + SimpleLruFlush(SubTransCtl, true); +} + + +/* + * Make sure that SubTrans has room for a newly-allocated XID. + * + * NB: this is called while holding XidGenLock. 
We want it to be very fast + * most of the time; even when it's not so fast, no actual I/O need happen + * unless we're forced to write out a dirty subtrans or xlog page to make room + * in shared memory. + */ +void +ExtendSUBTRANS(TransactionId newestXact) +{ + int pageno; + + /* + * No work except at first XID of a page. But beware: just after + * wraparound, the first XID of page zero is FirstNormalTransactionId. + */ + if (TransactionIdToEntry(newestXact) != 0 && + !TransactionIdEquals(newestXact, FirstNormalTransactionId)) + return; + + pageno = TransactionIdToPage(newestXact); + + LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + + /* Zero the page and make an XLOG entry about it */ + ZeroSUBTRANSPage(pageno, true); + + LWLockRelease(SubTransCtl->ControlLock); +} + + +/* + * Remove all SubTrans segments before the one holding the passed transaction ID + * + * When this is called, we know that the database logically contains no + * reference to transaction IDs older than oldestXact. However, we must + * not truncate the SubTrans until we have performed a checkpoint, to ensure + * that no such references remain on disk either; else a crash just after + * the truncation might leave us with a problem. Since SubTrans segments hold + * a large number of transactions, the opportunity to actually remove a + * segment is fairly rare, and so it seems best not to do the checkpoint + * unless we have confirmed that there is a removable segment. Therefore + * we issue the checkpoint command here, not in higher-level code as might + * seem cleaner. + */ +void +TruncateSUBTRANS(TransactionId oldestXact) +{ + int cutoffPage; + + /* + * The cutoff point is the start of the segment containing oldestXact. + * We pass the *page* containing oldestXact to SimpleLruTruncate. + */ + cutoffPage = TransactionIdToPage(oldestXact); + SimpleLruTruncate(SubTransCtl, cutoffPage); +} + + +/* + * Decide which of two SubTrans page numbers is "older" for truncation purposes. 
+ * + * We need to use comparison of TransactionIds here in order to do the right + * thing with wraparound XID arithmetic. However, if we are asked about + * page number zero, we don't want to hand InvalidTransactionId to + * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, + * offset both xids by FirstNormalTransactionId to avoid that. + */ +static bool +SubTransPagePrecedes(int page1, int page2) +{ + TransactionId xid1; + TransactionId xid2; + + xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; + xid1 += FirstNormalTransactionId; + xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; + xid2 += FirstNormalTransactionId; + + return TransactionIdPrecedes(xid1, xid2); +} + + +/* + * Write a ZEROPAGE xlog record + * + * Note: xlog record is marked as outside transaction control, since we + * want it to be redone whether the invoking transaction commits or not. + * (Besides which, this is normally done just before entering a transaction.) + */ +static void +WriteZeroPageXlogRec(int pageno) +{ + XLogRecData rdata; + + rdata.buffer = InvalidBuffer; + rdata.data = (char *) (&pageno); + rdata.len = sizeof(int); + rdata.next = NULL; + (void) XLogInsert(RM_SLRU_ID, SUBTRANS_ZEROPAGE | XLOG_NO_TRAN, &rdata); +} + +/* Redo a ZEROPAGE action during WAL replay */ +void +subtrans_zeropage_redo(int pageno) +{ + int slotno; + + LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + + slotno = ZeroSUBTRANSPage(pageno, false); + SimpleLruWritePage(SubTransCtl, slotno, NULL); + /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + + LWLockRelease(SubTransCtl->ControlLock); +} diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index bbd4f08bf063e41b108f4835a452ebf86424d0f0..34d281de58785684f6452370ab65432e803b10cf 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: 
pgsql/src/backend/access/transam/transam.c,v 1.56 2003/11/29 19:51:40 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.57 2004/07/01 00:49:42 tgl Exp $ * * NOTES * This file contains the high level access-method interface to the @@ -20,6 +20,7 @@ #include "postgres.h" #include "access/clog.h" +#include "access/subtrans.h" #include "access/transam.h" @@ -35,44 +36,40 @@ bool AMI_OVERRIDE = false; -static bool TransactionLogTest(TransactionId transactionId, XidStatus status); +static XidStatus TransactionLogFetch(TransactionId transactionId); static void TransactionLogUpdate(TransactionId transactionId, XidStatus status); /* ---------------- - * Single-item cache for results of TransactionLogTest. + * Single-item cache for results of TransactionLogFetch. * ---------------- */ -static TransactionId cachedTestXid = InvalidTransactionId; -static XidStatus cachedTestXidStatus; +static TransactionId cachedFetchXid = InvalidTransactionId; +static XidStatus cachedFetchXidStatus; /* ---------------------------------------------------------------- * postgres log access method interface * - * TransactionLogTest + * TransactionLogFetch * TransactionLogUpdate * ---------------------------------------------------------------- */ -/* -------------------------------- - * TransactionLogTest - * -------------------------------- +/* + * TransactionLogFetch --- fetch commit status of specified transaction id */ - -static bool /* true/false: does transaction id have - * specified status? */ -TransactionLogTest(TransactionId transactionId, /* transaction id to test */ - XidStatus status) /* transaction status */ +static XidStatus +TransactionLogFetch(TransactionId transactionId) { - XidStatus xidstatus; /* recorded status of xid */ + XidStatus xidstatus; /* * Before going to the commit log manager, check our single item cache * to see if we didn't just check the transaction status a moment ago. 
*/ - if (TransactionIdEquals(transactionId, cachedTestXid)) - return (status == cachedTestXidStatus); + if (TransactionIdEquals(transactionId, cachedFetchXid)) + return cachedFetchXidStatus; /* * Also, check to see if the transaction ID is a permanent one. @@ -80,10 +77,10 @@ TransactionLogTest(TransactionId transactionId, /* transaction id to test */ if (!TransactionIdIsNormal(transactionId)) { if (TransactionIdEquals(transactionId, BootstrapTransactionId)) - return (status == TRANSACTION_STATUS_COMMITTED); + return TRANSACTION_STATUS_COMMITTED; if (TransactionIdEquals(transactionId, FrozenTransactionId)) - return (status == TRANSACTION_STATUS_COMMITTED); - return (status == TRANSACTION_STATUS_ABORTED); + return TRANSACTION_STATUS_COMMITTED; + return TRANSACTION_STATUS_ABORTED; } /* @@ -92,15 +89,17 @@ TransactionLogTest(TransactionId transactionId, /* transaction id to test */ xidstatus = TransactionIdGetStatus(transactionId); /* - * DO NOT cache status for unfinished transactions! + * DO NOT cache status for unfinished or sub-committed transactions! + * We only cache status that is guaranteed not to change. */ - if (xidstatus != TRANSACTION_STATUS_IN_PROGRESS) + if (xidstatus != TRANSACTION_STATUS_IN_PROGRESS && + xidstatus != TRANSACTION_STATUS_SUB_COMMITTED) { - TransactionIdStore(transactionId, &cachedTestXid); - cachedTestXidStatus = xidstatus; + TransactionIdStore(transactionId, &cachedFetchXid); + cachedFetchXidStatus = xidstatus; } - return (status == xidstatus); + return xidstatus; } /* -------------------------------- @@ -115,12 +114,23 @@ TransactionLogUpdate(TransactionId transactionId, /* trans id to update */ * update the commit log */ TransactionIdSetStatus(transactionId, status); +} - /* - * update (invalidate) our single item TransactionLogTest cache. - */ - TransactionIdStore(transactionId, &cachedTestXid); - cachedTestXidStatus = status; +/* + * TransactionLogMultiUpdate + * + * Update multiple transaction identifiers to a given status. 
+ * Don't depend on this being atomic; it's not. + */ +static void +TransactionLogMultiUpdate(int nxids, TransactionId *xids, XidStatus status) +{ + int i; + + Assert(nxids != 0); + + for (i = 0; i < nxids; i++) + TransactionIdSetStatus(xids[i], status); } /* -------------------------------- @@ -171,13 +181,38 @@ AmiTransactionOverride(bool flag) bool /* true if given transaction committed */ TransactionIdDidCommit(TransactionId transactionId) { + XidStatus xidstatus; + if (AMI_OVERRIDE) { Assert(transactionId == BootstrapTransactionId); return true; } - return TransactionLogTest(transactionId, TRANSACTION_STATUS_COMMITTED); + xidstatus = TransactionLogFetch(transactionId); + + /* + * If it's marked committed, it's committed. + */ + if (xidstatus == TRANSACTION_STATUS_COMMITTED) + return true; + + /* + * If it's marked subcommitted, we have to check the parent recursively. + */ + if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED) + { + TransactionId parentXid; + + parentXid = SubTransGetParent(transactionId); + Assert(TransactionIdIsValid(parentXid)); + return TransactionIdDidCommit(parentXid); + } + + /* + * It's not committed. + */ + return false; } /* @@ -190,35 +225,49 @@ TransactionIdDidCommit(TransactionId transactionId) bool /* true if given transaction aborted */ TransactionIdDidAbort(TransactionId transactionId) { + XidStatus xidstatus; + if (AMI_OVERRIDE) { Assert(transactionId == BootstrapTransactionId); return false; } - return TransactionLogTest(transactionId, TRANSACTION_STATUS_ABORTED); -} + xidstatus = TransactionLogFetch(transactionId); -/* - * Now this func in shmem.c and gives quality answer by scanning - * PGPROC structures of all running backend. - vadim 11/26/96 - * - * Old comments: - * true if given transaction has neither committed nor aborted - */ -#ifdef NOT_USED -bool -TransactionIdIsInProgress(TransactionId transactionId) -{ - if (AMI_OVERRIDE) + /* + * If it's marked aborted, it's aborted. 
+ */ + if (xidstatus == TRANSACTION_STATUS_ABORTED) + return true; + + /* + * If it's marked subcommitted, we have to check the parent recursively. + * + * If we detect that the parent has aborted, update pg_clog to show the + * subtransaction as aborted. This is only needed when the parent + * crashed before either committing or aborting. We want to clean up + * pg_clog so future visitors don't need to make this check again. + */ + if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED) { - Assert(transactionId == BootstrapTransactionId); - return false; + TransactionId parentXid; + bool parentAborted; + + parentXid = SubTransGetParent(transactionId); + parentAborted = TransactionIdDidAbort(parentXid); + + if (parentAborted) + TransactionIdAbort(transactionId); + + return parentAborted; } - return TransactionLogTest(transactionId, TRANSACTION_STATUS_IN_PROGRESS); + /* + * It's not aborted. + */ + return false; } -#endif /* NOT_USED */ /* -------------------------------- * TransactionId Commit @@ -252,6 +301,46 @@ TransactionIdAbort(TransactionId transactionId) TransactionLogUpdate(transactionId, TRANSACTION_STATUS_ABORTED); } +/* + * TransactionIdSubCommit + * Marks the subtransaction associated with the identifier as + * sub-committed. + */ +void +TransactionIdSubCommit(TransactionId transactionId) +{ + TransactionLogUpdate(transactionId, TRANSACTION_STATUS_SUB_COMMITTED); +} + +/* + * TransactionIdCommitTree + * Marks all the given transaction ids as committed. + * + * The caller has to be sure that this is used only to mark subcommitted + * subtransactions as committed, and only *after* marking the toplevel + * parent as committed. Otherwise there is a race condition against + * TransactionIdDidCommit. + */ +void +TransactionIdCommitTree(int nxids, TransactionId *xids) +{ + if (nxids > 0) + TransactionLogMultiUpdate(nxids, xids, TRANSACTION_STATUS_COMMITTED); +} + +/* + * TransactionIdAbortTree + * Marks all the given transaction ids as aborted. 
+ * + * We don't need to worry about the non-atomic behavior, since any onlookers + * will consider all the xacts as not-yet-committed anyway. + */ +void +TransactionIdAbortTree(int nxids, TransactionId *xids) +{ + if (nxids > 0) + TransactionLogMultiUpdate(nxids, xids, TRANSACTION_STATUS_ABORTED); +} /* * TransactionIdPrecedes --- is id1 logically < id2? diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 617c7d19c4350538ce138b94a70fcd0577b9a5b1..9d3b0b323aab9d4e3dad7ccab0fb64fa568a9c0a 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -6,7 +6,7 @@ * Copyright (c) 2000-2003, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.55 2004/01/26 19:15:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.56 2004/07/01 00:49:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -14,6 +14,7 @@ #include "postgres.h" #include "access/clog.h" +#include "access/subtrans.h" #include "access/transam.h" #include "storage/ipc.h" #include "storage/proc.h" @@ -30,7 +31,7 @@ VariableCache ShmemVariableCache = NULL; * Allocate the next XID for my new transaction. */ TransactionId -GetNewTransactionId(void) +GetNewTransactionId(bool isSubXact) { TransactionId xid; @@ -52,8 +53,11 @@ GetNewTransactionId(void) * commit a later XID before we zero the page. Fortunately, a page of * the commit log holds 32K or more transactions, so we don't have to * do this very often. + * + * Extend pg_subtrans too. */ ExtendCLOG(xid); + ExtendSUBTRANS(xid); /* * Now advance the nextXid counter. This must not happen until after @@ -82,8 +86,11 @@ GetNewTransactionId(void) * its own spinlock used only for fetching/storing that PGPROC's xid. * (SInvalLock would then mean primarily that PGPROCs couldn't be added/ * removed while holding the lock.) 
+ * + * We don't want a subtransaction to update the stored Xid; we'll check + * if a transaction Xid is a running subxact by checking pg_subtrans. */ - if (MyProc != NULL) + if (MyProc != NULL && !isSubXact) MyProc->xid = xid; LWLockRelease(XidGenLock); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 2ae0fc5b21d0c450064e50199a767fe901571801..fcf5b37445343cbd002e728a392bb5592e5ce67f 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.168 2004/06/03 02:08:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.169 2004/07/01 00:49:42 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -148,6 +148,7 @@ #include "access/hash.h" #include "access/nbtree.h" #include "access/rtree.h" +#include "access/subtrans.h" #include "access/xact.h" #include "catalog/heap.h" #include "catalog/index.h" @@ -190,20 +191,53 @@ static void CommitTransaction(void); static void RecordTransactionAbort(void); static void StartTransaction(void); +static void RecordSubTransactionCommit(void); +static void StartSubTransaction(void); +static void CommitSubTransaction(void); +static void AbortSubTransaction(void); +static void CleanupSubTransaction(void); +static void PushTransaction(void); +static void PopTransaction(void); + +static void AtSubAbort_Locks(void); +static void AtSubAbort_Memory(void); +static void AtSubCleanup_Memory(void); +static void AtSubCommit_Memory(void); +static void AtSubStart_Memory(void); + +static void ShowTransactionState(const char *str); +static void ShowTransactionStateRec(TransactionState state); +static const char *BlockStateAsString(TBlockState blockState); +static const char *TransStateAsString(TransState state); + /* - * global variables holding the current transaction state. 
+ * CurrentTransactionState always points to the current transaction state + * block. It will point to TopTransactionStateData when not in a + * transaction at all, or when in a top-level transaction. */ -static TransactionStateData CurrentTransactionStateData = { +static TransactionStateData TopTransactionStateData = { 0, /* transaction id */ FirstCommandId, /* command id */ - 0, /* scan command id */ - 0x0, /* start time */ TRANS_DEFAULT, /* transaction state */ - TBLOCK_DEFAULT /* transaction block state from the client + TBLOCK_DEFAULT, /* transaction block state from the client * perspective */ + 0, /* nesting level */ + NULL, /* cur transaction context */ + NIL, /* subcommitted child Xids */ + 0, /* entry-time current userid */ + NULL /* link to parent state block */ }; -static TransactionState CurrentTransactionState = &CurrentTransactionStateData; +static TransactionState CurrentTransactionState = &TopTransactionStateData; + +/* + * These vars hold the value of now(), ie, the transaction start time. + * This does not change as we enter and exit subtransactions, so we don't + * keep it inside the TransactionState stack. + */ +static AbsoluteTime xactStartTime; /* integer part */ +static int xactStartTimeUsec; /* microsecond part */ + /* * User-tweakable parameters @@ -282,13 +316,27 @@ IsAbortedTransactionBlockState(void) { TransactionState s = CurrentTransactionState; - if (s->blockState == TBLOCK_ABORT) + if (s->blockState == TBLOCK_ABORT || + s->blockState == TBLOCK_SUBABORT) return true; return false; } +/* + * GetTopTransactionId + * + * Get the ID of the main transaction, even if we are currently inside + * a subtransaction. 
+ */ +TransactionId +GetTopTransactionId(void) +{ + return TopTransactionStateData.transactionIdData; +} + + /* * GetCurrentTransactionId */ @@ -319,9 +367,7 @@ GetCurrentCommandId(void) AbsoluteTime GetCurrentTransactionStartTime(void) { - TransactionState s = CurrentTransactionState; - - return s->startTime; + return xactStartTime; } @@ -331,11 +377,23 @@ GetCurrentTransactionStartTime(void) AbsoluteTime GetCurrentTransactionStartTimeUsec(int *msec) { - TransactionState s = CurrentTransactionState; + *msec = xactStartTimeUsec; + return xactStartTime; +} + - *msec = s->startTimeUsec; +/* + * GetCurrentTransactionNestLevel + * + * Note: this will return zero when not inside any transaction, one when + * inside a top-level transaction, etc. + */ +int +GetCurrentTransactionNestLevel(void) +{ + TransactionState s = CurrentTransactionState; - return s->startTime; + return s->nestingLevel; } @@ -358,19 +416,27 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) return false; } - return TransactionIdEquals(xid, s->transactionIdData); -} + /* + * We will return true for the Xid of the current subtransaction, + * any of its subcommitted children, any of its parents, or any of + * their previously subcommitted children. + */ + while (s != NULL) + { + ListCell *cell; + if (TransactionIdEquals(xid, s->transactionIdData)) + return true; + foreach(cell, s->childXids) + { + if (TransactionIdEquals(xid, lfirst_int(cell))) + return true; + } -/* - * CommandIdIsCurrentCommandId - */ -bool -CommandIdIsCurrentCommandId(CommandId cid) -{ - TransactionState s = CurrentTransactionState; + s = s->parent; + } - return (cid == s->commandId); + return false; } @@ -437,13 +503,15 @@ AtStart_Locks(void) static void AtStart_Memory(void) { + TransactionState s = CurrentTransactionState; + /* * We shouldn't have a transaction context already. */ Assert(TopTransactionContext == NULL); /* - * Create a toplevel context for the transaction, and make it active. 
+ * Create a toplevel context for the transaction. */ TopTransactionContext = AllocSetContextCreate(TopMemoryContext, @@ -452,9 +520,47 @@ AtStart_Memory(void) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - MemoryContextSwitchTo(TopTransactionContext); + /* + * In a top-level transaction, CurTransactionContext is the same as + * TopTransactionContext. + */ + CurTransactionContext = TopTransactionContext; + s->curTransactionContext = CurTransactionContext; + + /* Make the CurTransactionContext active. */ + MemoryContextSwitchTo(CurTransactionContext); } +/* ---------------------------------------------------------------- + * StartSubTransaction stuff + * ---------------------------------------------------------------- + */ + +/* + * AtSubStart_Memory + */ +static void +AtSubStart_Memory(void) +{ + TransactionState s = CurrentTransactionState; + + Assert(CurTransactionContext != NULL); + + /* + * Create a CurTransactionContext, which will be used to hold data that + * survives subtransaction commit but disappears on subtransaction abort. + * We make it a child of the immediate parent's CurTransactionContext. + */ + CurTransactionContext = AllocSetContextCreate(CurTransactionContext, + "CurTransactionContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + s->curTransactionContext = CurTransactionContext; + + /* Make the CurTransactionContext active. 
*/ + MemoryContextSwitchTo(CurTransactionContext); +} /* ---------------------------------------------------------------- * CommitTransaction stuff @@ -467,13 +573,25 @@ AtStart_Memory(void) void RecordTransactionCommit(void) { + int nrels; + RelFileNode *rptr; + int nchildren; + TransactionId *children; + + /* Get data needed for commit record */ + nrels = smgrGetPendingDeletes(true, &rptr); + nchildren = xactGetCommittedChildren(&children, false); + /* - * If we made neither any XLOG entries nor any temp-rel updates, we - * can omit recording the transaction commit at all. + * If we made neither any XLOG entries nor any temp-rel updates, + * and have no files to be deleted, we can omit recording the transaction + * commit at all. (This test includes the effects of subtransactions, + * so the presence of committed subxacts need not alone force a write.) */ - if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate) + if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0) { TransactionId xid = GetCurrentTransactionId(); + bool madeTCentries; XLogRecPtr recptr; /* Tell bufmgr and smgr to prepare for commit */ @@ -482,40 +600,46 @@ RecordTransactionCommit(void) START_CRIT_SECTION(); /* - * We only need to log the commit in xlog if the transaction made - * any transaction-controlled XLOG entries. (Otherwise, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed.) + * We only need to log the commit in XLOG if the transaction made + * any transaction-controlled XLOG entries or will delete files. + * (If it made no transaction-controlled XLOG entries, its XID + * appears nowhere in permanent storage, so no one else will ever care + * if it committed.) 
*/ - if (MyLastRecPtr.xrecoff != 0) + madeTCentries = (MyLastRecPtr.xrecoff != 0); + if (madeTCentries || nrels > 0) { - /* Need to emit a commit record */ - XLogRecData rdata[2]; + XLogRecData rdata[3]; + int lastrdata = 0; xl_xact_commit xlrec; - int nrels; - RelFileNode *rptr; - - nrels = smgrGetPendingDeletes(true, &rptr); xlrec.xtime = time(NULL); + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) (&xlrec); rdata[0].len = MinSizeOfXactCommit; + /* dump rels to delete */ if (nrels > 0) { rdata[0].next = &(rdata[1]); rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rptr; rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].next = NULL; + lastrdata = 1; } - else - rdata[0].next = NULL; + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].buffer = InvalidBuffer; + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + lastrdata = 2; + } + rdata[lastrdata].next = NULL; recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); - - if (rptr) - pfree(rptr); } else { @@ -529,6 +653,9 @@ RecordTransactionCommit(void) * example, if we reported a nextval() result to the client, this * ensures that any XLOG record generated by nextval will hit the * disk before we report the transaction committed. + * + * Note: if we generated a commit record above, MyXactMadeXLogEntry + * will certainly be set now. */ if (MyXactMadeXLogEntry) { @@ -560,8 +687,12 @@ RecordTransactionCommit(void) * is okay because no one else will ever care whether we * committed. 
*/ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + if (madeTCentries || MyXactMadeTempRelUpdate) + { TransactionIdCommit(xid); + /* to avoid race conditions, the parent must commit first */ + TransactionIdCommitTree(nchildren, children); + } END_CRIT_SECTION(); } @@ -573,6 +704,12 @@ RecordTransactionCommit(void) /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; + + /* And clean up local data */ + if (rptr) + pfree(rptr); + if (children) + pfree(children); } @@ -590,7 +727,7 @@ AtCommit_Cache(void) /* * Make catalog changes visible to all backends. */ - AtEOXactInvalidationMessages(true); + AtEOXact_Inval(true); } /* @@ -602,7 +739,7 @@ AtCommit_LocalCache(void) /* * Make catalog changes visible to me for the next command. */ - CommandEndInvalidationMessages(true); + CommandEndInvalidationMessages(); } /* @@ -616,7 +753,7 @@ AtCommit_Locks(void) * * Then you're up a creek! -mer 5/24/92 */ - ProcReleaseLocks(true); + ProcReleaseLocks(ReleaseAllExceptSession, 0, NULL); } /* @@ -638,6 +775,88 @@ AtCommit_Memory(void) Assert(TopTransactionContext != NULL); MemoryContextDelete(TopTransactionContext); TopTransactionContext = NULL; + CurTransactionContext = NULL; + CurrentTransactionState->curTransactionContext = NULL; +} + +/* ---------------------------------------------------------------- + * CommitSubTransaction stuff + * ---------------------------------------------------------------- + */ + +/* + * AtSubCommit_Memory + * + * We do not throw away the child's CurTransactionContext, since the data + * it contains will be needed at upper commit. + */ +static void +AtSubCommit_Memory(void) +{ + TransactionState s = CurrentTransactionState; + + Assert(s->parent != NULL); + + /* Return to parent transaction level's memory context. 
*/ + CurTransactionContext = s->parent->curTransactionContext; + MemoryContextSwitchTo(CurTransactionContext); +} + +/* + * AtSubCommit_childXids + * + * Pass my own XID and my child XIDs up to my parent as committed children. + */ +static void +AtSubCommit_childXids(void) +{ + TransactionState s = CurrentTransactionState; + MemoryContext old_cxt; + + Assert(s->parent != NULL); + + old_cxt = MemoryContextSwitchTo(s->parent->curTransactionContext); + + s->parent->childXids = list_concat(s->parent->childXids, s->childXids); + s->childXids = NIL; /* ensure list not doubly referenced */ + + s->parent->childXids = lappend_int(s->parent->childXids, + s->transactionIdData); + + MemoryContextSwitchTo(old_cxt); +} + +/* + * RecordSubTransactionCommit + */ +static void +RecordSubTransactionCommit(void) +{ + /* + * We do not log the subcommit in XLOG; it doesn't matter until + * the top-level transaction commits. + * + * We must mark the subtransaction subcommitted in clog if its XID + * appears either in permanent rels or in local temporary rels. We + * test this by seeing if we made transaction-controlled entries + * *OR* local-rel tuple updates. (The test here actually covers the + * entire transaction tree so far, so it may mark subtransactions that + * don't really need it, but it's probably not worth being tenser. + * Note that if a prior subtransaction dirtied these variables, then + * RecordTransactionCommit will have to do the full pushup anyway...) + */ + if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + { + TransactionId xid = GetCurrentTransactionId(); + + /* XXX does this really need to be a critical section? 
*/ + START_CRIT_SECTION(); + + /* Record subtransaction subcommit */ + TransactionIdSubCommit(xid); + + END_CRIT_SECTION(); + } } /* ---------------------------------------------------------------- @@ -651,14 +870,24 @@ AtCommit_Memory(void) static void RecordTransactionAbort(void) { + int nrels; + RelFileNode *rptr; + int nchildren; + TransactionId *children; + + /* Get data needed for abort record */ + nrels = smgrGetPendingDeletes(false, &rptr); + nchildren = xactGetCommittedChildren(&children, false); + /* * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, we can omit recording the transaction abort at - * all. No one will ever care that it aborted. + * temp-rel updates, and are not going to delete any files, we can omit + * recording the transaction abort at all. No one will ever care that + * it aborted. (These tests cover our whole transaction tree.) */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) { - TransactionId xid = GetCurrentTransactionId(); + TransactionId xid = GetCurrentTransactionId(); /* * Catch the scenario where we aborted partway through @@ -671,50 +900,64 @@ RecordTransactionAbort(void) /* * We only need to log the abort in XLOG if the transaction made - * any transaction-controlled XLOG entries. (Otherwise, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed.) We do not flush XLOG to disk unless - * deleting files, since the default assumption after a crash - * would be that we aborted, anyway. + * any transaction-controlled XLOG entries or will delete files. + * (If it made no transaction-controlled XLOG entries, its XID + * appears nowhere in permanent storage, so no one else will ever care + * if it committed.) + * + * We do not flush XLOG to disk unless deleting files, since the + * default assumption after a crash would be that we aborted, anyway. 
*/ - if (MyLastRecPtr.xrecoff != 0) + if (MyLastRecPtr.xrecoff != 0 || nrels > 0) { - XLogRecData rdata[2]; + XLogRecData rdata[3]; + int lastrdata = 0; xl_xact_abort xlrec; - int nrels; - RelFileNode *rptr; XLogRecPtr recptr; - nrels = smgrGetPendingDeletes(false, &rptr); - xlrec.xtime = time(NULL); + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) (&xlrec); rdata[0].len = MinSizeOfXactAbort; + /* dump rels to delete */ if (nrels > 0) { rdata[0].next = &(rdata[1]); rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rptr; rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].next = NULL; + lastrdata = 1; } - else - rdata[0].next = NULL; + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].buffer = InvalidBuffer; + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + lastrdata = 2; + } + rdata[lastrdata].next = NULL; recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); + /* Must flush if we are deleting files... */ if (nrels > 0) XLogFlush(recptr); - - if (rptr) - pfree(rptr); } /* * Mark the transaction aborted in clog. This is not absolutely * necessary but we may as well do it while we are here. + * + * The ordering here isn't critical but it seems best to mark the + * parent last. That reduces the chance that concurrent + * TransactionIdDidAbort calls will decide they need to do redundant + * work. 
*/ + TransactionIdAbortTree(nchildren, children); TransactionIdAbort(xid); END_CRIT_SECTION(); @@ -727,6 +970,12 @@ RecordTransactionAbort(void) /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; + + /* And clean up local data */ + if (rptr) + pfree(rptr); + if (children) + pfree(children); } /* @@ -736,7 +985,7 @@ static void AtAbort_Cache(void) { AtEOXact_RelationCache(false); - AtEOXactInvalidationMessages(false); + AtEOXact_Inval(false); } /* @@ -750,7 +999,7 @@ AtAbort_Locks(void) * * Then you're up a creek without a paddle! -mer */ - ProcReleaseLocks(false); + ProcReleaseLocks(ReleaseAll, 0, NULL); } @@ -779,6 +1028,127 @@ AtAbort_Memory(void) MemoryContextSwitchTo(TopMemoryContext); } +/* + * AtSubAbort_Locks + */ +static void +AtSubAbort_Locks(void) +{ + int nxids; + TransactionId *xids; + + nxids = xactGetCommittedChildren(&xids, true); + + ProcReleaseLocks(ReleaseGivenXids, nxids, xids); + + pfree(xids); +} + + +/* + * AtSubAbort_Memory + */ +static void +AtSubAbort_Memory(void) +{ + Assert(TopTransactionContext != NULL); + + MemoryContextSwitchTo(TopTransactionContext); +} + +/* + * RecordSubTransactionAbort + */ +static void +RecordSubTransactionAbort(void) +{ + int nrels; + RelFileNode *rptr; + int nchildren; + TransactionId *children; + + /* Get data needed for abort record */ + nrels = smgrGetPendingDeletes(false, &rptr); + nchildren = xactGetCommittedChildren(&children, false); + + /* + * If we made neither any transaction-controlled XLOG entries nor any + * temp-rel updates, and are not going to delete any files, we can omit + * recording the transaction abort at all. No one will ever care that + * it aborted. (These tests cover our whole transaction tree, and + * therefore may mark subxacts that don't really need it, but it's + * probably not worth being tenser.) 
+ * + * In this case we needn't worry about marking subcommitted children as + * aborted, because they didn't mark themselves as subcommitted in the + * first place; see the optimization in RecordSubTransactionCommit. + */ + if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) + { + TransactionId xid = GetCurrentTransactionId(); + + START_CRIT_SECTION(); + + /* + * We only need to log the abort in XLOG if the transaction made + * any transaction-controlled XLOG entries or will delete files. + */ + if (MyLastRecPtr.xrecoff != 0 || nrels > 0) + { + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_abort xlrec; + XLogRecPtr recptr; + + xlrec.xtime = time(NULL); + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactAbort; + /* dump rels to delete */ + if (nrels > 0) + { + rdata[0].next = &(rdata[1]); + rdata[1].buffer = InvalidBuffer; + rdata[1].data = (char *) rptr; + rdata[1].len = nrels * sizeof(RelFileNode); + lastrdata = 1; + } + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].buffer = InvalidBuffer; + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + lastrdata = 2; + } + rdata[lastrdata].next = NULL; + + recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); + + /* Must flush if we are deleting files... */ + if (nrels > 0) + XLogFlush(recptr); + } + + /* + * Mark the transaction aborted in clog. This is not absolutely + * necessary but we may as well do it while we are here. 
+ */ + TransactionIdAbortTree(nchildren, children); + TransactionIdAbort(xid); + + END_CRIT_SECTION(); + } + + /* And clean up local data */ + if (rptr) + pfree(rptr); + if (children) + pfree(children); +} /* ---------------------------------------------------------------- * CleanupTransaction stuff @@ -798,15 +1168,46 @@ AtCleanup_Memory(void) */ MemoryContextSwitchTo(TopMemoryContext); + Assert(CurrentTransactionState->parent == NULL); + /* * Release all transaction-local memory. */ if (TopTransactionContext != NULL) MemoryContextDelete(TopTransactionContext); TopTransactionContext = NULL; + CurTransactionContext = NULL; + CurrentTransactionState->curTransactionContext = NULL; } +/* ---------------------------------------------------------------- + * CleanupSubTransaction stuff + * ---------------------------------------------------------------- + */ + +/* + * AtSubCleanup_Memory + */ +static void +AtSubCleanup_Memory(void) +{ + TransactionState s = CurrentTransactionState; + + Assert(s->parent != NULL); + + /* Make sure we're not in an about-to-be-deleted context */ + MemoryContextSwitchTo(s->parent->curTransactionContext); + CurTransactionContext = s->parent->curTransactionContext; + + /* + * Delete the subxact local memory contexts. Its CurTransactionContext + * can go too (note this also kills CurTransactionContexts from any + * children of the subxact). 
+ */ + MemoryContextDelete(s->curTransactionContext); +} + /* ---------------------------------------------------------------- * interface routines * ---------------------------------------------------------------- @@ -842,20 +1243,34 @@ StartTransaction(void) /* * generate a new transaction id */ - s->transactionIdData = GetNewTransactionId(); + s->transactionIdData = GetNewTransactionId(false); XactLockTableInsert(s->transactionIdData); + /* + * set now() + */ + xactStartTime = GetCurrentAbsoluteTimeUsec(&(xactStartTimeUsec)); + /* * initialize current transaction state fields */ s->commandId = FirstCommandId; - s->startTime = GetCurrentAbsoluteTimeUsec(&(s->startTimeUsec)); + s->nestingLevel = 1; + s->childXids = NIL; + + /* + * You might expect to see "s->currentUser = GetUserId();" here, but + * you won't because it doesn't work during startup; the userid isn't + * set yet during a backend's first transaction start. We only use + * the currentUser field in sub-transaction state structs. 
+ */ /* * initialize the various transaction subsystems */ AtStart_Memory(); + AtStart_Inval(); AtStart_Cache(); AtStart_Locks(); @@ -870,6 +1285,7 @@ StartTransaction(void) */ s->state = TRANS_INPROGRESS; + ShowTransactionState("StartTransaction"); } /* @@ -880,11 +1296,14 @@ CommitTransaction(void) { TransactionState s = CurrentTransactionState; + ShowTransactionState("CommitTransaction"); + /* * check the current transaction state */ if (s->state != TRANS_INPROGRESS) elog(WARNING, "CommitTransaction and not in in-progress state"); + Assert(s->parent == NULL); /* * Tell the trigger manager that this transaction is about to be @@ -970,19 +1389,22 @@ CommitTransaction(void) AtCommit_Locks(); CallEOXactCallbacks(true); - AtEOXact_GUC(true); + AtEOXact_GUC(true, false); AtEOXact_SPI(true); AtEOXact_gist(); AtEOXact_hash(); AtEOXact_nbtree(); AtEOXact_rtree(); - AtEOXact_on_commit_actions(true); + AtEOXact_on_commit_actions(true, s->transactionIdData); AtEOXact_Namespace(true); AtEOXact_CatCache(true); AtEOXact_Files(); pgstat_count_xact_commit(); AtCommit_Memory(); + s->nestingLevel = 0; + s->childXids = NIL; + /* * done with commit processing, set current transaction state back to * default @@ -1026,6 +1448,7 @@ AbortTransaction(void) */ if (s->state != TRANS_INPROGRESS) elog(WARNING, "AbortTransaction and not in in-progress state"); + Assert(s->parent == NULL); /* * set the current transaction state information appropriately during @@ -1037,7 +1460,14 @@ AbortTransaction(void) AtAbort_Memory(); /* - * Reset user id which might have been changed transiently + * Reset user id which might have been changed transiently. We cannot + * use s->currentUser, but must get the session userid from miscinit.c. + * + * (Note: it is not necessary to restore session authorization here + * because that can only be changed via GUC, and GUC will take care of + * rolling it back if need be. 
However, an error within a SECURITY + * DEFINER function could send control here with the wrong current + * userid.) */ SetUserId(GetSessionUserId()); @@ -1080,13 +1510,13 @@ AbortTransaction(void) AtAbort_Locks(); CallEOXactCallbacks(false); - AtEOXact_GUC(false); + AtEOXact_GUC(false, false); AtEOXact_SPI(false); AtEOXact_gist(); AtEOXact_hash(); AtEOXact_nbtree(); AtEOXact_rtree(); - AtEOXact_on_commit_actions(false); + AtEOXact_on_commit_actions(false, s->transactionIdData); AtEOXact_Namespace(false); AtEOXact_CatCache(false); AtEOXact_Files(); @@ -1119,6 +1549,9 @@ CleanupTransaction(void) AtCleanup_Portals(); /* now safe to release portal memory */ AtCleanup_Memory(); /* and transaction memory */ + s->nestingLevel = 0; + s->childXids = NIL; + /* * done with abort processing, set current transaction state back to * default @@ -1145,25 +1578,6 @@ StartTransactionCommand(void) s->blockState = TBLOCK_STARTED; break; - /* - * We should never experience this -- it means the STARTED state - * was not changed in the previous CommitTransactionCommand. - */ - case TBLOCK_STARTED: - elog(WARNING, "StartTransactionCommand: unexpected TBLOCK_STARTED"); - break; - - /* - * We should never experience this -- if we do it means the - * BEGIN state was not changed in the previous - * CommitTransactionCommand(). If we get it, we print a - * warning and change to the in-progress state. - */ - case TBLOCK_BEGIN: - elog(WARNING, "StartTransactionCommand: unexpected TBLOCK_BEGIN"); - s->blockState = TBLOCK_INPROGRESS; - break; - /* * This is the case when are somewhere in a transaction block * and about to start a new command. For now we do nothing @@ -1171,20 +1585,7 @@ StartTransactionCommand(void) * initialization. */ case TBLOCK_INPROGRESS: - break; - - /* - * As with BEGIN, we should never experience this if we do it - * means the END state was not changed in the previous - * CommitTransactionCommand(). 
If we get it, we print a - * warning, commit the transaction, start a new transaction - * and change to the default state. - */ - case TBLOCK_END: - elog(WARNING, "StartTransactionCommand: unexpected TBLOCK_END"); - CommitTransaction(); - StartTransaction(); - s->blockState = TBLOCK_DEFAULT; + case TBLOCK_SUBINPROGRESS: break; /* @@ -1194,26 +1595,30 @@ StartTransactionCommand(void) * TRANSACTION" which will set things straight. */ case TBLOCK_ABORT: + case TBLOCK_SUBABORT: break; - /* - * This means we somehow aborted and the last call to - * CommitTransactionCommand() didn't clear the state so we - * remain in the ENDABORT state and maybe next time we get to - * CommitTransactionCommand() the state will get reset to - * default. - */ + /* These cases are invalid. */ + case TBLOCK_STARTED: + case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + case TBLOCK_END: + case TBLOCK_SUBEND: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: case TBLOCK_ENDABORT: - elog(WARNING, "StartTransactionCommand: unexpected TBLOCK_ENDABORT"); + elog(FATAL, "StartTransactionCommand: unexpected state %s", + BlockStateAsString(s->blockState)); break; } /* - * We must switch to TopTransactionContext before returning. This is + * We must switch to CurTransactionContext before returning. This is * already done if we called StartTransaction, otherwise not. */ - Assert(TopTransactionContext != NULL); - MemoryContextSwitchTo(TopTransactionContext); + Assert(CurTransactionContext != NULL); + MemoryContextSwitchTo(CurTransactionContext); } /* @@ -1232,7 +1637,7 @@ CommitTransactionCommand(void) * appropiately. 
*/ case TBLOCK_DEFAULT: - elog(WARNING, "CommitTransactionCommand: unexpected TBLOCK_DEFAULT"); + elog(FATAL, "CommitTransactionCommand: unexpected TBLOCK_DEFAULT"); break; /* @@ -1291,18 +1696,83 @@ CommitTransactionCommand(void) CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; - } -} -/* - * AbortCurrentTransaction - */ -void -AbortCurrentTransaction(void) -{ - TransactionState s = CurrentTransactionState; + /* + * We were just issued a BEGIN inside a transaction block. + * Start a subtransaction. + */ + case TBLOCK_SUBBEGIN: + StartSubTransaction(); + s->blockState = TBLOCK_SUBINPROGRESS; + break; - switch (s->blockState) + /* + * We were issued a BEGIN inside an aborted transaction block. + * Start a subtransaction, and put it in aborted state. + */ + case TBLOCK_SUBBEGINABORT: + StartSubTransaction(); + AbortSubTransaction(); + s->blockState = TBLOCK_SUBABORT; + break; + + /* + * Inside a subtransaction, increment the command counter. + */ + case TBLOCK_SUBINPROGRESS: + CommandCounterIncrement(); + break; + + /* + * We where issued a COMMIT command, so we end the current + * subtransaction and return to the parent transaction. + */ + case TBLOCK_SUBEND: + CommitSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + break; + + /* + * If we are in an aborted subtransaction, do nothing. + */ + case TBLOCK_SUBABORT: + break; + + /* + * We are ending a subtransaction that aborted nicely, + * so the parent can be allowed to live. + */ + case TBLOCK_SUBENDABORT_OK: + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + break; + + /* + * We are ending a subtransaction that aborted in a unclean + * way (e.g. the user issued COMMIT in an aborted subtrasaction.) + * Abort the subtransaction, and abort the parent too. 
+ */ + case TBLOCK_SUBENDABORT_ERROR: + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + Assert(s->blockState != TBLOCK_SUBENDABORT_ERROR); + AbortCurrentTransaction(); + break; + } +} + +/* + * AbortCurrentTransaction + */ +void +AbortCurrentTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + switch (s->blockState) { /* * we aren't in a transaction, so we do nothing. @@ -1362,6 +1832,7 @@ AbortCurrentTransaction(void) * state. */ case TBLOCK_ABORT: + case TBLOCK_SUBABORT: break; /* @@ -1374,6 +1845,53 @@ AbortCurrentTransaction(void) CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; + + /* + * If we are just starting a subtransaction, put it + * in aborted state. + */ + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + PushTransaction(); + s = CurrentTransactionState; /* changed by push */ + StartSubTransaction(); + AbortSubTransaction(); + s->blockState = TBLOCK_SUBABORT; + break; + + case TBLOCK_SUBINPROGRESS: + AbortSubTransaction(); + s->blockState = TBLOCK_SUBABORT; + break; + + /* + * If we are aborting an ending transaction, + * we have to abort the parent transaction too. + */ + case TBLOCK_SUBEND: + AbortSubTransaction(); + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + Assert(s->blockState != TBLOCK_SUBEND && + s->blockState != TBLOCK_SUBENDABORT_OK && + s->blockState != TBLOCK_SUBENDABORT_ERROR); + AbortCurrentTransaction(); + break; + + /* + * Same as above, except the Abort() was already done. 
+ */ + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + Assert(s->blockState != TBLOCK_SUBEND && + s->blockState != TBLOCK_SUBENDABORT_OK && + s->blockState != TBLOCK_SUBENDABORT_ERROR); + AbortCurrentTransaction(); + break; } } @@ -1387,7 +1905,7 @@ AbortCurrentTransaction(void) * If we have already started a transaction block, issue an error; also issue * an error if we appear to be running inside a user-defined function (which * could issue more commands and possibly cause a failure after the statement - * completes). + * completes). Subtransactions are verboten too. * * stmtNode: pointer to parameter block for statement; this is used in * a very klugy way to determine whether we are inside a function. @@ -1406,6 +1924,16 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) errmsg("%s cannot run inside a transaction block", stmtType))); + /* + * subtransaction? + */ + if (IsSubTransaction()) + ereport(ERROR, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + /* translator: %s represents an SQL statement name */ + errmsg("%s cannot run inside a subtransaction", + stmtType))); + /* * Are we inside a function call? If the statement's parameter block * was allocated in QueryContext, assume it is an interactive command. 
@@ -1416,10 +1944,11 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ errmsg("%s cannot be executed from a function", stmtType))); + /* If we got past IsTransactionBlock test, should be in default state */ if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && CurrentTransactionState->blockState != TBLOCK_STARTED) - elog(ERROR, "cannot prevent transaction chain"); + elog(FATAL, "cannot prevent transaction chain"); /* all okay */ } @@ -1433,8 +1962,8 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) * * If we appear to be running inside a user-defined function, we do not * issue an error, since the function could issue more commands that make - * use of the current statement's results. Thus this is an inverse for - * PreventTransactionChain. + * use of the current statement's results. Likewise subtransactions. + * Thus this is an inverse for PreventTransactionChain. * * stmtNode: pointer to parameter block for statement; this is used in * a very klugy way to determine whether we are inside a function. @@ -1449,6 +1978,12 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) if (IsTransactionBlock()) return; + /* + * subtransaction? + */ + if (IsSubTransaction()) + return; + /* * Are we inside a function call? If the statement's parameter block * was allocated in QueryContext, assume it is an interactive command. @@ -1483,6 +2018,9 @@ IsInTransactionChain(void *stmtNode) if (IsTransactionBlock()) return true; + if (IsSubTransaction()) + return true; + if (!MemoryContextContains(QueryContext, stmtNode)) return true; @@ -1571,26 +2109,40 @@ BeginTransactionBlock(void) s->blockState = TBLOCK_BEGIN; break; - /* Already a transaction block in progress. */ + /* + * Already a transaction block in progress. + * Start a subtransaction. 
+ */ case TBLOCK_INPROGRESS: - ereport(WARNING, - (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), - errmsg("there is already a transaction in progress"))); + case TBLOCK_SUBINPROGRESS: + PushTransaction(); + s = CurrentTransactionState; /* changed by push */ + s->blockState = TBLOCK_SUBBEGIN; + break; /* - * This shouldn't happen, because a transaction in aborted state - * will not be allowed to call BeginTransactionBlock. + * An aborted transaction block should be allowed to start + * a subtransaction, but it must put it in aborted state. */ case TBLOCK_ABORT: - elog(WARNING, "BeginTransactionBlock: unexpected TBLOCK_ABORT"); + case TBLOCK_SUBABORT: + PushTransaction(); + s = CurrentTransactionState; /* changed by push */ + s->blockState = TBLOCK_SUBBEGINABORT; break; /* These cases are invalid. Reject them altogether. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: case TBLOCK_ENDABORT: case TBLOCK_END: - elog(FATAL, "BeginTransactionBlock: not in a user-allowed state!"); + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + case TBLOCK_SUBEND: + elog(FATAL, "BeginTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); break; } } @@ -1614,6 +2166,15 @@ EndTransactionBlock(void) s->blockState = TBLOCK_END; break; + /* + * here we are in a subtransaction block. Signal + * CommitTransactionCommand() to end it and return to the + * parent transaction. + */ + case TBLOCK_SUBINPROGRESS: + s->blockState = TBLOCK_SUBEND; + break; + /* * here, we are in a transaction block which aborted and since the * AbortTransaction() was already done, we do whatever is needed @@ -1625,12 +2186,21 @@ EndTransactionBlock(void) s->blockState = TBLOCK_ENDABORT; break; + /* + * here we are in an aborted subtransaction. Signal + * CommitTransactionCommand() to clean up and return to the + * parent transaction. 
+ */ + case TBLOCK_SUBABORT: + s->blockState = TBLOCK_SUBENDABORT_ERROR; + break; + case TBLOCK_STARTED: /* - * here, the user issued COMMIT when not inside a transaction. Issue a - * WARNING and go to abort state. The upcoming call to - * CommitTransactionCommand() will then put us back into the default - * state. + * here, the user issued COMMIT when not inside a + * transaction. Issue a WARNING and go to abort state. The + * upcoming call to CommitTransactionCommand() will then put us + * back into the default state. */ ereport(WARNING, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), @@ -1644,7 +2214,13 @@ EndTransactionBlock(void) case TBLOCK_BEGIN: case TBLOCK_ENDABORT: case TBLOCK_END: - elog(FATAL, "EndTransactionBlock and not in a user-allowed state"); + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + case TBLOCK_SUBEND: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + elog(FATAL, "EndTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); break; } } @@ -1657,42 +2233,68 @@ UserAbortTransactionBlock(void) { TransactionState s = CurrentTransactionState; - /* - * if the transaction has already been automatically aborted with an - * error, and the user subsequently types 'abort', allow it. (the - * behavior is the same as if they had typed 'end'.) - */ - if (s->blockState == TBLOCK_ABORT) - { - s->blockState = TBLOCK_ENDABORT; - return; - } - - if (s->blockState == TBLOCK_INPROGRESS) - { + switch (s->blockState) { /* - * here we were inside a transaction block and we got an abort - * command from the user, so we move to the ENDABORT state and - * do abort processing so we will end up in the default state - * after the upcoming CommitTransactionCommand(). + * here we are inside a failed transaction block and we got an abort + * command from the user. Abort processing is already done, we just + * need to move to the ENDABORT state so we will end up in the default + * state after the upcoming CommitTransactionCommand(). 
*/ - s->blockState = TBLOCK_ABORT; - AbortTransaction(); - s->blockState = TBLOCK_ENDABORT; - return; + case TBLOCK_ABORT: + s->blockState = TBLOCK_ENDABORT; + break; + + /* Ditto, for a subtransaction. */ + case TBLOCK_SUBABORT: + s->blockState = TBLOCK_SUBENDABORT_OK; + break; + + /* + * here we are inside a transaction block and we got an abort + * command from the user, so we move to the ENDABORT state and + * do abort processing so we will end up in the default state + * after the upcoming CommitTransactionCommand(). + */ + case TBLOCK_INPROGRESS: + AbortTransaction(); + s->blockState = TBLOCK_ENDABORT; + break; + + /* Ditto, for a subtransaction. */ + case TBLOCK_SUBINPROGRESS: + AbortSubTransaction(); + s->blockState = TBLOCK_SUBENDABORT_OK; + break; + + /* + * here, the user issued ABORT when not inside a + * transaction. Issue a WARNING and go to abort state. The + * upcoming call to CommitTransactionCommand() will then put us + * back into the default state. + */ + case TBLOCK_STARTED: + ereport(WARNING, + (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), + errmsg("there is no transaction in progress"))); + AbortTransaction(); + s->blockState = TBLOCK_ENDABORT; + break; + + /* these cases are invalid. */ + case TBLOCK_DEFAULT: + case TBLOCK_BEGIN: + case TBLOCK_END: + case TBLOCK_ENDABORT: + case TBLOCK_SUBEND: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + elog(FATAL, "UserAbortTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + break; } - /* - * here, the user issued ABORT when not inside a transaction. Issue a - * WARNING and go to abort state. The upcoming call to - * CommitTransactionCommand() will then put us back into the default - * state. 
- */ - ereport(WARNING, - (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), - errmsg("there is no transaction in progress"))); - AbortTransaction(); - s->blockState = TBLOCK_ENDABORT; } /* @@ -1708,32 +2310,58 @@ AbortOutOfAnyTransaction(void) TransactionState s = CurrentTransactionState; /* - * Get out of any transaction + * Get out of any transaction or nested transaction */ - switch (s->blockState) - { - case TBLOCK_DEFAULT: - /* Not in a transaction, do nothing */ - break; - case TBLOCK_STARTED: - case TBLOCK_BEGIN: - case TBLOCK_INPROGRESS: - case TBLOCK_END: - /* In a transaction, so clean up */ - AbortTransaction(); - CleanupTransaction(); - break; - case TBLOCK_ABORT: - case TBLOCK_ENDABORT: - /* AbortTransaction already done, still need Cleanup */ - CleanupTransaction(); - break; - } + do { + switch (s->blockState) + { + case TBLOCK_DEFAULT: + /* Not in a transaction, do nothing */ + break; + case TBLOCK_STARTED: + case TBLOCK_BEGIN: + case TBLOCK_INPROGRESS: + case TBLOCK_END: + /* In a transaction, so clean up */ + AbortTransaction(); + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; + break; + case TBLOCK_ABORT: + case TBLOCK_ENDABORT: + /* AbortTransaction already done, still need Cleanup */ + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; + break; + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + /* + * Just starting a new transaction -- return to parent. + * FIXME -- Is this correct? 
+ */ + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + break; + case TBLOCK_SUBINPROGRESS: + case TBLOCK_SUBEND: + /* In a subtransaction, so clean it up and abort parent too */ + AbortSubTransaction(); + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + break; + case TBLOCK_SUBABORT: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + CleanupSubTransaction(); + PopTransaction(); + s = CurrentTransactionState; /* changed by pop */ + break; + } + } while (s->blockState != TBLOCK_DEFAULT); - /* - * Now reset the transaction state - */ - s->blockState = TBLOCK_DEFAULT; + /* Should be out of all subxacts now */ + Assert(s->parent == NULL); } /* @@ -1784,18 +2412,436 @@ TransactionBlockStatusCode(void) case TBLOCK_BEGIN: case TBLOCK_INPROGRESS: case TBLOCK_END: + case TBLOCK_SUBINPROGRESS: + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBEND: return 'T'; /* in transaction */ case TBLOCK_ABORT: case TBLOCK_ENDABORT: + case TBLOCK_SUBABORT: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + case TBLOCK_SUBBEGINABORT: return 'E'; /* in failed transaction */ } /* should never get here */ - elog(ERROR, "invalid transaction block state: %d", - (int) s->blockState); + elog(FATAL, "invalid transaction block state: %s", + BlockStateAsString(s->blockState)); return 0; /* keep compiler quiet */ } +/* + * IsSubTransaction + */ +bool +IsSubTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + switch (s->blockState) { + case TBLOCK_DEFAULT: + case TBLOCK_STARTED: + case TBLOCK_BEGIN: + case TBLOCK_INPROGRESS: + case TBLOCK_END: + case TBLOCK_ABORT: + case TBLOCK_ENDABORT: + return false; + case TBLOCK_SUBBEGIN: + case TBLOCK_SUBBEGINABORT: + case TBLOCK_SUBINPROGRESS: + case TBLOCK_SUBABORT: + case TBLOCK_SUBEND: + case TBLOCK_SUBENDABORT_OK: + case TBLOCK_SUBENDABORT_ERROR: + return true; + } + + /* should never get here */ + elog(FATAL, "invalid transaction block state: 
%s", + BlockStateAsString(s->blockState)); + return false; /* keep compiler quiet */ +} + +/* + * StartSubTransaction + */ +static void +StartSubTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + if (s->state != TRANS_DEFAULT) + elog(WARNING, "StartSubTransaction and not in default state"); + + s->state = TRANS_START; + + /* + * Generate a new Xid and record it in pg_subtrans. + */ + s->transactionIdData = GetNewTransactionId(true); + + SubTransSetParent(s->transactionIdData, s->parent->transactionIdData); + + /* + * Finish setup of other transaction state fields. + */ + s->currentUser = GetUserId(); + + /* Initialize the various transaction subsystems */ + AtSubStart_Memory(); + AtSubStart_Inval(); + AtSubStart_RelationCache(); + AtSubStart_CatCache(); + AtSubStart_Buffers(); + AtSubStart_smgr(); + AtSubStart_Notify(); + DeferredTriggerBeginSubXact(); + + s->state = TRANS_INPROGRESS; + + ShowTransactionState("StartSubTransaction"); +} + +/* + * CommitSubTransaction + */ +static void +CommitSubTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + ShowTransactionState("CommitSubTransaction"); + + if (s->state != TRANS_INPROGRESS) + elog(WARNING, "CommitSubTransaction and not in in-progress state"); + + /* Pre-commit processing */ + AtSubCommit_Portals(s->parent->transactionIdData); + DeferredTriggerEndSubXact(true); + + /* Mark subtransaction as subcommitted */ + CommandCounterIncrement(); + RecordSubTransactionCommit(); + AtSubCommit_childXids(); + + /* Post-commit cleanup */ + AtSubCommit_smgr(); + + AtSubEOXact_Inval(true); + AtEOSubXact_SPI(true, s->transactionIdData); + AtSubCommit_Notify(); + AtEOXact_GUC(true, true); + AtEOSubXact_gist(s->transactionIdData); + AtEOSubXact_hash(s->transactionIdData); + AtEOSubXact_rtree(s->transactionIdData); + AtEOSubXact_on_commit_actions(true, s->transactionIdData, + s->parent->transactionIdData); + + AtEOSubXact_CatCache(true); + AtEOSubXact_RelationCache(true); + 
AtEOSubXact_Buffers(true); + AtSubCommit_Memory(); + + s->state = TRANS_DEFAULT; +} + +/* + * AbortSubTransaction + */ +static void +AbortSubTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + ShowTransactionState("AbortSubTransaction"); + + HOLD_INTERRUPTS(); + + s->state = TRANS_ABORT; + + /* + * Release any LW locks we might be holding as quickly as possible. + * (Regular locks, however, must be held till we finish aborting.) + * Releasing LW locks is critical since we might try to grab them + * again while cleaning up! + * + * FIXME This may be incorrect --- Are there some locks we should keep? + * Buffer locks, for example? I don't think so but I'm not sure. + */ + LWLockReleaseAll(); + + AbortBufferIO(); + UnlockBuffers(); + + LockWaitCancel(); + + AtSubAbort_Memory(); + + /* + * do abort processing + */ + + RecordSubTransactionAbort(); + + /* Post-abort cleanup */ + AtSubAbort_smgr(); + + DeferredTriggerEndSubXact(false); + AtSubAbort_Portals(); + AtSubEOXact_Inval(false); + AtSubAbort_Locks(); + AtEOSubXact_SPI(false, s->transactionIdData); + AtSubAbort_Notify(); + AtEOXact_GUC(false, true); + AtEOSubXact_gist(s->transactionIdData); + AtEOSubXact_hash(s->transactionIdData); + AtEOSubXact_rtree(s->transactionIdData); + AtEOSubXact_on_commit_actions(false, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_RelationCache(false); + AtEOSubXact_CatCache(false); + AtEOSubXact_Buffers(false); + + /* + * Reset user id which might have been changed transiently. Here we + * want to restore to the userid that was current at subxact entry. + * (As in AbortTransaction, we need not worry about the session userid.) + * + * Must do this after AtEOXact_GUC to handle the case where we entered + * the subxact inside a SECURITY DEFINER function (hence current and + * session userids were different) and then session auth was changed + * inside the subxact. 
GUC will reset both current and session userids + * to the entry-time session userid. This is right in every other + * scenario so it seems simplest to let GUC do that and fix it here. + */ + SetUserId(s->currentUser); + + CommandCounterIncrement(); + + RESUME_INTERRUPTS(); +} + +/* + * CleanupSubTransaction + */ +static void +CleanupSubTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + ShowTransactionState("CleanupSubTransaction"); + + if (s->state != TRANS_ABORT) + elog(WARNING, "CleanupSubTransaction and not in aborted state"); + + AtSubCleanup_Portals(); + AtSubCleanup_Memory(); + + s->state = TRANS_DEFAULT; +} + +/* + * PushTransaction + * Set up transaction state for a subtransaction + */ +static void +PushTransaction(void) +{ + TransactionState p = CurrentTransactionState; + TransactionState s; + + /* + * We keep subtransaction state nodes in TopTransactionContext. + */ + s = (TransactionState) + MemoryContextAllocZero(TopTransactionContext, + sizeof(TransactionStateData)); + s->parent = p; + s->nestingLevel = p->nestingLevel + 1; + s->state = TRANS_DEFAULT; + s->blockState = TBLOCK_SUBBEGIN; + + /* Command IDs count in a continuous sequence through subtransactions */ + s->commandId = p->commandId; + + /* + * Copy down some other data so that we will have valid state until + * StartSubTransaction runs. 
+ */ + s->transactionIdData = p->transactionIdData; + s->curTransactionContext = p->curTransactionContext; + + CurrentTransactionState = s; +} + +/* + * PopTransaction + * Pop back to parent transaction state + */ +static void +PopTransaction(void) +{ + TransactionState s = CurrentTransactionState; + + if (s->state != TRANS_DEFAULT) + elog(WARNING, "PopTransaction and not in default state"); + + if (s->parent == NULL) + elog(FATAL, "PopTransaction with no parent"); + + /* Command IDs count in a continuous sequence through subtransactions */ + s->parent->commandId = s->commandId; + + CurrentTransactionState = s->parent; + + /* Let's just make sure CurTransactionContext is good */ + CurTransactionContext = s->parent->curTransactionContext; + MemoryContextSwitchTo(CurTransactionContext); + + /* Free the old child structure */ + pfree(s); +} + +/* + * ShowTransactionState + * Debug support + */ +static void +ShowTransactionState(const char *str) +{ + /* skip work if message will definitely not be printed */ + if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) + { + elog(DEBUG2, "%s", str); + ShowTransactionStateRec(CurrentTransactionState); + } +} + +/* + * ShowTransactionStateRec + * Recursive subroutine for ShowTransactionState + */ +static void +ShowTransactionStateRec(TransactionState s) +{ + if (s->parent) + ShowTransactionStateRec(s->parent); + + /* use ereport to suppress computation if msg will not be printed */ + ereport(DEBUG2, + (errmsg_internal("blockState: %13s; state: %7s, xid/cid: %u/%02u, nestlvl: %d, children: %s", + BlockStateAsString(s->blockState), + TransStateAsString(s->state), + (unsigned int) s->transactionIdData, + (unsigned int) s->commandId, + s->nestingLevel, + nodeToString(s->childXids)))); +} + +/* + * BlockStateAsString + * Debug support + */ +static const char * +BlockStateAsString(TBlockState blockState) +{ + switch (blockState) { + case TBLOCK_DEFAULT: + return "DEFAULT"; + case TBLOCK_STARTED: + return "STARTED"; + case 
TBLOCK_BEGIN: + return "BEGIN"; + case TBLOCK_INPROGRESS: + return "INPROGRESS"; + case TBLOCK_END: + return "END"; + case TBLOCK_ABORT: + return "ABORT"; + case TBLOCK_ENDABORT: + return "ENDABORT"; + case TBLOCK_SUBBEGIN: + return "SUB BEGIN"; + case TBLOCK_SUBBEGINABORT: + return "SUB BEGIN AB"; + case TBLOCK_SUBINPROGRESS: + return "SUB INPROGRS"; + case TBLOCK_SUBEND: + return "SUB END"; + case TBLOCK_SUBABORT: + return "SUB ABORT"; + case TBLOCK_SUBENDABORT_OK: + return "SUB ENDAB OK"; + case TBLOCK_SUBENDABORT_ERROR: + return "SUB ENDAB ERR"; + } + return "UNRECOGNIZED"; +} + +/* + * TransStateAsString + * Debug support + */ +static const char * +TransStateAsString(TransState state) +{ + switch (state) { + case TRANS_DEFAULT: + return "DEFAULT"; + case TRANS_START: + return "START"; + case TRANS_COMMIT: + return "COMMIT"; + case TRANS_ABORT: + return "ABORT"; + case TRANS_INPROGRESS: + return "INPROGR"; + } + return "UNRECOGNIZED"; +} + +/* + * xactGetCommittedChildren + * + * Gets the list of committed children of the current transaction. The return + * value is the number of child transactions. *children is set to point to a + * palloc'd array of TransactionIds. If there are no subxacts, *children is + * set to NULL. + * + * If metoo is true, include the current TransactionId. 
+ */ +int +xactGetCommittedChildren(TransactionId **ptr, bool metoo) +{ + TransactionState s = CurrentTransactionState; + int nchildren; + TransactionId *children; + ListCell *p; + + nchildren = list_length(s->childXids); + if (metoo) + nchildren++; + if (nchildren == 0) + { + *ptr = NULL; + return 0; + } + + children = (TransactionId *) palloc(nchildren * sizeof(TransactionId)); + *ptr = children; + + foreach(p, s->childXids) + { + TransactionId child = lfirst_int(p); + *children++ = (TransactionId)child; + } + if (metoo) + *children = s->transactionIdData; + + return nchildren; +} /* * XLOG support routines @@ -1809,13 +2855,14 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record) if (info == XLOG_XACT_COMMIT) { xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record); - int nfiles; int i; TransactionIdCommit(record->xl_xid); + /* Mark committed subtransactions as committed */ + TransactionIdCommitTree(xlrec->nsubxacts, + (TransactionId *) &(xlrec->xnodes[xlrec->nrels])); /* Make sure files supposed to be dropped are dropped */ - nfiles = (record->xl_len - MinSizeOfXactCommit) / sizeof(RelFileNode); - for (i = 0; i < nfiles; i++) + for (i = 0; i < xlrec->nrels; i++) { XLogCloseRelation(xlrec->xnodes[i]); smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); @@ -1824,13 +2871,14 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record) else if (info == XLOG_XACT_ABORT) { xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record); - int nfiles; int i; TransactionIdAbort(record->xl_xid); + /* mark subtransactions as aborted */ + TransactionIdAbortTree(xlrec->nsubxacts, + (TransactionId *) &(xlrec->xnodes[xlrec->nrels])); /* Make sure files supposed to be dropped are dropped */ - nfiles = (record->xl_len - MinSizeOfXactAbort) / sizeof(RelFileNode); - for (i = 0; i < nfiles; i++) + for (i = 0; i < xlrec->nrels; i++) { XLogCloseRelation(xlrec->xnodes[i]); smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); @@ -1855,6 +2903,7 @@ void xact_desc(char *buf, uint8 
xl_info, char *rec) { uint8 info = xl_info & ~XLR_INFO_MASK; + int i; if (info == XLOG_XACT_COMMIT) { @@ -1864,7 +2913,25 @@ xact_desc(char *buf, uint8 xl_info, char *rec) sprintf(buf + strlen(buf), "commit: %04u-%02u-%02u %02u:%02u:%02u", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); - /* XXX can't show RelFileNodes for lack of access to record length */ + if (xlrec->nrels > 0) + { + sprintf(buf + strlen(buf), "; rels:"); + for (i = 0; i < xlrec->nrels; i++) + { + RelFileNode rnode = xlrec->xnodes[i]; + sprintf(buf + strlen(buf), " %u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode); + } + } + if (xlrec->nsubxacts > 0) + { + TransactionId *xacts = (TransactionId *) + &xlrec->xnodes[xlrec->nrels]; + + sprintf(buf + strlen(buf), "; subxacts:"); + for (i = 0; i < xlrec->nsubxacts; i++) + sprintf(buf + strlen(buf), " %u", xacts[i]); + } } else if (info == XLOG_XACT_ABORT) { @@ -1874,7 +2941,25 @@ xact_desc(char *buf, uint8 xl_info, char *rec) sprintf(buf + strlen(buf), "abort: %04u-%02u-%02u %02u:%02u:%02u", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); - /* XXX can't show RelFileNodes for lack of access to record length */ + if (xlrec->nrels > 0) + { + sprintf(buf + strlen(buf), "; rels:"); + for (i = 0; i < xlrec->nrels; i++) + { + RelFileNode rnode = xlrec->xnodes[i]; + sprintf(buf + strlen(buf), " %u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode); + } + } + if (xlrec->nsubxacts > 0) + { + TransactionId *xacts = (TransactionId *) + &xlrec->xnodes[xlrec->nrels]; + + sprintf(buf + strlen(buf), "; subxacts:"); + for (i = 0; i < xlrec->nsubxacts; i++) + sprintf(buf + strlen(buf), " %u", xacts[i]); + } } else strcat(buf, "UNKNOWN"); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f12056406156a07d7e241ee066dad5c8760e78d2..a6f53ba79f1c33069ac721cdced1d20e50630960 100644 --- a/src/backend/access/transam/xlog.c +++ 
b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.146 2004/06/03 02:08:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.147 2004/07/01 00:49:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ #include <sys/time.h> #include "access/clog.h" +#include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlog.h" @@ -2755,6 +2756,7 @@ BootStrapXLOG(void) /* Bootstrap the commit log, too */ BootStrapCLOG(); + BootStrapSUBTRANS(); } static char * @@ -3154,6 +3156,7 @@ StartupXLOG(void) /* Start up the commit log, too */ StartupCLOG(); + StartupSUBTRANS(); ereport(LOG, (errmsg("database system is ready"))); @@ -3292,6 +3295,7 @@ ShutdownXLOG(int code, Datum arg) CritSectionCount++; CreateCheckPoint(true, true); ShutdownCLOG(); + ShutdownSUBTRANS(); CritSectionCount--; ereport(LOG, @@ -3467,6 +3471,7 @@ CreateCheckPoint(bool shutdown, bool force) END_CRIT_SECTION(); CheckPointCLOG(); + CheckPointSUBTRANS(); FlushBufferPool(); START_CRIT_SECTION(); diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 847f73ff06ade9b0defbe791015ec0507b9d706d..8e53d6af7d79d57b183e12e66bf3086a32ef0537 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.112 2004/05/26 04:41:10 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.113 2004/07/01 00:50:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -97,11 +97,17 @@ * State for outbound notifies consists of a list of all relnames NOTIFYed * in the 
current transaction. We do not actually perform a NOTIFY until * and unless the transaction commits. pendingNotifies is NIL if no - * NOTIFYs have been done in the current transaction. The List nodes and - * referenced strings are all palloc'd in TopTransactionContext. + * NOTIFYs have been done in the current transaction. + * + * The list is kept in CurTransactionContext. In subtransactions, each + * subtransaction has its own list in its own CurTransactionContext, but + * successful subtransactions attach their lists to their parent's list. + * Failed subtransactions simply discard their lists. */ static List *pendingNotifies = NIL; +static List *upperPendingNotifies = NIL; /* list of upper-xact lists */ + /* * State for inbound notifies consists of two flags: one saying whether * the signal handler is currently allowed to call ProcessIncomingNotify @@ -155,11 +161,11 @@ Async_Notify(char *relname) { /* * The name list needs to live until end of transaction, so store - * it in the top transaction context. + * it in the transaction context. */ MemoryContext oldcontext; - oldcontext = MemoryContextSwitchTo(TopTransactionContext); + oldcontext = MemoryContextSwitchTo(CurTransactionContext); pendingNotifies = lcons(pstrdup(relname), pendingNotifies); @@ -606,6 +612,60 @@ AtAbort_Notify(void) ClearPendingNotifies(); } +/* + * AtSubStart_Notify() --- Take care of subtransaction start. + * + * Push empty state for the new subtransaction. + */ +void +AtSubStart_Notify(void) +{ + MemoryContext old_cxt; + + /* Keep the list-of-lists in TopTransactionContext for simplicity */ + old_cxt = MemoryContextSwitchTo(TopTransactionContext); + + upperPendingNotifies = lcons(pendingNotifies, upperPendingNotifies); + + pendingNotifies = NIL; + + MemoryContextSwitchTo(old_cxt); +} + +/* + * AtSubCommit_Notify() --- Take care of subtransaction commit. + * + * Reassign all items in the pending notifies list to the parent transaction. 
+ */ +void +AtSubCommit_Notify(void) +{ + List *parentPendingNotifies; + + parentPendingNotifies = (List *) linitial(upperPendingNotifies); + upperPendingNotifies = list_delete_first(upperPendingNotifies); + + /* + * We could try to eliminate duplicates here, but it seems not worthwhile. + */ + pendingNotifies = list_concat(parentPendingNotifies, pendingNotifies); +} + +/* + * AtSubAbort_Notify() --- Take care of subtransaction abort. + */ +void +AtSubAbort_Notify(void) +{ + /* + * All we have to do is pop the stack --- the notifies made in this + * subxact are no longer interesting, and the space will be freed when + * CurTransactionContext is recycled. + */ + pendingNotifies = (List *) linitial(upperPendingNotifies); + upperPendingNotifies = list_delete_first(upperPendingNotifies); +} + /* *-------------------------------------------------------------- * NotifyInterruptHandler @@ -951,7 +1011,7 @@ ClearPendingNotifies(void) /* * We used to have to explicitly deallocate the list members and * nodes, because they were malloc'd. Now, since we know they are - * palloc'd in TopTransactionContext, we need not do that --- they'll + * palloc'd in CurTransactionContext, we need not do that --- they'll * go away automatically at transaction exit. We need only reset the * list head pointer. */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index cfd8bd80cc0e6f41ff2d42996b3d127070426566..392822abf503c785587920a8ca9a74f9089ea00d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.117 2004/06/25 21:55:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.118 2004/07/01 00:50:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -76,8 +76,8 @@ typedef struct OnCommitItem * entries in the list until commit so that we can roll back if * needed. 
*/ - bool created_in_cur_xact; - bool deleted_in_cur_xact; + TransactionId creating_xid; + TransactionId deleting_xid; } OnCommitItem; static List *on_commits = NIL; @@ -5483,8 +5483,8 @@ register_on_commit_action(Oid relid, OnCommitAction action) oc = (OnCommitItem *) palloc(sizeof(OnCommitItem)); oc->relid = relid; oc->oncommit = action; - oc->created_in_cur_xact = true; - oc->deleted_in_cur_xact = false; + oc->creating_xid = GetCurrentTransactionId(); + oc->deleting_xid = InvalidTransactionId; on_commits = lcons(oc, on_commits); @@ -5507,7 +5507,7 @@ remove_on_commit_action(Oid relid) if (oc->relid == relid) { - oc->deleted_in_cur_xact = true; + oc->deleting_xid = GetCurrentTransactionId(); break; } } @@ -5522,6 +5522,7 @@ remove_on_commit_action(Oid relid) void PreCommit_on_commit_actions(void) { + TransactionId xid = GetCurrentTransactionId(); ListCell *l; foreach(l, on_commits) @@ -5529,7 +5530,7 @@ PreCommit_on_commit_actions(void) OnCommitItem *oc = (OnCommitItem *) lfirst(l); /* Ignore entry if already dropped in this xact */ - if (oc->deleted_in_cur_xact) + if (oc->deleting_xid == xid) continue; switch (oc->oncommit) @@ -5556,7 +5557,7 @@ PreCommit_on_commit_actions(void) * remove_on_commit_action, so the entry should get * marked as deleted. */ - Assert(oc->deleted_in_cur_xact); + Assert(oc->deleting_xid == xid); break; } } @@ -5572,7 +5573,7 @@ PreCommit_on_commit_actions(void) * during abort, remove those created during this transaction. */ void -AtEOXact_on_commit_actions(bool isCommit) +AtEOXact_on_commit_actions(bool isCommit, TransactionId xid) { ListCell *cur_item; ListCell *prev_item; @@ -5584,8 +5585,8 @@ AtEOXact_on_commit_actions(bool isCommit) { OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item); - if (isCommit ? oc->deleted_in_cur_xact : - oc->created_in_cur_xact) + if (isCommit ? 
TransactionIdEquals(oc->deleting_xid, xid) : + TransactionIdEquals(oc->creating_xid, xid)) { /* cur_item must be removed */ on_commits = list_delete_cell(on_commits, cur_item, prev_item); @@ -5598,8 +5599,52 @@ AtEOXact_on_commit_actions(bool isCommit) else { /* cur_item must be preserved */ - oc->deleted_in_cur_xact = false; - oc->created_in_cur_xact = false; + oc->creating_xid = InvalidTransactionId; + oc->deleting_xid = InvalidTransactionId; + prev_item = cur_item; + cur_item = lnext(prev_item); + } + } +} + +/* + * Post-subcommit or post-subabort cleanup for ON COMMIT management. + * + * During subabort, we can immediately remove entries created during this + * subtransaction. During subcommit, just relabel entries marked during + * this subtransaction as being the parent's responsibility. + */ +void +AtEOSubXact_on_commit_actions(bool isCommit, TransactionId childXid, + TransactionId parentXid) +{ + ListCell *cur_item; + ListCell *prev_item; + + prev_item = NULL; + cur_item = list_head(on_commits); + + while (cur_item != NULL) + { + OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item); + + if (!isCommit && TransactionIdEquals(oc->creating_xid, childXid)) + { + /* cur_item must be removed */ + on_commits = list_delete_cell(on_commits, cur_item, prev_item); + pfree(oc); + if (prev_item) + cur_item = lnext(prev_item); + else + cur_item = list_head(on_commits); + } + else + { + /* cur_item must be preserved */ + if (TransactionIdEquals(oc->creating_xid, childXid)) + oc->creating_xid = parentXid; + if (TransactionIdEquals(oc->deleting_xid, childXid)) + oc->deleting_xid = isCommit ? 
parentXid : InvalidTransactionId; prev_item = cur_item; cur_item = lnext(prev_item); } diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index cfbd58e4282a540528a64d8062bddc5d67c0fdb4..15f4cfa8dcb164f84b73b1da01fe1aeb95fbf0dd 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.165 2004/05/26 04:41:12 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.166 2004/07/01 00:50:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -50,9 +50,6 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, MemoryContext per_tuple_context); static void DeferredTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger, HeapTuple oldtup, HeapTuple newtup); -static void DeferredTriggerExecute(DeferredTriggerEvent event, int itemno, - Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, - MemoryContext per_tuple_context); /* @@ -1639,47 +1636,130 @@ ltrmark:; /* ---------- * Deferred trigger stuff + * + * The DeferredTriggersData struct holds data about pending deferred + * trigger events during the current transaction tree. The struct and + * most of its subsidiary data are kept in TopTransactionContext; however + * the individual event records are kept in CurTransactionContext, so that + * they will easily go away during subtransaction abort. + * + * DeferredTriggersData has the following fields: + * + * state keeps track of the deferred state of each trigger + * (including the global state). This is saved and restored across + * failed subtransactions. + * + * events is the head of the list of events. + * + * tail_thisxact points to the tail of the list, for the current + * transaction (whether main transaction or subtransaction). We always + * append to the list using this pointer. 
+ * + * events_imm points to the last element scanned by the last + * deferredTriggerInvokeEvents call. We can use this to avoid rescanning + * unnecessarily; if it's NULL, the scan should start at the head of the + * list. Its name comes from the fact that it's set to the last event fired + * by the last call to immediate triggers. + * + * tail_stack and imm_stack are stacks of pointers, which hold the pointers + * to the tail and the "immediate" events as of the start of a subtransaction. + * We use them to revert those pointers when aborting the subtransaction. + * + * state_stack is a stack of pointers to saved copies of the deferred-trigger + * state data; each subtransaction level that modifies that state first + * saves a copy, which we use to restore the state if we abort. + * + * numpushed and numalloc keep control of allocation and storage in the above + * stacks. numpushed is essentially the current subtransaction nesting depth. + * + * XXX We need to be able to save the per-event data in a file if it grows too + * large. * ---------- */ -typedef struct DeferredTriggersData +/* Per-item data */ +typedef struct DeferredTriggerEventItem { - /* Internal data is held in a per-transaction memory context */ - MemoryContext deftrig_cxt; - /* ALL DEFERRED or ALL IMMEDIATE */ - bool deftrig_all_isset; - bool deftrig_all_isdeferred; - /* Per trigger state */ - List *deftrig_trigstates; - /* List of pending deferred triggers. Previous comment below */ - DeferredTriggerEvent deftrig_events; - DeferredTriggerEvent deftrig_events_imm; - DeferredTriggerEvent deftrig_event_tail; -} DeferredTriggersData; + Oid dti_tgoid; + TransactionId dti_done_xid; + int32 dti_state; +} DeferredTriggerEventItem; -/* ---------- - * deftrig_events, deftrig_event_tail: - * The list of pending deferred trigger events during the current transaction.
+typedef struct DeferredTriggerEventData *DeferredTriggerEvent; + +/* Per-event data */ +typedef struct DeferredTriggerEventData +{ + DeferredTriggerEvent dte_next; /* list link */ + int32 dte_event; + Oid dte_relid; + TransactionId dte_done_xid; + ItemPointerData dte_oldctid; + ItemPointerData dte_newctid; + int32 dte_n_items; + /* dte_item is actually a variable-size array, of length dte_n_items */ + DeferredTriggerEventItem dte_item[1]; +} DeferredTriggerEventData; + +/* Per-trigger status data */ +typedef struct DeferredTriggerStatusData +{ + Oid dts_tgoid; + bool dts_tgisdeferred; +} DeferredTriggerStatusData; + +typedef struct DeferredTriggerStatusData *DeferredTriggerStatus; + + +/* + * Trigger deferral status data. * - * deftrig_events is the head, deftrig_event_tail is the last entry. - * Because this can grow pretty large, we don't use separate List nodes, - * but instead thread the list through the dte_next fields of the member - * nodes. Saves just a few bytes per entry, but that adds up. + * We make this a single palloc'd object so it can be copied and freed easily. * - * deftrig_events_imm holds the tail pointer as of the last - * deferredTriggerInvokeEvents call; we can use this to avoid rescanning - * entries unnecessarily. It is NULL if deferredTriggerInvokeEvents - * hasn't run since the last state change. + * all_isset and all_isdeferred are used to keep track + * of SET CONSTRAINTS ALL {DEFERRED, IMMEDIATE}. * - * XXX Need to be able to shove this data out to a file if it grows too - * large... - * ---------- + * trigstates[] stores per-trigger tgisdeferred settings. 
*/ +typedef struct DeferredTriggerStateData +{ + bool all_isset; + bool all_isdeferred; + int numstates; /* number of trigstates[] entries in use */ + int numalloc; /* allocated size of trigstates[] */ + DeferredTriggerStatusData trigstates[1]; /* VARIABLE LENGTH ARRAY */ +} DeferredTriggerStateData; + +typedef DeferredTriggerStateData *DeferredTriggerState; + +/* Per-transaction data */ +typedef struct DeferredTriggersData +{ + DeferredTriggerState state; + DeferredTriggerEvent events; + DeferredTriggerEvent tail_thisxact; + DeferredTriggerEvent events_imm; + DeferredTriggerEvent *tail_stack; + DeferredTriggerEvent *imm_stack; + DeferredTriggerState *state_stack; + int numpushed; + int numalloc; +} DeferredTriggersData; typedef DeferredTriggersData *DeferredTriggers; static DeferredTriggers deferredTriggers; + +static void DeferredTriggerExecute(DeferredTriggerEvent event, int itemno, + Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, + MemoryContext per_tuple_context); +static DeferredTriggerState DeferredTriggerStateCreate(int numalloc); +static DeferredTriggerState DeferredTriggerStateCopy(DeferredTriggerState state); +static DeferredTriggerState DeferredTriggerStateAddItem(DeferredTriggerState state, + Oid tgoid, bool tgisdeferred); + + /* ---------- * deferredTriggerCheckState() * @@ -1690,13 +1770,12 @@ static DeferredTriggers deferredTriggers; static bool deferredTriggerCheckState(Oid tgoid, int32 itemstate) { - MemoryContext oldcxt; - ListCell *sl; - DeferredTriggerStatus trigstate; + bool tgisdeferred; + int i; /* - * Not deferrable triggers (i.e. normal AFTER ROW triggers and - * constraints declared NOT DEFERRABLE, the state is always false. + * For not-deferrable triggers (i.e. normal AFTER ROW triggers and + * constraints declared NOT DEFERRABLE), the state is always false. 
*/ if ((itemstate & TRIGGER_DEFERRED_DEFERRABLE) == 0) return false; @@ -1704,37 +1783,29 @@ deferredTriggerCheckState(Oid tgoid, int32 itemstate) /* * Lookup if we know an individual state for this trigger */ - foreach(sl, deferredTriggers->deftrig_trigstates) + for (i = 0; i < deferredTriggers->state->numstates; i++) { - trigstate = (DeferredTriggerStatus) lfirst(sl); - if (trigstate->dts_tgoid == tgoid) - return trigstate->dts_tgisdeferred; + if (deferredTriggers->state->trigstates[i].dts_tgoid == tgoid) + return deferredTriggers->state->trigstates[i].dts_tgisdeferred; } /* * No individual state known - so if the user issued a SET CONSTRAINT * ALL ..., we return that instead of the triggers default state. */ - if (deferredTriggers->deftrig_all_isset) - return deferredTriggers->deftrig_all_isdeferred; + if (deferredTriggers->state->all_isset) + return deferredTriggers->state->all_isdeferred; /* * No ALL state known either, remember the default state as the - * current and return that. + * current and return that. (XXX why do we bother making a state entry?) 
*/ - oldcxt = MemoryContextSwitchTo(deferredTriggers->deftrig_cxt); + tgisdeferred = ((itemstate & TRIGGER_DEFERRED_INITDEFERRED) != 0); + deferredTriggers->state = + DeferredTriggerStateAddItem(deferredTriggers->state, + tgoid, tgisdeferred); - trigstate = (DeferredTriggerStatus) - palloc(sizeof(DeferredTriggerStatusData)); - trigstate->dts_tgoid = tgoid; - trigstate->dts_tgisdeferred = - ((itemstate & TRIGGER_DEFERRED_INITDEFERRED) != 0); - deferredTriggers->deftrig_trigstates = - lappend(deferredTriggers->deftrig_trigstates, trigstate); - - MemoryContextSwitchTo(oldcxt); - - return trigstate->dts_tgisdeferred; + return tgisdeferred; } @@ -1747,22 +1818,18 @@ deferredTriggerCheckState(Oid tgoid, int32 itemstate) static void deferredTriggerAddEvent(DeferredTriggerEvent event) { - /* - * Since the event list could grow quite long, we keep track of the - * list tail and append there, rather than just doing a stupid - * "lappend". This avoids O(N^2) behavior for large numbers of events. - */ - event->dte_next = NULL; - if (deferredTriggers->deftrig_event_tail == NULL) + Assert(event->dte_next == NULL); + + if (deferredTriggers->tail_thisxact == NULL) { /* first list entry */ - deferredTriggers->deftrig_events = event; - deferredTriggers->deftrig_event_tail = event; + deferredTriggers->events = event; + deferredTriggers->tail_thisxact = event; } else { - deferredTriggers->deftrig_event_tail->dte_next = event; - deferredTriggers->deftrig_event_tail = event; + deferredTriggers->tail_thisxact->dte_next = event; + deferredTriggers->tail_thisxact = event; } } @@ -1915,18 +1982,18 @@ deferredTriggerInvokeEvents(bool immediate_only) /* * If immediate_only is true, then the only events that could need - * firing are those since deftrig_events_imm. (But if - * deftrig_events_imm is NULL, we must scan the entire list.) + * firing are those since events_imm. (But if + * events_imm is NULL, we must scan the entire list.) 
*/ - if (immediate_only && deferredTriggers->deftrig_events_imm != NULL) + if (immediate_only && deferredTriggers->events_imm != NULL) { - prev_event = deferredTriggers->deftrig_events_imm; + prev_event = deferredTriggers->events_imm; event = prev_event->dte_next; } else { prev_event = NULL; - event = deferredTriggers->deftrig_events; + event = deferredTriggers->events; } while (event != NULL) @@ -1936,10 +2003,13 @@ deferredTriggerInvokeEvents(bool immediate_only) int i; /* - * Check if event is already completely done. + * Fire only events that are neither cancelled nor already done by a + * transaction that has not aborted; all other events are skipped. */ - if (!(event->dte_event & (TRIGGER_DEFERRED_DONE | - TRIGGER_DEFERRED_CANCELED))) + if (!(event->dte_event & TRIGGER_DEFERRED_CANCELED) && + !((event->dte_event & TRIGGER_DEFERRED_DONE) && + TransactionIdIsValid(event->dte_done_xid) && + !TransactionIdDidAbort(event->dte_done_xid))) { MemoryContextReset(per_tuple_context); @@ -1948,7 +2018,9 @@ deferredTriggerInvokeEvents(bool immediate_only) */ for (i = 0; i < event->dte_n_items; i++) { - if (event->dte_item[i].dti_state & TRIGGER_DEFERRED_DONE) + if (event->dte_item[i].dti_state & TRIGGER_DEFERRED_DONE && + TransactionIdIsValid(event->dte_item[i].dti_done_xid) && + !(TransactionIdDidAbort(event->dte_item[i].dti_done_xid))) continue; /* @@ -2003,6 +2075,7 @@ deferredTriggerInvokeEvents(bool immediate_only) per_tuple_context); event->dte_item[i].dti_state |= TRIGGER_DEFERRED_DONE; + event->dte_item[i].dti_done_xid = GetCurrentTransactionId(); } /* end loop over items within event */ } @@ -2022,23 +2095,27 @@ deferredTriggerInvokeEvents(bool immediate_only) } else { - /* Done */ - if (immediate_only) + /* + * We can drop an item if it's done, but only if we're not + * inside a subtransaction because it could abort later on. + * We will want to check the item again if it does.
+ */ + if (immediate_only && !IsSubTransaction()) { /* delink it from list and free it */ if (prev_event) prev_event->dte_next = next_event; else - deferredTriggers->deftrig_events = next_event; + deferredTriggers->events = next_event; pfree(event); } else { /* - * We will clean up later, but just for paranoia's sake, - * mark the event done. + * Mark the event done. */ event->dte_event |= TRIGGER_DEFERRED_DONE; + event->dte_done_xid = GetCurrentTransactionId(); } } @@ -2046,10 +2123,10 @@ deferredTriggerInvokeEvents(bool immediate_only) } /* Update list tail pointer in case we just deleted tail event */ - deferredTriggers->deftrig_event_tail = prev_event; + deferredTriggers->tail_thisxact = prev_event; /* Set the immediate event pointer for next time */ - deferredTriggers->deftrig_events_imm = prev_event; + deferredTriggers->events_imm = prev_event; /* Release working resources */ if (rel) @@ -2060,23 +2137,6 @@ deferredTriggerInvokeEvents(bool immediate_only) MemoryContextDelete(per_tuple_context); } - -/* ---------- - * DeferredTriggerInit() - * - * Initialize the deferred trigger mechanism. This is called during - * backend startup and is guaranteed to be before the first of all - * transactions. - * ---------- - */ -void -DeferredTriggerInit(void) -{ - /* Nothing to do */ - ; -} - - /* ---------- * DeferredTriggerBeginXact() * @@ -2087,34 +2147,24 @@ DeferredTriggerInit(void) void DeferredTriggerBeginXact(void) { - /* - * This will be changed to a special context when the nested - * transactions project moves forward. 
- */ - MemoryContext cxt = TopTransactionContext; - - deferredTriggers = (DeferredTriggers) MemoryContextAlloc(TopTransactionContext, - sizeof(DeferredTriggersData)); + Assert(deferredTriggers == NULL); - /* - * Create the per transaction memory context - */ - deferredTriggers->deftrig_cxt = AllocSetContextCreate(cxt, - "DeferredTriggerXact", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + deferredTriggers = (DeferredTriggers) + MemoryContextAlloc(TopTransactionContext, + sizeof(DeferredTriggersData)); /* * If unspecified, constraints default to IMMEDIATE, per SQL */ - deferredTriggers->deftrig_all_isdeferred = false; - deferredTriggers->deftrig_all_isset = false; - - deferredTriggers->deftrig_trigstates = NIL; - deferredTriggers->deftrig_events = NULL; - deferredTriggers->deftrig_events_imm = NULL; - deferredTriggers->deftrig_event_tail = NULL; + deferredTriggers->state = DeferredTriggerStateCreate(8); + deferredTriggers->events = NULL; + deferredTriggers->events_imm = NULL; + deferredTriggers->tail_thisxact = NULL; + deferredTriggers->tail_stack = NULL; + deferredTriggers->imm_stack = NULL; + deferredTriggers->state_stack = NULL; + deferredTriggers->numalloc = 0; + deferredTriggers->numpushed = 0; } @@ -2156,6 +2206,12 @@ DeferredTriggerEndXact(void) deferredTriggerInvokeEvents(false); + /* + * Forget everything we know about deferred triggers. + * + * Since all the info is in TopTransactionContext or children thereof, + * we need do nothing special to reclaim memory. + */ deferredTriggers = NULL; } @@ -2179,10 +2235,217 @@ DeferredTriggerAbortXact(void) /* * Forget everything we know about deferred triggers. + * + * Since all the info is in TopTransactionContext or children thereof, + * we need do nothing special to reclaim memory. */ deferredTriggers = NULL; } +/* + * DeferredTriggerBeginSubXact() + * + * Start a subtransaction. 
+ */ +void +DeferredTriggerBeginSubXact(void) +{ + /* + * Ignore call if the transaction is in aborted state. + */ + if (deferredTriggers == NULL) + return; + + /* + * Allocate more space in the stacks if needed. + */ + if (deferredTriggers->numpushed == deferredTriggers->numalloc) + { + if (deferredTriggers->numalloc == 0) + { + MemoryContext old_cxt; + + old_cxt = MemoryContextSwitchTo(TopTransactionContext); + +#define DEFTRIG_INITALLOC 8 + deferredTriggers->tail_stack = (DeferredTriggerEvent *) + palloc(DEFTRIG_INITALLOC * sizeof(DeferredTriggerEvent)); + deferredTriggers->imm_stack = (DeferredTriggerEvent *) + palloc(DEFTRIG_INITALLOC * sizeof(DeferredTriggerEvent)); + deferredTriggers->state_stack = (DeferredTriggerState *) + palloc(DEFTRIG_INITALLOC * sizeof(DeferredTriggerState)); + deferredTriggers->numalloc = DEFTRIG_INITALLOC; + + MemoryContextSwitchTo(old_cxt); + } + else + { + /* repalloc will keep the stacks in the same context */ + deferredTriggers->numalloc *= 2; + + deferredTriggers->tail_stack = (DeferredTriggerEvent *) + repalloc(deferredTriggers->tail_stack, + deferredTriggers->numalloc * sizeof(DeferredTriggerEvent)); + deferredTriggers->imm_stack = (DeferredTriggerEvent *) + repalloc(deferredTriggers->imm_stack, + deferredTriggers->numalloc * sizeof(DeferredTriggerEvent)); + deferredTriggers->state_stack = (DeferredTriggerState *) + repalloc(deferredTriggers->state_stack, + deferredTriggers->numalloc * sizeof(DeferredTriggerState)); + } + } + + /* + * Push the current list position into the stack and reset the + * pointer. 
+ */ + deferredTriggers->tail_stack[deferredTriggers->numpushed] = + deferredTriggers->tail_thisxact; + deferredTriggers->imm_stack[deferredTriggers->numpushed] = + deferredTriggers->events_imm; + /* State is not saved until/unless changed */ + deferredTriggers->state_stack[deferredTriggers->numpushed] = NULL; + + deferredTriggers->numpushed++; +} + +/* + * DeferredTriggerEndSubXact() + * + * The current subtransaction is ending. + */ +void +DeferredTriggerEndSubXact(bool isCommit) +{ + DeferredTriggerState state; + + /* + * Ignore call if the transaction is in aborted state. + */ + if (deferredTriggers == NULL) + return; + + /* + * Move back the "top of the stack." + */ + Assert(deferredTriggers->numpushed > 0); + + deferredTriggers->numpushed--; + + if (isCommit) + { + /* If we saved a prior state, we don't need it anymore */ + state = deferredTriggers->state_stack[deferredTriggers->numpushed]; + if (state != NULL) + pfree(state); + } + else + { + /* + * Aborting --- restore the pointers from the stacks. + */ + deferredTriggers->tail_thisxact = + deferredTriggers->tail_stack[deferredTriggers->numpushed]; + deferredTriggers->events_imm = + deferredTriggers->imm_stack[deferredTriggers->numpushed]; + + /* + * Cleanup the head and the tail of the list. + */ + if (deferredTriggers->tail_thisxact == NULL) + deferredTriggers->events = NULL; + else + deferredTriggers->tail_thisxact->dte_next = NULL; + + /* + * We don't need to free the items, since the CurTransactionContext + * will be reset shortly. + */ + + /* + * Restore the trigger state. If the saved state is NULL, then + * this subxact didn't save it, so it doesn't need restoring. 
+ */ + state = deferredTriggers->state_stack[deferredTriggers->numpushed]; + if (state != NULL) + { + pfree(deferredTriggers->state); + deferredTriggers->state = state; + } + } +} + +/* + * Create an empty DeferredTriggerState with room for numalloc trigstates + */ +static DeferredTriggerState +DeferredTriggerStateCreate(int numalloc) +{ + DeferredTriggerState state; + + /* Behave sanely with numalloc == 0 */ + if (numalloc <= 0) + numalloc = 1; + + /* + * We assume that zeroing will correctly initialize the state values. + */ + state = (DeferredTriggerState) + MemoryContextAllocZero(TopTransactionContext, + sizeof(DeferredTriggerStateData) + + (numalloc - 1) * sizeof(DeferredTriggerStatusData)); + + state->numalloc = numalloc; + + return state; +} + +/* + * Copy a DeferredTriggerState + */ +static DeferredTriggerState +DeferredTriggerStateCopy(DeferredTriggerState origstate) +{ + DeferredTriggerState state; + + state = DeferredTriggerStateCreate(origstate->numstates); + + state->all_isset = origstate->all_isset; + state->all_isdeferred = origstate->all_isdeferred; + state->numstates = origstate->numstates; + memcpy(state->trigstates, origstate->trigstates, + origstate->numstates * sizeof(DeferredTriggerStatusData)); + + return state; +} + +/* + * Add a per-trigger item to a DeferredTriggerState. Returns possibly-changed + * pointer to the state object (it will change if we have to repalloc). 
+ */ +static DeferredTriggerState +DeferredTriggerStateAddItem(DeferredTriggerState state, + Oid tgoid, bool tgisdeferred) +{ + if (state->numstates >= state->numalloc) + { + int newalloc = state->numalloc * 2; + + newalloc = Max(newalloc, 8); /* in case original has size 0 */ + state = (DeferredTriggerState) + repalloc(state, + sizeof(DeferredTriggerStateData) + + (newalloc - 1) * sizeof(DeferredTriggerStatusData)); + state->numalloc = newalloc; + Assert(state->numstates < state->numalloc); + } + + state->trigstates[state->numstates].dts_tgoid = tgoid; + state->trigstates[state->numstates].dts_tgisdeferred = tgisdeferred; + state->numstates++; + + return state; +} /* ---------- * DeferredTriggerSetState() @@ -2193,14 +2456,23 @@ DeferredTriggerAbortXact(void) void DeferredTriggerSetState(ConstraintsSetStmt *stmt) { - ListCell *l; - /* * Ignore call if we aren't in a transaction. */ if (deferredTriggers == NULL) return; + /* + * If in a subtransaction, and we didn't save the current state already, + * save it so it can be restored if the subtransaction aborts. + */ + if (deferredTriggers->numpushed > 0 && + deferredTriggers->state_stack[deferredTriggers->numpushed - 1] == NULL) + { + deferredTriggers->state_stack[deferredTriggers->numpushed - 1] = + DeferredTriggerStateCopy(deferredTriggers->state); + } + /* * Handle SET CONSTRAINTS ALL ... */ @@ -2210,23 +2482,19 @@ DeferredTriggerSetState(ConstraintsSetStmt *stmt) * Drop all per-transaction information about individual trigger * states. */ - list_free_deep(deferredTriggers->deftrig_trigstates); - deferredTriggers->deftrig_trigstates = NIL; + deferredTriggers->state->numstates = 0; /* * Set the per-transaction ALL state to known. 
*/ - deferredTriggers->deftrig_all_isset = true; - deferredTriggers->deftrig_all_isdeferred = stmt->deferred; + deferredTriggers->state->all_isset = true; + deferredTriggers->state->all_isdeferred = stmt->deferred; } else { Relation tgrel; - MemoryContext oldcxt; - bool found; - DeferredTriggerStatus state; - ListCell *ls; - List *loid = NIL; + ListCell *l; + List *oidlist = NIL; /* ---------- * Handle SET CONSTRAINTS constraint-name [, ...] @@ -2241,6 +2509,7 @@ DeferredTriggerSetState(ConstraintsSetStmt *stmt) ScanKeyData skey; SysScanDesc tgscan; HeapTuple htup; + bool found; /* * Check that only named constraints are set explicitly @@ -2285,7 +2554,7 @@ DeferredTriggerSetState(ConstraintsSetStmt *stmt) cname))); constr_oid = HeapTupleGetOid(htup); - loid = lappend_oid(loid, constr_oid); + oidlist = lappend_oid(oidlist, constr_oid); found = true; } @@ -2305,34 +2574,28 @@ DeferredTriggerSetState(ConstraintsSetStmt *stmt) * Inside of a transaction block set the trigger states of * individual triggers on transaction level. 
*/ - oldcxt = MemoryContextSwitchTo(deferredTriggers->deftrig_cxt); - - foreach(l, loid) + foreach(l, oidlist) { - found = false; - foreach(ls, deferredTriggers->deftrig_trigstates) + Oid tgoid = lfirst_oid(l); + bool found = false; + int i; + + for (i = 0; i < deferredTriggers->state->numstates; i++) { - state = (DeferredTriggerStatus) lfirst(ls); - if (state->dts_tgoid == lfirst_oid(l)) + if (deferredTriggers->state->trigstates[i].dts_tgoid == tgoid) { - state->dts_tgisdeferred = stmt->deferred; + deferredTriggers->state->trigstates[i].dts_tgisdeferred = stmt->deferred; found = true; break; } } if (!found) { - state = (DeferredTriggerStatus) - palloc(sizeof(DeferredTriggerStatusData)); - state->dts_tgoid = lfirst_oid(l); - state->dts_tgisdeferred = stmt->deferred; - - deferredTriggers->deftrig_trigstates = - lappend(deferredTriggers->deftrig_trigstates, state); + deferredTriggers->state = + DeferredTriggerStateAddItem(deferredTriggers->state, + tgoid, stmt->deferred); } } - - MemoryContextSwitchTo(oldcxt); } /* @@ -2347,14 +2610,14 @@ DeferredTriggerSetState(ConstraintsSetStmt *stmt) * entire list, in case some deferred events are now immediately * invokable. */ - deferredTriggers->deftrig_events_imm = NULL; + deferredTriggers->events_imm = NULL; } /* ---------- * DeferredTriggerSaveEvent() * - * Called by ExecAR...Triggers() to add the event to the queue. + * Called by ExecA[RS]...Triggers() to add the event to the queue. * * NOTE: should be called only if we've determined that an event must * be added to the queue. @@ -2423,9 +2686,10 @@ DeferredTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger, return; /* - * Create a new event + * Create a new event. We use the CurTransactionContext so the event + * will automatically go away if the subtransaction aborts. 
*/ - oldcxt = MemoryContextSwitchTo(deferredTriggers->deftrig_cxt); + oldcxt = MemoryContextSwitchTo(CurTransactionContext); new_size = offsetof(DeferredTriggerEventData, dte_item[0]) + n_enabled_triggers * sizeof(DeferredTriggerEventItem); @@ -2433,6 +2697,7 @@ DeferredTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger, new_event = (DeferredTriggerEvent) palloc(new_size); new_event->dte_next = NULL; new_event->dte_event = event & TRIGGER_EVENT_OPMASK; + new_event->dte_done_xid = InvalidTransactionId; if (row_trigger) new_event->dte_event |= TRIGGER_EVENT_ROW; new_event->dte_relid = rel->rd_id; @@ -2449,6 +2714,7 @@ DeferredTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger, ev_item = &(new_event->dte_item[i]); ev_item->dti_tgoid = trigger->tgoid; + ev_item->dti_done_xid = InvalidTransactionId; ev_item->dti_state = ((trigger->tgdeferrable) ? TRIGGER_DEFERRED_DEFERRABLE : 0) | @@ -2517,6 +2783,7 @@ DeferredTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger, * the trigger at all. 
*/ new_event->dte_item[i].dti_state |= TRIGGER_DEFERRED_DONE; + new_event->dte_item[i].dti_done_xid = GetCurrentTransactionId(); } } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 80a021487f9d108e157f61a786b2cee272de5312..c62bc6eaf1e2a75044ab67f1572e2dba92888330 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.281 2004/06/08 13:59:36 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.282 2004/07/01 00:50:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,7 @@ #include "access/clog.h" #include "access/genam.h" #include "access/heapam.h" +#include "access/subtrans.h" #include "access/xlog.h" #include "catalog/catalog.h" #include "catalog/catname.h" @@ -798,8 +799,9 @@ vac_truncate_clog(TransactionId vacuumXID, TransactionId frozenXID) return; } - /* Truncate CLOG to the oldest vacuumxid */ + /* Truncate CLOG and SUBTRANS to the oldest vacuumxid */ TruncateCLOG(vacuumXID); + TruncateSUBTRANS(vacuumXID); /* Give warning about impending wraparound problems */ if (frozenAlreadyWrapped) diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 4a58419079ada18a80ce980b416e6e072ad4c4d9..dfa3f7121edb83c47b0c396d58cea09c86b2a9be 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/variable.c,v 1.97 2004/05/26 04:41:13 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/variable.c,v 1.98 2004/07/01 00:50:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -470,10 +470,17 @@ show_timezone(void) const char * assign_XactIsoLevel(const char *value, bool doit, GucSource source) { - if (doit && source >= PGC_S_INTERACTIVE && SerializableSnapshot != NULL) + if (doit && 
source >= PGC_S_INTERACTIVE) + { + if (SerializableSnapshot != NULL) ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), errmsg("SET TRANSACTION ISOLATION LEVEL must be called before any query"))); + if (IsSubTransaction()) + ereport(ERROR, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("SET TRANSACTION ISOLATION LEVEL must not be called in a subtransaction"))); + } if (strcmp(value, "serializable") == 0) { diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 91b633d9bd8400d6f9fd4a481b5e9c62111bae10..7534ddd793388818f06a848244f7709492242157 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/spi.c,v 1.118 2004/06/11 01:08:43 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/spi.c,v 1.119 2004/07/01 00:50:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,7 @@ int SPI_result; static _SPI_connection *_SPI_stack = NULL; static _SPI_connection *_SPI_current = NULL; +static int _SPI_stack_depth = 0; /* allocated size of _SPI_stack */ static int _SPI_connected = -1; static int _SPI_curid = -1; @@ -59,7 +60,7 @@ static bool _SPI_checktuples(void); int SPI_connect(void) { - _SPI_connection *new_SPI_stack; + int newdepth; /* * When procedure called by Executor _SPI_curid expected to be equal @@ -70,39 +71,46 @@ SPI_connect(void) if (_SPI_stack == NULL) { - if (_SPI_connected != -1) + if (_SPI_connected != -1 || _SPI_stack_depth != 0) elog(ERROR, "SPI stack corrupted"); - new_SPI_stack = (_SPI_connection *) malloc(sizeof(_SPI_connection)); + newdepth = 16; + _SPI_stack = (_SPI_connection *) + MemoryContextAlloc(TopTransactionContext, + newdepth * sizeof(_SPI_connection)); + _SPI_stack_depth = newdepth; } else { - if (_SPI_connected < 0) + if (_SPI_stack_depth <= 0 || _SPI_stack_depth <= _SPI_connected) elog(ERROR, "SPI stack corrupted"); - new_SPI_stack = (_SPI_connection *) 
realloc(_SPI_stack, - (_SPI_connected + 2) * sizeof(_SPI_connection)); + if (_SPI_stack_depth == _SPI_connected + 1) + { + newdepth = _SPI_stack_depth * 2; + _SPI_stack = (_SPI_connection *) + repalloc(_SPI_stack, + newdepth * sizeof(_SPI_connection)); + _SPI_stack_depth = newdepth; + } } - if (new_SPI_stack == NULL) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - /* - * We' returning to procedure where _SPI_curid == _SPI_connected - 1 + * We're entering procedure where _SPI_curid == _SPI_connected - 1 */ - _SPI_stack = new_SPI_stack; _SPI_connected++; + Assert(_SPI_connected >= 0 && _SPI_connected < _SPI_stack_depth); _SPI_current = &(_SPI_stack[_SPI_connected]); _SPI_current->processed = 0; _SPI_current->tuptable = NULL; + _SPI_current->connectXid = GetCurrentTransactionId(); /* * Create memory contexts for this procedure * - * XXX it would be better to use PortalContext as the parent context, but - * we may not be inside a portal (consider deferred-trigger - * execution). + * XXX it would be better to use PortalContext as the parent context, + * but we may not be inside a portal (consider deferred-trigger + * execution). Perhaps CurTransactionContext would do? For now it + * doesn't matter because we clean up explicitly in AtEOSubXact_SPI(). */ _SPI_current->procCxt = AllocSetContextCreate(TopTransactionContext, "SPI Proc", @@ -152,28 +160,11 @@ SPI_finish(void) _SPI_connected--; _SPI_curid--; if (_SPI_connected == -1) - { - free(_SPI_stack); - _SPI_stack = NULL; _SPI_current = NULL; - } else - { - _SPI_connection *new_SPI_stack; - - new_SPI_stack = (_SPI_connection *) realloc(_SPI_stack, - (_SPI_connected + 1) * sizeof(_SPI_connection)); - /* This could only fail with a pretty stupid malloc package ... 
*/ - if (new_SPI_stack == NULL) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - _SPI_stack = new_SPI_stack; _SPI_current = &(_SPI_stack[_SPI_connected]); - } return SPI_OK_FINISH; - } /* @@ -187,23 +178,54 @@ AtEOXact_SPI(bool isCommit) * freed automatically, so we can ignore them here. We just need to * restore our static variables to initial state. */ - if (_SPI_stack != NULL) - { - free(_SPI_stack); - if (isCommit) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("freeing non-empty SPI stack"), - errhint("Check for missing \"SPI_finish\" calls"))); - } + if (isCommit && _SPI_connected != -1) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("transaction left non-empty SPI stack"), + errhint("Check for missing \"SPI_finish\" calls"))); _SPI_current = _SPI_stack = NULL; + _SPI_stack_depth = 0; _SPI_connected = _SPI_curid = -1; SPI_processed = 0; SPI_lastoid = InvalidOid; SPI_tuptable = NULL; } +/* + * Clean up SPI state at subtransaction commit or abort. + * + * During commit, there shouldn't be any unclosed entries remaining from + * the current transaction; we throw them away if found. 
+ */ +void +AtEOSubXact_SPI(bool isCommit, TransactionId childXid) +{ + bool found = false; + + while (_SPI_connected >= 0) + { + _SPI_connection *connection = &(_SPI_stack[_SPI_connected]); + int res; + + if (connection->connectXid != childXid) + break; /* couldn't be any underneath it either */ + + found = true; + + _SPI_curid = _SPI_connected - 1; /* avoid begin_call error */ + res = SPI_finish(); + Assert(res == SPI_OK_FINISH); + } + + if (found && isCommit) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("subtransaction left non-empty SPI stack"), + errhint("Check for missing \"SPI_finish\" calls"))); +} + + /* Pushes SPI stack to allow recursive SPI calls */ void SPI_push(void) @@ -1148,16 +1170,18 @@ _SPI_execute(const char *src, int tcount, _SPI_plan *plan) res = SPI_ERROR_CURSOR; goto fail; } - else if (IsA(queryTree->utilityStmt, TransactionStmt)) - { - res = SPI_ERROR_TRANSACTION; - goto fail; - } res = SPI_OK_UTILITY; if (plan == NULL) { ProcessUtility(queryTree->utilityStmt, dest, NULL); - CommandCounterIncrement(); + + if (IsA(queryTree->utilityStmt, TransactionStmt)) + { + CommitTransactionCommand(); + StartTransactionCommand(); + } + else + CommandCounterIncrement(); } } else if (plan == NULL) @@ -1273,7 +1297,14 @@ _SPI_execute_plan(_SPI_plan *plan, Datum *Values, const char *Nulls, { ProcessUtility(queryTree->utilityStmt, dest, NULL); res = SPI_OK_UTILITY; - CommandCounterIncrement(); + + if (IsA(queryTree->utilityStmt, TransactionStmt)) + { + CommitTransactionCommand(); + StartTransactionCommand(); + } + else + CommandCounterIncrement(); } else { diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 5616d0b3cd1911b3f65e5d98c97d98218ecc230f..dbd4f15cefdfd5cbcfeef6b1691a11b39475e393 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -13,7 +13,7 @@ * * Copyright (c) 2001-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.76 
2004/06/26 16:32:02 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.77 2004/07/01 00:50:36 tgl Exp $ * ---------- */ #include "postgres.h" @@ -167,6 +167,7 @@ static void pgstat_write_statsfile(void); static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb, PgStat_StatBeEntry **betab, int *numbackends); +static void backend_read_statsfile(void); static void pgstat_setheader(PgStat_MsgHdr *hdr, int mtype); static void pgstat_send(void *msg, int len); @@ -786,12 +787,7 @@ pgstat_vacuum_tabstat(void) * If not done for this transaction, read the statistics collector * stats file into some hash tables. */ - if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId())) - { - pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, - &pgStatBeTable, &pgStatNumBackends); - pgStatDBHashXact = GetCurrentTransactionId(); - } + backend_read_statsfile(); /* * Lookup our own database entry @@ -1210,15 +1206,9 @@ pgstat_fetch_stat_dbentry(Oid dbid) /* * If not done for this transaction, read the statistics collector - * stats file into some hash tables. Be careful with the - * read_statsfile() call below! + * stats file into some hash tables. */ - if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId())) - { - pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, - &pgStatBeTable, &pgStatNumBackends); - pgStatDBHashXact = GetCurrentTransactionId(); - } + backend_read_statsfile(); /* * Lookup the requested database @@ -1250,15 +1240,9 @@ pgstat_fetch_stat_tabentry(Oid relid) /* * If not done for this transaction, read the statistics collector - * stats file into some hash tables. Be careful with the - * read_statsfile() call below! + * stats file into some hash tables. */ - if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId())) - { - pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, - &pgStatBeTable, &pgStatNumBackends); - pgStatDBHashXact = GetCurrentTransactionId(); - } + backend_read_statsfile(); /* * Lookup our database. 
@@ -1296,12 +1280,7 @@ pgstat_fetch_stat_tabentry(Oid relid) PgStat_StatBeEntry * pgstat_fetch_stat_beentry(int beid) { - if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId())) - { - pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, - &pgStatBeTable, &pgStatNumBackends); - pgStatDBHashXact = GetCurrentTransactionId(); - } + backend_read_statsfile(); if (beid < 1 || beid > pgStatNumBackends) return NULL; @@ -1320,12 +1299,7 @@ pgstat_fetch_stat_beentry(int beid) int pgstat_fetch_stat_numbackends(void) { - if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId())) - { - pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, - &pgStatBeTable, &pgStatNumBackends); - pgStatDBHashXact = GetCurrentTransactionId(); - } + backend_read_statsfile(); return pgStatNumBackends; } @@ -2759,11 +2733,32 @@ pgstat_read_statsfile(HTAB **dbhash, Oid onlydb, fclose(fpin); } +/* + * If not done for this transaction, read the statistics collector + * stats file into some hash tables. + * + * Because we store the hash tables in TopTransactionContext, the result + * is good for the entire current main transaction. + */ +static void +backend_read_statsfile(void) +{ + TransactionId topXid = GetTopTransactionId(); + + if (!TransactionIdEquals(pgStatDBHashXact, topXid)) + { + Assert(!pgStatRunningInCollector); + pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId, + &pgStatBeTable, &pgStatNumBackends); + pgStatDBHashXact = topXid; + } +} + /* ---------- * pgstat_recv_bestart() - * - * Process a backend starup message. + * Process a backend startup message. 
* ---------- */ static void diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 725b79cad388786a6c46fffcafa468f54ea06ab0..4a9ddc32432cc82e36c6fa21a4c0afc716d548ae 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.171 2004/06/18 06:13:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.172 2004/07/01 00:50:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,6 +45,7 @@ #include "storage/bufpage.h" #include "storage/proc.h" #include "storage/smgr.h" +#include "utils/memutils.h" #include "utils/relcache.h" #include "pgstat.h" @@ -64,9 +65,13 @@ long NDirectFileRead; /* some I/O's are direct file access. * bypass bufmgr */ long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */ +/* List of upper-level-transaction buffer refcount arrays */ +static List *upperRefCounts = NIL; + static void PinBuffer(BufferDesc *buf); static void UnpinBuffer(BufferDesc *buf); +static void BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning); static void WaitIO(BufferDesc *buf); static void StartBufferIO(BufferDesc *buf, bool forInput); static void TerminateBufferIO(BufferDesc *buf, int err_flag); @@ -826,30 +831,104 @@ AtEOXact_Buffers(bool isCommit) for (i = 0; i < NBuffers; i++) { if (PrivateRefCount[i] != 0) - { - BufferDesc *buf = &(BufferDescriptors[i]); - - if (isCommit) - elog(WARNING, - "buffer refcount leak: [%03d] " - "(rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d)", - i, - buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, - buf->tag.rnode.relNode, - buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - - PrivateRefCount[i] = 1; /* make sure we release shared pin */ - LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - UnpinBuffer(buf); - LWLockRelease(BufMgrLock); - Assert(PrivateRefCount[i] == 0); - 
} + BufferFixLeak(i, 0, isCommit); } AtEOXact_LocalBuffers(isCommit); } +/* + * During subtransaction start, save buffer reference counts. + */ +void +AtSubStart_Buffers(void) +{ + int32 *copyRefCounts; + Size rcSize; + MemoryContext old_cxt; + + /* this is probably the active context already, but be safe */ + old_cxt = MemoryContextSwitchTo(CurTransactionContext); + + /* + * We need to copy the current state of PrivateRefCount[]. In the typical + * scenario, few if any of the entries will be nonzero, and we could save + * space by storing only the nonzero ones. However, copying the whole + * thing is lots simpler and faster both here and in AtEOSubXact_Buffers, + * so it seems best to waste the space. + */ + rcSize = NBuffers * sizeof(int32); + copyRefCounts = (int32 *) palloc(rcSize); + memcpy(copyRefCounts, PrivateRefCount, rcSize); + + /* Attach to list */ + upperRefCounts = lcons(copyRefCounts, upperRefCounts); + + MemoryContextSwitchTo(old_cxt); +} + +/* + * AtEOSubXact_Buffers + * + * At subtransaction end, we restore the saved counts. If committing, we + * complain if the refcounts don't match; if aborting, just restore silently. + */ +void +AtEOSubXact_Buffers(bool isCommit) +{ + int32 *oldRefCounts; + int i; + + oldRefCounts = (int32 *) linitial(upperRefCounts); + upperRefCounts = list_delete_first(upperRefCounts); + + for (i = 0; i < NBuffers; i++) + { + if (PrivateRefCount[i] != oldRefCounts[i]) + BufferFixLeak(i, oldRefCounts[i], isCommit); + } + + pfree(oldRefCounts); +} + +/* + * Fix a buffer refcount leak. + * + * The caller does not hold the BufMgrLock. 
+ */ +static void +BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning) +{ + BufferDesc *buf = &(BufferDescriptors[bufnum]); + + if (emitWarning) + elog(WARNING, + "buffer refcount leak: [%03d] (rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d, should be=%d)", + bufnum, + buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, + buf->tag.rnode.relNode, + buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[bufnum], shouldBe); + + /* If it's less, we're in a heap o' trouble */ + if (PrivateRefCount[bufnum] <= shouldBe) + elog(FATAL, "buffer refcount was decreased by subtransaction"); + + if (shouldBe > 0) + { + /* We still keep the shared-memory pin */ + PrivateRefCount[bufnum] = shouldBe; + } + else + { + PrivateRefCount[bufnum] = 1; /* make sure we release shared pin */ + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); + UnpinBuffer(buf); + LWLockRelease(BufMgrLock); + Assert(PrivateRefCount[bufnum] == 0); + } +} + /* * FlushBufferPool * diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 69c460306b4199d51647abd2d3f409b95f2efd24..4c759db9d8e4663b97ceca6fcb61e0443686b2ad 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,16 +8,16 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.68 2004/05/29 22:48:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.69 2004/07/01 00:50:52 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" - -#include "miscadmin.h" #include "access/clog.h" +#include "access/subtrans.h" #include "access/xlog.h" +#include "miscadmin.h" #include "postmaster/bgwriter.h" #include "storage/bufmgr.h" #include "storage/freespace.h" @@ -70,6 +70,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, size += LockShmemSize(maxBackends); size += XLOGShmemSize(); size += CLOGShmemSize(); + size += SUBTRANSShmemSize(); size += LWLockShmemSize(); size += 
SInvalShmemSize(maxBackends); size += FreeSpaceShmemSize(); @@ -133,6 +134,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, */ XLOGShmemInit(); CLOGShmemInit(); + SUBTRANSShmemInit(); InitBufferPool(); /* diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index 856d0f0a73f2b65218a69212d33e945b79f079f9..bf4eb0f6293b60d80ab7a1c7b1edea000e1d6167 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.64 2004/06/02 21:29:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.65 2004/07/01 00:50:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,8 @@ #include <signal.h> +#include "access/subtrans.h" +#include "access/transam.h" #include "commands/async.h" #include "storage/ipc.h" #include "storage/proc.h" @@ -428,20 +430,40 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) /* * TransactionIdIsInProgress -- is given transaction running by some backend + * + * There are three possibilities for finding a running transaction: + * + * 1. the given Xid is a main transaction Id. We will find this out cheaply + * by looking at the PGPROC struct for each backend. + * + * 2. the given Xid is one of the cached subxact Xids in the PGPROC array. + * We can find this out cheaply too. + * + * 3. Search the SubTrans tree. This is the slowest, but sadly it has to be + * done always if the other two failed. + * + * SInvalLock has to be held while we do 1 and 2. If we save all the Xids + * while doing 1, we can release the SInvalLock while we do 3. 
This buys back + * some concurrency (we can't retrieve the main Xids from PGPROC again anyway, + * see GetNewTransactionId) */ bool TransactionIdIsInProgress(TransactionId xid) { - bool result = false; - SISeg *segP = shmInvalBuffer; - ProcState *stateP = segP->procState; - int index; + bool result = false; + SISeg *segP = shmInvalBuffer; + ProcState *stateP = segP->procState; + int i; + int nxids = 0; + TransactionId *xids; + + xids = (TransactionId *)palloc(sizeof(TransactionId) * segP->maxBackends); LWLockAcquire(SInvalLock, LW_SHARED); - for (index = 0; index < segP->lastBackend; index++) + for (i = 0; i < segP->lastBackend; i++) { - SHMEM_OFFSET pOffset = stateP[index].procStruct; + SHMEM_OFFSET pOffset = stateP[i].procStruct; if (pOffset != INVALID_OFFSET) { @@ -450,16 +472,71 @@ TransactionIdIsInProgress(TransactionId xid) /* Fetch xid just once - see GetNewTransactionId */ TransactionId pxid = proc->xid; + /* + * check the main Xid (step 1 above) + */ if (TransactionIdEquals(pxid, xid)) { result = true; break; } + + /* + * save the main Xid for step 3. + */ + xids[nxids++] = pxid; + +#ifdef NOT_USED + FIXME -- waiting to save the Xids in PGPROC ... + + /* + * check the saved Xids array (step 2) + */ + for (j = 0; j < PGPROC_MAX_SAVED_XIDS; j++) + { + pxid = proc->savedxids[j]; + + if (!TransactionIdIsValid(pxids)) + break; + + if (TransactionIdEquals(pxid, xid)) + { + result = true; + break; + } + } +#endif + + if (result) + break; + } } LWLockRelease(SInvalLock); + /* + * Step 3: have to check pg_subtrans. Use the saved Xids. + * + * XXX Could save the cached Xids too for further improvement. + */ + if (!result) + { + /* this is a potentially expensive call. */ + xid = SubTransGetTopmostTransaction(xid); + + Assert(TransactionIdIsValid(xid)); + + /* + * We don't care if it aborted, because if it did, we won't find + * it in the array. 
+ */ + + for (i = 0; i < nxids; i++) + if (TransactionIdEquals(xids[i], xid)) + return true; + } + return result; } @@ -596,7 +673,7 @@ GetSnapshotData(Snapshot snapshot, bool serializable) * This does open a possibility for avoiding repeated malloc/free: * since MaxBackends does not change at runtime, we can simply reuse * the previous xip array if any. (This relies on the fact that all - * calls pass static SnapshotData structs.) + * callers pass static SnapshotData structs.) */ if (snapshot->xip == NULL) { diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index e4e52b16abf33b283c1be267d5b93d84017a90ee..45305b4dea2f90c469dbbc7390e69429d76fd5fc 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.63 2004/05/28 05:13:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.64 2004/07/01 00:50:59 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -333,19 +334,21 @@ XactLockTableInsert(TransactionId xid) * XactLockTableWait * * Wait for the specified transaction to commit or abort. + * We actually wait on the topmost transaction of the transaction tree. 
*/ void XactLockTableWait(TransactionId xid) { LOCKTAG tag; TransactionId myxid = GetCurrentTransactionId(); + TransactionId waitXid = SubTransGetTopmostTransaction(xid); - Assert(!TransactionIdEquals(xid, myxid)); + Assert(!SubTransXidsHaveCommonAncestor(waitXid, myxid)); MemSet(&tag, 0, sizeof(tag)); tag.relId = XactLockTableId; tag.dbId = InvalidOid; - tag.objId.xid = xid; + tag.objId.xid = waitXid; if (!LockAcquire(LockTableId, &tag, myxid, ShareLock, false)) @@ -355,8 +358,13 @@ XactLockTableWait(TransactionId xid) /* * Transaction was committed/aborted/crashed - we have to update - * pg_clog if transaction is still marked as running. + * pg_clog if transaction is still marked as running. If it's a + * subtransaction, we can update the parent status too. */ - if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid)) - TransactionIdAbort(xid); + if (!TransactionIdDidCommit(waitXid) && !TransactionIdDidAbort(waitXid)) + { + TransactionIdAbort(waitXid); + if (waitXid != xid) + TransactionIdAbort(xid); + } } diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index c04f3b5c88a9042e5334f534e3d5bde8d14f24b2..6b7f43440e66f8de414229ac2d0ba83dc02f3daf 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.133 2004/06/05 19:48:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.134 2004/07/01 00:50:59 tgl Exp $ * * NOTES * Outside modules can create a lock table and acquire/release @@ -23,7 +23,7 @@ * Interface: * * LockAcquire(), LockRelease(), LockMethodTableInit(), - * LockMethodTableRename(), LockReleaseAll, + * LockMethodTableRename(), LockReleaseAll(), * LockCheckConflicts(), GrantLock() * *------------------------------------------------------------------------- @@ -1129,19 +1129,25 @@ LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag, } /* - * LockReleaseAll -- Release all locks in 
a process's lock list. + * LockReleaseAll -- Release all locks of the specified lock method that + * are held by the specified process. * - * Well, not really *all* locks. + * Well, not necessarily *all* locks. The available behaviors are: * - * If 'allxids' is TRUE, all locks of the specified lock method are - * released, regardless of transaction affiliation. + * which == ReleaseAll: release all locks regardless of transaction + * affiliation. * - * If 'allxids' is FALSE, all locks of the specified lock method and - * specified XID are released. + * which == ReleaseAllExceptSession: release all locks with Xid != 0 + * (zero is the Xid used for "session" locks). + * + * which == ReleaseGivenXids: release only locks whose Xids appear in + * the xids[] array (of length nxids). + * + * xids/nxids are ignored when which != ReleaseGivenXids. */ bool LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc, - bool allxids, TransactionId xid) + LockReleaseWhich which, int nxids, TransactionId *xids) { SHM_QUEUE *procHolders = &(proc->procHolders); PROCLOCK *proclock; @@ -1190,8 +1196,25 @@ LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc, if (LOCK_LOCKMETHOD(*lock) != lockmethodid) goto next_item; - /* If not allxids, ignore items that are of the wrong xid */ - if (!allxids && !TransactionIdEquals(xid, proclock->tag.xid)) + if (which == ReleaseGivenXids) + { + /* Ignore locks with an Xid not in the list */ + bool release = false; + + for (i = 0; i < nxids; i++) + { + if (TransactionIdEquals(proclock->tag.xid, xids[i])) + { + release = true; + break; + } + } + if (!release) + goto next_item; + } + /* Ignore locks with Xid=0 unless we are asked to release All locks */ + else if (TransactionIdEquals(proclock->tag.xid, InvalidTransactionId) + && which != ReleaseAll) goto next_item; PROCLOCK_PRINT("LockReleaseAll", proclock); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 
d1410d04a05a4d0ace6875f2e1a002a31f01f358..e48531c10accdcb93cf68b85c59fb6a670d291b3 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -15,13 +15,14 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.20 2004/06/11 16:43:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.21 2004/07/01 00:50:59 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/clog.h" +#include "access/subtrans.h" #include "storage/lwlock.h" #include "storage/proc.h" #include "storage/spin.h" @@ -111,6 +112,9 @@ NumLWLocks(void) /* clog.c needs one per CLOG buffer + one control lock */ numLocks += NUM_CLOG_BUFFERS + 1; + /* subtrans.c needs one per SubTrans buffer + one control lock */ + numLocks += NUM_SUBTRANS_BUFFERS + 1; + /* Perhaps create a few more for use by user-defined modules? */ return numLocks; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index dbf5b414153f01238234c0bbfba1e6e7947c01b2..abe44e808adde8f64439a85a12939b246e502665 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.148 2004/05/29 22:48:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.149 2004/07/01 00:50:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -380,26 +380,34 @@ LockWaitCancel(void) /* * ProcReleaseLocks() -- release locks associated with current transaction - * at transaction commit or abort + * at main transaction and subtransaction commit or abort * - * At commit, we release only locks tagged with the current transaction's XID, - * leaving those marked with XID 0 (ie, session locks) undisturbed. 
At abort, - * we release all locks including XID 0, because we need to clean up after - * a failure. This logic will need extension if we ever support nested - * transactions. + * The options for which locks to release are the same as for the underlying + * LockReleaseAll() function. * - * Note that user locks are not released in either case. + * Notes: + * + * At main transaction commit, we release all locks except session locks. + * At main transaction abort, we release all locks including session locks; + * this lets us clean up after a VACUUM FULL failure. + * + * At subtransaction commit, we don't release any locks (so this func is not + * called at all); we will defer the releasing to the parent transaction. + * At subtransaction abort, we release all locks held by the subtransaction; + * this is implemented by passing in the Xids of the failed subxact and its + * children in the xids[] array. + * + * Note that user locks are not released in any case. */ void -ProcReleaseLocks(bool isCommit) +ProcReleaseLocks(LockReleaseWhich which, int nxids, TransactionId *xids) { if (!MyProc) return; /* If waiting, get off wait queue (should only be needed after error) */ LockWaitCancel(); /* Release locks */ - LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, - !isCommit, GetCurrentTransactionId()); + LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, which, nxids, xids); } @@ -432,11 +440,11 @@ ProcKill(int code, Datum arg) LockWaitCancel(); /* Remove from the standard lock table */ - LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, true, InvalidTransactionId); + LockReleaseAll(DEFAULT_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL); #ifdef USER_LOCKS /* Remove from the user lock table */ - LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId); + LockReleaseAll(USER_LOCKMETHOD, MyProc, ReleaseAll, 0, NULL); #endif SpinLockAcquire(ProcStructLock); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 
c7783d878f258de072df54996c60e5f57b378fcc..5c53d48f83831057d0d08b07137366438065aa7f 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.74 2004/06/18 06:13:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.75 2004/07/01 00:51:07 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -80,9 +80,10 @@ static HTAB *SMgrRelationHash = NULL; * executed immediately, but is just entered in the list. When and if * the transaction commits, we can delete the physical file. * - * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear - * unbetimes. It'd probably be OK to keep it in TopTransactionContext, - * but I'm being paranoid. + * The list is kept in CurTransactionContext. In subtransactions, each + * subtransaction has its own list in its own CurTransactionContext, but + * successful subtransactions attach their lists to their parent's list. + * Failed subtransactions can immediately execute the abort-time actions. */ typedef struct PendingRelDelete @@ -91,10 +92,11 @@ typedef struct PendingRelDelete int which; /* which storage manager? */ bool isTemp; /* is it a temporary relation? */ bool atCommit; /* T=delete at commit; F=delete at abort */ - struct PendingRelDelete *next; /* linked-list link */ } PendingRelDelete; -static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ +static List *pendingDeletes = NIL; /* head of linked list */ + +static List *upperPendingDeletes = NIL; /* list of upper-xact lists */ /* @@ -305,6 +307,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) XLogRecData rdata; xl_smgr_create xlrec; PendingRelDelete *pending; + MemoryContext old_cxt; if (! 
(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo)) ereport(ERROR, @@ -332,14 +335,17 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata); /* Add the relation to the list of stuff to delete at abort */ - pending = (PendingRelDelete *) - MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); + old_cxt = MemoryContextSwitchTo(CurTransactionContext); + + pending = (PendingRelDelete *) palloc(sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = false; /* delete if abort */ - pending->next = pendingDeletes; - pendingDeletes = pending; + + pendingDeletes = lcons(pending, pendingDeletes); + + MemoryContextSwitchTo(old_cxt); } /* @@ -354,16 +360,20 @@ void smgrscheduleunlink(SMgrRelation reln, bool isTemp) { PendingRelDelete *pending; + MemoryContext old_cxt; /* Add the relation to the list of stuff to delete at commit */ - pending = (PendingRelDelete *) - MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); + old_cxt = MemoryContextSwitchTo(CurTransactionContext); + + pending = (PendingRelDelete *) palloc(sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = true; /* delete if commit */ - pending->next = pendingDeletes; - pendingDeletes = pending; + + pendingDeletes = lcons(pending, pendingDeletes); + + MemoryContextSwitchTo(old_cxt); /* * NOTE: if the relation was created in this transaction, it will now @@ -627,18 +637,21 @@ smgrimmedsync(SMgrRelation reln) void smgrDoPendingDeletes(bool isCommit) { - while (pendingDeletes != NULL) + ListCell *p; + + foreach(p, pendingDeletes) { - PendingRelDelete *pending = pendingDeletes; + PendingRelDelete *pending = lfirst(p); - pendingDeletes = pending->next; if (pending->atCommit == isCommit) smgr_internal_unlink(pending->relnode, pending->which, 
pending->isTemp, false); - pfree(pending); } + + /* We needn't free the cells since they are in CurTransactionContext */ + pendingDeletes = NIL; } /* @@ -647,17 +660,22 @@ smgrDoPendingDeletes(bool isCommit) * The return value is the number of relations scheduled for termination. * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. * If there are no relations to be deleted, *ptr is set to NULL. + * + * Note that the list does not include anything scheduled for termination + * by upper-level transactions. */ int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) { int nrels; RelFileNode *rptr; - PendingRelDelete *pending; + ListCell *p; nrels = 0; - for (pending = pendingDeletes; pending != NULL; pending = pending->next) + foreach(p, pendingDeletes) { + PendingRelDelete *pending = lfirst(p); + if (pending->atCommit == forCommit) nrels++; } @@ -668,14 +686,69 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) } rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode)); *ptr = rptr; - for (pending = pendingDeletes; pending != NULL; pending = pending->next) + foreach(p, pendingDeletes) { + PendingRelDelete *pending = lfirst(p); + if (pending->atCommit == forCommit) *rptr++ = pending->relnode; } return nrels; } +/* + * AtSubStart_smgr() --- Take care of subtransaction start. + * + * Push empty state for the new subtransaction. + */ +void +AtSubStart_smgr(void) +{ + MemoryContext old_cxt; + + /* Keep the list-of-lists in TopTransactionContext for simplicity */ + old_cxt = MemoryContextSwitchTo(TopTransactionContext); + + upperPendingDeletes = lcons(pendingDeletes, upperPendingDeletes); + + pendingDeletes = NIL; + + MemoryContextSwitchTo(old_cxt); +} + +/* + * AtSubCommit_smgr() --- Take care of subtransaction commit. + * + * Reassign all items in the pending deletes list to the parent transaction. 
+ */ +void +AtSubCommit_smgr(void) +{ + List *parentPendingDeletes; + + parentPendingDeletes = (List *) linitial(upperPendingDeletes); + upperPendingDeletes = list_delete_first(upperPendingDeletes); + + pendingDeletes = list_concat(parentPendingDeletes, pendingDeletes); +} + +/* + * AtSubAbort_smgr() --- Take care of subtransaction abort. + * + * Delete created relations and forget about deleted relations. + * We can execute these operations immediately because we know this + * subtransaction will not commit. + */ +void +AtSubAbort_smgr(void) +{ + smgrDoPendingDeletes(false); + + /* Must pop the stack, too */ + pendingDeletes = (List *) linitial(upperPendingDeletes); + upperPendingDeletes = list_delete_first(upperPendingDeletes); +} + /* * smgrcommit() -- Prepare to commit changes made during the current * transaction. diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 9b7cfcd66814cce67dd5fefc8a15521ca7275bc5..c42bd6c7bfbb3c45be617965035928cc8033fc58 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.421 2004/06/24 21:03:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.422 2004/07/01 00:51:11 tgl Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -841,6 +841,7 @@ exec_simple_query(const char *query_string) TransactionStmt *stmt = (TransactionStmt *) parsetree; if (stmt->kind == TRANS_STMT_COMMIT || + stmt->kind == TRANS_STMT_BEGIN || stmt->kind == TRANS_STMT_ROLLBACK) allowit = true; } @@ -1161,6 +1162,7 @@ exec_parse_message(const char *query_string, /* string to execute */ TransactionStmt *stmt = (TransactionStmt *) parsetree; if (stmt->kind == TRANS_STMT_COMMIT || + stmt->kind == TRANS_STMT_BEGIN || stmt->kind == TRANS_STMT_ROLLBACK) allowit = true; } @@ -1623,6 +1625,7 @@ exec_execute_message(const char *portal_name, long max_rows) is_trans_stmt = true; if (stmt->kind == 
TRANS_STMT_COMMIT || + stmt->kind == TRANS_STMT_BEGIN || stmt->kind == TRANS_STMT_ROLLBACK) is_trans_exit = true; } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 5e91a7283ecc0ae100a1f1da7d4124e5e463557d..8bfa3610bdb96e0b0d6246b63dd6f01e81b9aaef 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.112 2004/05/26 04:41:40 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.113 2004/07/01 00:51:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -360,6 +360,8 @@ CatCacheRemoveCTup(CatCache *cache, CatCTup *ct) /* free associated tuple data */ if (ct->tuple.t_data != NULL) pfree(ct->tuple.t_data); + if (ct->prev_refcount != NULL) + pfree(ct->prev_refcount); pfree(ct); --cache->cc_ntup; @@ -394,6 +396,8 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl) /* free associated tuple data */ if (cl->tuple.t_data != NULL) pfree(cl->tuple.t_data); + if (cl->prev_refcount != NULL) + pfree(cl->prev_refcount); pfree(cl); } @@ -518,9 +522,9 @@ CreateCacheMemoryContext(void) if (!CacheMemoryContext) CacheMemoryContext = AllocSetContextCreate(TopMemoryContext, "CacheMemoryContext", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); } @@ -560,6 +564,13 @@ AtEOXact_CatCache(bool isCommit) cl->refcount = 0; } + /* + * Reset the refcount stack. Drop the item count to zero, + * but don't deallocate the stack itself, so it can be used by + * future subtransactions. + */ + cl->numpushes = 0; + /* Clean up any now-deletable dead entries */ if (cl->dead) CatCacheRemoveCList(ccp, cl); @@ -585,12 +596,174 @@ AtEOXact_CatCache(bool isCommit) ct->refcount = 0; } + /* + * Reset the refcount stack. 
Drop the item count to zero, + * but don't deallocate the stack itself, so it can be used by + * future subtransactions. + */ + ct->numpushes = 0; + /* Clean up any now-deletable dead entries */ if (ct->dead) CatCacheRemoveCTup(ct->my_cache, ct); } } +/* + * AtSubStart_CatCache + * + * Saves reference counts of each entry at subtransaction start so they + * can be restored if the subtransaction later aborts. + */ +void +AtSubStart_CatCache(void) +{ + CatCache *ccp; + Dlelem *elt, + *nextelt; + MemoryContext old_cxt; + + + old_cxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * Prepare CLists + */ + for (ccp = CacheHdr->ch_caches; ccp; ccp = ccp->cc_next) + { + for (elt = DLGetHead(&ccp->cc_lists); elt; elt = nextelt) + { + CatCList *cl = (CatCList *) DLE_VAL(elt); + + nextelt = DLGetSucc(elt); + + if (cl->numpushes == cl->numalloc) + { + if (cl->numalloc == 0) + { + cl->numalloc = 8; + cl->prev_refcount = palloc(sizeof(int) * cl->numalloc); + } + else + { + cl->numalloc *= 2; + cl->prev_refcount = repalloc(cl->prev_refcount, cl->numalloc * sizeof(int)); + } + } + + cl->prev_refcount[cl->numpushes++] = cl->refcount; + } + } + + /* + * Prepare CTuples + */ + for (elt = DLGetHead(&CacheHdr->ch_lrulist); elt; elt = nextelt) + { + CatCTup *ct = (CatCTup *) DLE_VAL(elt); + + nextelt = DLGetSucc(elt); + + if (ct->numpushes == ct->numalloc) + { + if (ct->numalloc == 0) + { + ct->numalloc = 8; + ct->prev_refcount = palloc(sizeof(int) * ct->numalloc); + } + else + { + ct->numalloc *= 2; + ct->prev_refcount = repalloc(ct->prev_refcount, sizeof(int) * ct->numalloc); + } + } + + ct->prev_refcount[ct->numpushes++] = ct->refcount; + } + + MemoryContextSwitchTo(old_cxt); +} + +void +AtEOSubXact_CatCache(bool isCommit) +{ + CatCache *ccp; + Dlelem *elt, + *nextelt; + + /* + * Restore CLists + */ + for (ccp = CacheHdr->ch_caches; ccp; ccp = ccp->cc_next) + { + for (elt = DLGetHead(&ccp->cc_lists); elt; elt = nextelt) + { + CatCList *cl = (CatCList *) DLE_VAL(elt); + + 
nextelt = DLGetSucc(elt); + + /* + * During commit, check whether the count is what + * we expect. + */ + if (isCommit) + { + int expected_refcount; + if (cl->numpushes > 0) + expected_refcount = cl->prev_refcount[cl->numpushes - 1]; + else + expected_refcount = 0; + + if (cl->refcount != expected_refcount) + elog(WARNING, "catcache reference leak"); + } + + /* + * During abort we have to restore the original count; + * during commit, we have to restore in case of a leak, + * and it won't harm if this is the expected count. + */ + if (cl->numpushes > 0) + cl->refcount = cl->prev_refcount[--cl->numpushes]; + else + cl->refcount = 0; + } + } + + /* + * Restore CTuples + */ + for (elt = DLGetHead(&CacheHdr->ch_lrulist); elt; elt = nextelt) + { + CatCTup *ct = (CatCTup *) DLE_VAL(elt); + + nextelt = DLGetSucc(elt); + + if (isCommit) + { + int expected_refcount; + + if (ct->numpushes > 0) + expected_refcount = ct->prev_refcount[ct->numpushes - 1]; + else + expected_refcount = 0; + + if (ct->refcount != expected_refcount) + elog(WARNING, "catcache reference leak"); + } + + /* + * During abort we have to restore the original count; + * during commit, we have to restore in case of a leak, + * and it won't harm if this is the expected count.
+ */ + if (ct->numpushes > 0) + ct->refcount = ct->prev_refcount[--ct->numpushes]; + else + ct->refcount = 0; + } +} + /* * ResetCatalogCache * @@ -1505,6 +1678,9 @@ SearchCatCacheList(CatCache *cache, cl->my_cache = cache; DLInitElem(&cl->cache_elem, (void *) cl); cl->refcount = 1; /* count this first reference */ + cl->prev_refcount = NULL; + cl->numpushes = 0; + cl->numalloc = 0; cl->dead = false; cl->ordered = ordered; cl->nkeys = nkeys; @@ -1603,6 +1779,9 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, ct->dead = false; ct->negative = negative; ct->hash_value = hashValue; + ct->prev_refcount = NULL; + ct->numpushes = 0; + ct->numalloc = 0; DLAddHead(&CacheHdr->ch_lrulist, &ct->lrulist_elem); DLAddHead(&cache->cc_bucket[hashIndex], &ct->cache_elem); diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index ea958a27b46be097fc8ae701593bc3659207897e..e54a74fae4bea1028a90e94383b1868dcf838482 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -33,6 +33,10 @@ * to record the transaction commit before sending SI messages, otherwise * the other backends won't see our updated tuples as good. * + * When a subtransaction aborts, we can process and discard any events + * it has queued. When a subtransaction commits, we just add its events + * to the pending lists of the parent transaction. + * * In short, we need to remember until xact end every insert or delete * of a tuple that might be in the system caches. Updates are treated as * two events, delete + insert, for simplicity. (There are cases where @@ -66,15 +70,17 @@ * manipulating the init file is in relcache.c, but we keep track of the * need for it here. * - * All the request lists are kept in TopTransactionContext memory, since - * they need not live beyond the end of the current transaction. 
+ * The request lists proper are kept in CurTransactionContext of their + * creating (sub)transaction, since they can be forgotten on abort of that + * transaction but must be kept till top-level commit otherwise. For + * simplicity we keep the controlling list-of-lists in TopTransactionContext. * * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.62 2004/06/18 06:13:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.63 2004/07/01 00:51:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -95,7 +101,7 @@ * To minimize palloc traffic, we keep pending requests in successively- * larger chunks (a slightly more sophisticated version of an expansible * array). All request types can be stored as SharedInvalidationMessage - * records. + * records. The ordering of requests within a list is never significant. */ typedef struct InvalidationChunk { @@ -112,12 +118,15 @@ typedef struct InvalidationListHeader } InvalidationListHeader; /*---------------- - * Invalidation info is divided into two lists: + * Invalidation info is divided into two lists: * 1) events so far in current command, not yet reflected to caches. * 2) events in previous commands of current transaction; these have * been reflected to local caches, and must be either broadcast to * other backends or rolled back from local cache when we commit * or abort the transaction. + * Actually, we need two such lists for each level of nested transaction, + * so that we can discard events from an aborted subtransaction. When + * a subtransaction commits, we append its lists to the parent's lists. 
* * The relcache-file-invalidated flag can just be a simple boolean, * since we only act on it at transaction commit; we don't care which @@ -125,13 +134,22 @@ typedef struct InvalidationListHeader *---------------- */ -/* head of current-command event list */ -static InvalidationListHeader CurrentCmdInvalidMsgs; +typedef struct TransInvalidationInfo +{ + /* Back link to parent transaction's info */ + struct TransInvalidationInfo *parent; + + /* head of current-command event list */ + InvalidationListHeader CurrentCmdInvalidMsgs; -/* head of previous-commands event list */ -static InvalidationListHeader PriorCmdInvalidMsgs; + /* head of previous-commands event list */ + InvalidationListHeader PriorCmdInvalidMsgs; -static bool RelcacheInitFileInval; /* init file must be invalidated? */ + /* init file must be invalidated? */ + bool RelcacheInitFileInval; +} TransInvalidationInfo; + +static TransInvalidationInfo *transInvalInfo = NULL; /* * Dynamically-registered callback functions. Current implementation @@ -176,7 +194,7 @@ AddInvalidationMessage(InvalidationChunk **listHdr, /* First time through; create initial chunk */ #define FIRSTCHUNKSIZE 16 chunk = (InvalidationChunk *) - MemoryContextAlloc(TopTransactionContext, + MemoryContextAlloc(CurTransactionContext, sizeof(InvalidationChunk) + (FIRSTCHUNKSIZE - 1) *sizeof(SharedInvalidationMessage)); chunk->nitems = 0; @@ -190,7 +208,7 @@ AddInvalidationMessage(InvalidationChunk **listHdr, int chunksize = 2 * chunk->maxitems; chunk = (InvalidationChunk *) - MemoryContextAlloc(TopTransactionContext, + MemoryContextAlloc(CurTransactionContext, sizeof(InvalidationChunk) + (chunksize - 1) *sizeof(SharedInvalidationMessage)); chunk->nitems = 0; @@ -203,29 +221,6 @@ AddInvalidationMessage(InvalidationChunk **listHdr, chunk->nitems++; } -/* - * Free a list of inval message chunks. 
- * - * NOTE: when we are about to commit or abort a transaction, it's - * not really necessary to pfree the lists explicitly, since they will - * go away anyway when TopTransactionContext is destroyed. - */ -static void -FreeInvalidationMessageList(InvalidationChunk **listHdr) -{ - InvalidationChunk *chunk = *listHdr; - - *listHdr = NULL; - - while (chunk != NULL) - { - InvalidationChunk *nextchunk = chunk->next; - - pfree(chunk); - chunk = nextchunk; - } -} - /* * Append one list of invalidation message chunks to another, resetting * the source chunk-list pointer to NULL. @@ -331,31 +326,6 @@ AppendInvalidationMessages(InvalidationListHeader *dest, AppendInvalidationMessageList(&dest->rclist, &src->rclist); } -/* - * Reset an invalidation list to empty - * - * physicalFree may be set false if caller knows transaction is ending - */ -static void -DiscardInvalidationMessages(InvalidationListHeader *hdr, bool physicalFree) -{ - if (physicalFree) - { - /* Physically pfree the list data */ - FreeInvalidationMessageList(&hdr->cclist); - FreeInvalidationMessageList(&hdr->rclist); - } - else - { - /* - * Assume the storage will go away at xact end, just reset - * pointers - */ - hdr->cclist = NULL; - hdr->rclist = NULL; - } -} - /* * Execute the given function for all the messages in an invalidation list. * The list is not altered. 
@@ -386,7 +356,7 @@ RegisterCatcacheInvalidation(int cacheId, ItemPointer tuplePtr, Oid dbId) { - AddCatcacheInvalidationMessage(&CurrentCmdInvalidMsgs, + AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, cacheId, hashValue, tuplePtr, dbId); } @@ -398,7 +368,7 @@ RegisterCatcacheInvalidation(int cacheId, static void RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId) { - AddRelcacheInvalidationMessage(&CurrentCmdInvalidMsgs, + AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, dbId, relId, physId); /* @@ -406,7 +376,7 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId) * relcache init file, mark that we need to zap that file at commit. */ if (RelationIdIsInInitFile(relId)) - RelcacheInitFileInval = true; + transInvalInfo->RelcacheInitFileInval = true; } /* @@ -619,8 +589,38 @@ AcceptInvalidationMessages(void) } /* - * AtEOXactInvalidationMessages - * Process queued-up invalidation messages at end of transaction. + * AtStart_Inval + * Initialize inval lists at start of a main transaction. + */ +void +AtStart_Inval(void) +{ + Assert(transInvalInfo == NULL); + transInvalInfo = (TransInvalidationInfo *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(TransInvalidationInfo)); +} + +/* + * AtSubStart_Inval + * Initialize inval lists at start of a subtransaction. + */ +void +AtSubStart_Inval(void) +{ + TransInvalidationInfo *myInfo; + + Assert(transInvalInfo != NULL); + myInfo = (TransInvalidationInfo *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(TransInvalidationInfo)); + myInfo->parent = transInvalInfo; + transInvalInfo = myInfo; +} + +/* + * AtEOXact_Inval + * Process queued-up invalidation messages at end of main transaction. * * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list * to the shared invalidation message queue. 
Note that these will be read @@ -643,8 +643,11 @@ AcceptInvalidationMessages(void) * This should be called as the last step in processing a transaction. */ void -AtEOXactInvalidationMessages(bool isCommit) +AtEOXact_Inval(bool isCommit) { + /* Must be at top of stack */ + Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); + if (isCommit) { /* @@ -652,28 +655,77 @@ AtEOXactInvalidationMessages(bool isCommit) * and after we send the SI messages. However, we need not do * anything unless we committed. */ - if (RelcacheInitFileInval) + if (transInvalInfo->RelcacheInitFileInval) RelationCacheInitFileInvalidate(true); - AppendInvalidationMessages(&PriorCmdInvalidMsgs, - &CurrentCmdInvalidMsgs); + AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + &transInvalInfo->CurrentCmdInvalidMsgs); - ProcessInvalidationMessages(&PriorCmdInvalidMsgs, + ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, SendSharedInvalidMessage); - if (RelcacheInitFileInval) + if (transInvalInfo->RelcacheInitFileInval) RelationCacheInitFileInvalidate(false); } else { - ProcessInvalidationMessages(&PriorCmdInvalidMsgs, + ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, LocalExecuteInvalidationMessage); } - RelcacheInitFileInval = false; + /* Need not free anything explicitly */ + transInvalInfo = NULL; +} + +/* + * AtSubEOXact_Inval + * Process queued-up invalidation messages at end of subtransaction. + * + * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't), + * and then attach both CurrentCmdInvalidMsgs and PriorCmdInvalidMsgs to the + * parent's PriorCmdInvalidMsgs list. + * + * If not isCommit, we are aborting, and must locally process the messages + * in PriorCmdInvalidMsgs. No messages need be sent to other backends. + * We can forget about CurrentCmdInvalidMsgs too, since those changes haven't + * touched the caches yet. + * + * In any case, pop the transaction stack. 
We need not physically free memory + * here, since CurTransactionContext is about to be emptied anyway + * (if aborting). + */ +void +AtSubEOXact_Inval(bool isCommit) +{ + TransInvalidationInfo *myInfo = transInvalInfo; + + /* Must be at non-top of stack */ + Assert(myInfo != NULL && myInfo->parent != NULL); + + if (isCommit) + { + /* If CurrentCmdInvalidMsgs still has anything, fix it */ + CommandEndInvalidationMessages(); + + /* Pass up my inval messages to parent */ + AppendInvalidationMessages(&myInfo->parent->PriorCmdInvalidMsgs, + &myInfo->PriorCmdInvalidMsgs); - DiscardInvalidationMessages(&PriorCmdInvalidMsgs, false); - DiscardInvalidationMessages(&CurrentCmdInvalidMsgs, false); + /* Pending relcache inval becomes parent's problem too */ + if (myInfo->RelcacheInitFileInval) + myInfo->parent->RelcacheInitFileInval = true; + } + else + { + ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + } + + /* Pop the transaction state stack */ + transInvalInfo = myInfo->parent; + + /* Need not free anything else explicitly */ + pfree(myInfo); } /* @@ -687,27 +739,25 @@ AtEOXactInvalidationMessages(bool isCommit) * current command. We then move the current-cmd list over to become part * of the prior-cmds list. * - * The isCommit = false case is not currently used, but may someday be - * needed to support rollback to a savepoint within a transaction. - * * Note: * This should be called during CommandCounterIncrement(), * after we have advanced the command ID. */ void -CommandEndInvalidationMessages(bool isCommit) +CommandEndInvalidationMessages(void) { - if (isCommit) - { - ProcessInvalidationMessages(&CurrentCmdInvalidMsgs, - LocalExecuteInvalidationMessage); - AppendInvalidationMessages(&PriorCmdInvalidMsgs, - &CurrentCmdInvalidMsgs); - } - else - { - /* XXX what needs to be done here? 
*/ - } + /* + * You might think this shouldn't be called outside any transaction, + * but bootstrap does it, and also ABORT issued when not in a transaction. + * So just quietly return if no state to work on. + */ + if (transInvalInfo == NULL) + return; + + ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + &transInvalInfo->CurrentCmdInvalidMsgs); } /* diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ee8b46407e1b75fe52092b064194ce70705d780c..23428992724c3ebb680a7059180d6dea7a14738b 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.205 2004/06/18 06:13:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.206 2004/07/01 00:51:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -273,6 +273,8 @@ static void IndexSupportInitialize(Form_pg_index iform, static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numStrats, StrategyNumber numSupport); +static inline void RelationPushReferenceCount(Relation rel); +static inline void RelationPopReferenceCount(Relation rel); /* @@ -1678,6 +1680,8 @@ RelationClearRelation(Relation relation, bool rebuild) list_free(relation->rd_indexlist); if (relation->rd_indexcxt) MemoryContextDelete(relation->rd_indexcxt); + if (relation->rd_prevrefcnt) + pfree(relation->rd_prevrefcnt); /* * If we're really done with the relcache entry, blow it away. But if @@ -1968,7 +1972,7 @@ RelationCacheInvalidate(void) * we must reset refcnts before handling pending invalidations. 
*/ void -AtEOXact_RelationCache(bool commit) +AtEOXact_RelationCache(bool isCommit) { HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; @@ -1993,7 +1997,7 @@ AtEOXact_RelationCache(bool commit) */ if (relation->rd_isnew) { - if (commit) + if (isCommit) relation->rd_isnew = false; else { @@ -2019,7 +2023,7 @@ AtEOXact_RelationCache(bool commit) */ expected_refcnt = relation->rd_isnailed ? 1 : 0; - if (commit) + if (isCommit) { if (relation->rd_refcnt != expected_refcnt && !IsBootstrapProcessingMode()) @@ -2036,6 +2040,12 @@ AtEOXact_RelationCache(bool commit) RelationSetReferenceCount(relation, expected_refcnt); } + /* + * Reset the refcount stack. Just drop the item count; don't deallocate + * the stack itself so it can be reused by future subtransactions. + */ + relation->rd_numpushed = 0; + /* * Flush any temporary index list. */ @@ -2048,6 +2058,131 @@ AtEOXact_RelationCache(bool commit) } } +/* + * RelationPushReferenceCount + * + * Push the current reference count into the stack. Don't modify the + * reference count itself. + */ +static inline void +RelationPushReferenceCount(Relation rel) +{ + /* Enlarge the stack if we run out of space. */ + if (rel->rd_numpushed == rel->rd_numalloc) + { + MemoryContext old_cxt = MemoryContextSwitchTo(CacheMemoryContext); + + if (rel->rd_numalloc == 0) + { + rel->rd_numalloc = 8; + rel->rd_prevrefcnt = palloc(rel->rd_numalloc * sizeof(int)); + } + else + { + rel->rd_numalloc *= 2; + rel->rd_prevrefcnt = repalloc(rel->rd_prevrefcnt, rel->rd_numalloc * sizeof(int)); + } + + MemoryContextSwitchTo(old_cxt); + } + + rel->rd_prevrefcnt[rel->rd_numpushed++] = rel->rd_refcnt; +} + +/* + * RelationPopReferenceCount + * + * Pop the latest stored reference count. If there is none, drop it + * to zero; the entry was created in the current subtransaction. + */ +static inline void +RelationPopReferenceCount(Relation rel) +{ + if (rel->rd_numpushed == 0) + { + rel->rd_refcnt = rel->rd_isnailed ? 
1 : 0; + return; + } + + rel->rd_refcnt = rel->rd_prevrefcnt[--rel->rd_numpushed]; +} + +/* + * AtEOSubXact_RelationCache + */ +void +AtEOSubXact_RelationCache(bool isCommit) +{ + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + + /* We'd better not be bootstrapping. */ + Assert(!IsBootstrapProcessingMode()); + + hash_seq_init(&status, RelationIdCache); + + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + Relation relation = idhentry->reldesc; + + /* + * During subtransaction commit, we first check whether the + * current refcount is correct: if there is no item in the stack, + * the relcache entry was created during this subtransaction, it should + * be 0 (or 1 for nailed relations). If the stack has at least one + * item, the expected count is whatever that item is. + */ + if (isCommit) + { + int expected_refcnt; + + if (relation->rd_numpushed == 0) + expected_refcnt = relation->rd_isnailed ? 1 : 0; + else + expected_refcnt = relation->rd_prevrefcnt[relation->rd_numpushed - 1]; + + if (relation->rd_refcnt != expected_refcnt) + { + elog(WARNING, "relcache reference leak: relation \"%s\" has refcnt %d instead of %d", + RelationGetRelationName(relation), + relation->rd_refcnt, expected_refcnt); + } + } + + /* + * On commit, the expected count is stored so there's no harm in + * popping it (and we may need to fix if there was a leak); and during + * abort, the correct refcount has to be restored. + */ + RelationPopReferenceCount(relation); + } +} + +/* + * AtSubStart_RelationCache + * + * At subtransaction start, we push the current reference count into + * the refcount stack, so it can be restored if the subtransaction aborts. + */ +void +AtSubStart_RelationCache(void) +{ + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + + /* We'd better not be bootstrapping. 
*/ + Assert(!IsBootstrapProcessingMode()); + + hash_seq_init(&status, RelationIdCache); + + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + Relation relation = idhentry->reldesc; + + RelationPushReferenceCount(relation); + } +} + /* * RelationBuildLocalRelation * Build a relcache entry for an about-to-be-created relation, diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 48d28d429f214c9db40423b19a805d4886b12bdd..3caf18c5f330362e83b4f94b782fcb5a5cbc03d5 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.134 2004/06/18 06:13:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.135 2004/07/01 00:51:20 tgl Exp $ * * *------------------------------------------------------------------------- @@ -27,7 +27,6 @@ #include "catalog/pg_database.h" #include "catalog/pg_shadow.h" #include "catalog/pg_tablespace.h" -#include "commands/trigger.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "postmaster/postmaster.h" @@ -350,12 +349,6 @@ InitPostgres(const char *dbname, const char *username) /* Initialize portal manager */ EnablePortalManager(); - /* - * Initialize the deferred trigger manager --- must happen before - * first transaction start. 
- */ - DeferredTriggerInit(); - /* start a new transaction here before access to db */ if (!bootstrap) StartTransactionCommand(); diff --git a/src/backend/utils/misc/README b/src/backend/utils/misc/README index 12a2cdef03654fb2ff1c411d2ed6d358a29fc756..3ea838b1f5312de3ed6ca79fa79274f48d871540 100644 --- a/src/backend/utils/misc/README +++ b/src/backend/utils/misc/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/utils/misc/README,v 1.4 2004/01/19 19:04:40 tgl Exp $ +$PostgreSQL: pgsql/src/backend/utils/misc/README,v 1.5 2004/07/01 00:51:24 tgl Exp $ GUC IMPLEMENTATION NOTES @@ -68,49 +68,66 @@ SET on transaction abort, and rollback of SET LOCAL at transaction end would be effective had there never been any SET commands in the current session. -To handle these cases we must keep track of as many as four distinct -values for each variable. They are: +To handle these cases we must keep track of many distinct values for each +variable. The primary values are: * actual variable contents always the current effective value * reset_value the value to use for RESET -* session_value the "committed" setting for the session - * tentative_value the uncommitted result of SET -During initialization we set the first three of these (actual, reset_value, -and session_value) based on whichever non-interactive source has the -highest priority. All three will have the same value. +The reason we need a tentative_value separate from the actual value is +that when a transaction does SET followed by SET LOCAL, the actual value +will now be the LOCAL value, but we want to remember the prior SET so that +that value is restored at transaction commit. + +In addition, for each level of transaction (possibly nested) we have to +remember the transaction-entry-time actual and tentative values, in case +we need to restore them at transaction end. (The RESET value is essentially +non-transactional, so it doesn't have to be stacked.) 
For efficiency these +stack entries are not constructed until/unless the variable is actually SET +within a particular transaction. + +During initialization we set the actual value and reset_value based on +whichever non-interactive source has the highest priority. They will +have the same value. The tentative_value is not meaningful at this point. + +A SET command starts by stacking the existing actual and tentative values +if this hasn't already been done within the current transaction. Then: A SET LOCAL command sets the actual variable (and nothing else). At -transaction end, the session_value is used to restore the actual variable -to its pre-transaction value. +transaction end, the stacked values are used to restore the GUC entry +to its pre-transaction state. A SET (or SET SESSION) command sets the actual variable, and if no error, then sets the tentative_value. If the transaction commits, the -tentative_value is assigned to the session_value and the actual variable -(which could by now be different, if the SET was followed by SET LOCAL). -If the transaction aborts, the tentative_value is discarded and the -actual variable is restored from the session_value. +tentative_value is assigned again to the actual variable (which could by +now be different, if the SET was followed by SET LOCAL). If the +transaction aborts, the stacked values are used to restore the GUC entry +to its pre-transaction state. + +In the case of SET within nested subtransactions, at each commit the +tentative_value propagates out to the next transaction level. It will +be thrown away at abort of any level, or after exiting the top transaction. RESET is executed like a SET, but using the reset_value as the desired new value. (We do not provide a RESET LOCAL command, but SET LOCAL TO DEFAULT has the same behavior that RESET LOCAL would.) The source associated with -the reset_value also becomes associated with the actual and session values. 
+the reset_value also becomes associated with the actual and tentative values. If SIGHUP is received, the GUC code rereads the postgresql.conf configuration file (this does not happen in the signal handler, but at next return to main loop; note that it can be executed while within a transaction). New values from postgresql.conf are assigned to actual -variable, reset_value, and session_value, but only if each of these has a -current source priority <= PGC_S_FILE. (It is thus possible for -reset_value to track the config-file setting even if there is currently -a different interactive value of the actual variable.) +variable, reset_value, and stacked actual values, but only if each of +these has a current source priority <= PGC_S_FILE. (It is thus possible +for reset_value to track the config-file setting even if there is +currently a different interactive value of the actual variable.) Note that tentative_value is unused and undefined except between a SET command and the end of the transaction. Also notice that we must track -the source associated with each of the four values. +the source associated with each one of the values. The assign_hook and show_hook routines work only with the actual variable, and are not directly aware of the additional values maintained by GUC. @@ -129,9 +146,9 @@ pstrdup/palloc mechanisms. We would need to keep them in a permanent context anyway, and strdup gives us more control over handling out-of-memory failures. -We allow a variable's actual value, reset_val, session_val, and -tentative_val to point at the same storage. This makes it slightly harder -to free space (must test that the value to be freed isn't equal to any of -the other three pointers). The main advantage is that we never need to -strdup during transaction commit/abort, so cannot cause an out-of-memory -failure there. +We allow a string variable's actual value, reset_val, tentative_val, and +stacked copies of same to point at the same storage. 
This makes it +slightly harder to free space (must verify that a value to be freed isn't +equal to any of the other pointers in the GUC entry or associated stack +items). The main advantage is that we never need to strdup during +transaction commit/abort, so cannot cause an out-of-memory failure there. diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f050e201e2a097f495351de516334d07a7883249..f5c16de83babef806e753d92662715eb5bde9b58 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,17 +10,16 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.211 2004/06/11 03:54:54 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.212 2004/07/01 00:51:24 tgl Exp $ * *-------------------------------------------------------------------- */ #include "postgres.h" -#include <errno.h> +#include <ctype.h> #include <float.h> #include <limits.h> #include <unistd.h> -#include <ctype.h> #include "utils/guc.h" #include "utils/guc_tables.h" @@ -54,6 +53,7 @@ #include "tcop/tcopprot.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/memutils.h" #include "utils/pg_locale.h" #include "pgstat.h" @@ -105,6 +105,7 @@ static const char *assign_custom_variable_classes(const char *newval, bool doit, GucSource source); static bool assign_stage_log_stats(bool newval, bool doit, GucSource source); static bool assign_log_stats(bool newval, bool doit, GucSource source); +static bool assign_transaction_read_only(bool newval, bool doit, GucSource source); /* @@ -174,45 +175,6 @@ static int max_identifier_length; static int block_size; static bool integer_datetimes; -/* Macros for freeing malloc'd pointers only if appropriate to do so */ -/* Some of these tests are probably redundant, but be safe ...
*/ -#define SET_STRING_VARIABLE(rec, newval) \ - do { \ - if (*(rec)->variable && \ - *(rec)->variable != (rec)->reset_val && \ - *(rec)->variable != (rec)->session_val && \ - *(rec)->variable != (rec)->tentative_val) \ - free(*(rec)->variable); \ - *(rec)->variable = (newval); \ - } while (0) -#define SET_STRING_RESET_VAL(rec, newval) \ - do { \ - if ((rec)->reset_val && \ - (rec)->reset_val != *(rec)->variable && \ - (rec)->reset_val != (rec)->session_val && \ - (rec)->reset_val != (rec)->tentative_val) \ - free((rec)->reset_val); \ - (rec)->reset_val = (newval); \ - } while (0) -#define SET_STRING_SESSION_VAL(rec, newval) \ - do { \ - if ((rec)->session_val && \ - (rec)->session_val != *(rec)->variable && \ - (rec)->session_val != (rec)->reset_val && \ - (rec)->session_val != (rec)->tentative_val) \ - free((rec)->session_val); \ - (rec)->session_val = (newval); \ - } while (0) -#define SET_STRING_TENTATIVE_VAL(rec, newval) \ - do { \ - if ((rec)->tentative_val && \ - (rec)->tentative_val != *(rec)->variable && \ - (rec)->tentative_val != (rec)->reset_val && \ - (rec)->tentative_val != (rec)->session_val) \ - free((rec)->tentative_val); \ - (rec)->tentative_val = (newval); \ - } while (0) - /* * Displayable names for context types (enum GucContext) @@ -801,7 +763,7 @@ static struct config_bool ConfigureNamesBool[] = GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, &XactReadOnly, - false, NULL, NULL + false, assign_transaction_read_only, NULL }, { {"add_missing_from", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, @@ -1766,14 +1728,13 @@ static const char * const map_old_guc_names[] = { */ static struct config_generic **guc_variables; -/* Current number of variables contained in the vector - */ +/* Current number of variables contained in the vector */ static int num_guc_variables; -/* Vector capacity - */ +/* Vector capacity */ static int size_guc_variables; + static bool guc_dirty; /* TRUE if need to do commit/abort work */ static bool reporting_enabled; /* 
TRUE to enable GUC_REPORT */ @@ -1783,14 +1744,71 @@ static char *guc_string_workspace; /* for avoiding memory leaks */ static int guc_var_compare(const void *a, const void *b); static int guc_name_compare(const char *namea, const char *nameb); +static void push_old_value(struct config_generic *gconf); static void ReportGUCOption(struct config_generic * record); static char *_ShowOption(struct config_generic * record); -struct config_generic** get_guc_variables() + +/* + * Support for assigning to a field of a string GUC item. Free the prior + * value if it's not referenced anywhere else in the item (including stacked + * states). + */ +static void +set_string_field(struct config_string *conf, char **field, char *newval) +{ + char *oldval = *field; + GucStack *stack; + + /* Do the assignment */ + *field = newval; + + /* Exit if any duplicate references, or if old value was NULL anyway */ + if (oldval == NULL || + oldval == *(conf->variable) || + oldval == conf->reset_val || + oldval == conf->tentative_val) + return; + for (stack = conf->gen.stack; stack; stack = stack->prev) + { + if (oldval == stack->tentative_val.stringval || + oldval == stack->value.stringval) + return; + } + + /* Not used anymore, so free it */ + free(oldval); +} + +/* + * Detect whether strval is referenced anywhere in a GUC string item + */ +static bool +string_field_used(struct config_string *conf, char *strval) +{ + GucStack *stack; + + if (strval == *(conf->variable) || + strval == conf->reset_val || + strval == conf->tentative_val) + return true; + for (stack = conf->gen.stack; stack; stack = stack->prev) + { + if (strval == stack->tentative_val.stringval || + strval == stack->value.stringval) + return true; + } + return false; +} + + +struct config_generic ** +get_guc_variables(void) { return guc_variables; } + /* * Build the sorted array. 
This is split out so that it could be * re-executed after startup (eg, we could allow loadable modules to @@ -2001,14 +2019,13 @@ find_option(const char *name) return find_option(map_old_guc_names[i+1]); } - /* Check if the name is qualified, and if so, check if the qualifier + /* + * Check if the name is qualified, and if so, check if the qualifier * maps to a custom variable class. */ dot = strchr(name, GUC_QUALIFIER_SEPARATOR); if(dot != NULL && is_custom_class(name, dot - name)) - /* - * Add a placeholder variable for this name - */ + /* Add a placeholder variable for this name */ return (struct config_generic*)add_placeholder_variable(name); /* Unknown name */ @@ -2081,9 +2098,9 @@ InitializeGUCOptions(void) gconf->status = 0; gconf->reset_source = PGC_S_DEFAULT; - gconf->session_source = PGC_S_DEFAULT; gconf->tentative_source = PGC_S_DEFAULT; gconf->source = PGC_S_DEFAULT; + gconf->stack = NULL; switch (gconf->vartype) { @@ -2097,7 +2114,6 @@ InitializeGUCOptions(void) elog(FATAL, "failed to initialize %s to %d", conf->gen.name, (int) conf->reset_val); *conf->variable = conf->reset_val; - conf->session_val = conf->reset_val; break; } case PGC_INT: @@ -2119,7 +2135,6 @@ InitializeGUCOptions(void) elog(FATAL, "failed to initialize %s to %d", conf->gen.name, conf->reset_val); *conf->variable = conf->reset_val; - conf->session_val = conf->reset_val; break; } case PGC_REAL: @@ -2135,7 +2150,6 @@ InitializeGUCOptions(void) elog(FATAL, "failed to initialize %s to %g", conf->gen.name, conf->reset_val); *conf->variable = conf->reset_val; - conf->session_val = conf->reset_val; break; } case PGC_STRING: @@ -2150,7 +2164,6 @@ InitializeGUCOptions(void) conf->assign_hook == assign_log_statement); *conf->variable = NULL; conf->reset_val = NULL; - conf->session_val = NULL; conf->tentative_val = NULL; if (conf->boot_val == NULL) @@ -2190,7 +2203,6 @@ InitializeGUCOptions(void) } } *conf->variable = str; - conf->session_val = str; break; } } @@ -2254,6 +2266,9 @@ 
ResetAllOptions(void) if (gconf->source <= PGC_S_OVERRIDE) continue; + /* Save old value to support transaction abort */ + push_old_value(gconf); + switch (gconf->vartype) { case PGC_BOOL: @@ -2336,8 +2351,8 @@ ResetAllOptions(void) } } - SET_STRING_VARIABLE(conf, str); - SET_STRING_TENTATIVE_VAL(conf, str); + set_string_field(conf, conf->variable, str); + set_string_field(conf, &conf->tentative_val, str); conf->gen.source = conf->gen.reset_source; conf->gen.tentative_source = conf->gen.reset_source; conf->gen.status |= GUC_HAVE_TENTATIVE; @@ -2353,11 +2368,93 @@ ResetAllOptions(void) /* - * Do GUC processing at transaction commit or abort. + * push_old_value + * Push previous state during first assignment to a GUC variable + * within a particular transaction. + * + * We have to be willing to "back-fill" the state stack if the first + * assignment occurs within a subtransaction nested several levels deep. + * This ensures that if an intermediate transaction aborts, it will have + * the proper value available to restore the setting to. + */ +static void +push_old_value(struct config_generic *gconf) +{ + int my_level = GetCurrentTransactionNestLevel(); + GucStack *stack; + + /* If we're not inside a transaction, do nothing */ + if (my_level == 0) + return; + + for (;;) + { + /* Done if we already pushed it at this nesting depth */ + if (gconf->stack && gconf->stack->nest_level >= my_level) + return; + + /* + * We keep all the stack entries in TopTransactionContext so as to + * avoid allocation problems when a subtransaction back-fills stack + * entries for upper transaction levels. + */ + stack = (GucStack *) MemoryContextAlloc(TopTransactionContext, + sizeof(GucStack)); + + stack->prev = gconf->stack; + stack->nest_level = stack->prev ? 
stack->prev->nest_level + 1 : 1; + stack->status = gconf->status; + stack->tentative_source = gconf->tentative_source; + stack->source = gconf->source; + + switch (gconf->vartype) + { + case PGC_BOOL: + stack->tentative_val.boolval = + ((struct config_bool *) gconf)->tentative_val; + stack->value.boolval = + *((struct config_bool *) gconf)->variable; + break; + + case PGC_INT: + stack->tentative_val.intval = + ((struct config_int *) gconf)->tentative_val; + stack->value.intval = + *((struct config_int *) gconf)->variable; + break; + + case PGC_REAL: + stack->tentative_val.realval = + ((struct config_real *) gconf)->tentative_val; + stack->value.realval = + *((struct config_real *) gconf)->variable; + break; + + case PGC_STRING: + stack->tentative_val.stringval = + ((struct config_string *) gconf)->tentative_val; + stack->value.stringval = + *((struct config_string *) gconf)->variable; + break; + } + + gconf->stack = stack; + + /* Set state to indicate nothing happened yet within this level */ + gconf->status = GUC_HAVE_STACK; + + /* Ensure we remember to pop at end of xact */ + guc_dirty = true; + } +} + +/* + * Do GUC processing at transaction or subtransaction commit or abort. */ void -AtEOXact_GUC(bool isCommit) +AtEOXact_GUC(bool isCommit, bool isSubXact) { + int my_level; int i; /* Quick exit if nothing's changed in this transaction */ @@ -2371,15 +2468,56 @@ AtEOXact_GUC(bool isCommit) guc_string_workspace = NULL; } + my_level = GetCurrentTransactionNestLevel(); + Assert(isSubXact ? 
(my_level > 1) : (my_level == 1)); + for (i = 0; i < num_guc_variables; i++) { struct config_generic *gconf = guc_variables[i]; + int my_status = gconf->status; + GucStack *stack = gconf->stack; + bool useTentative; bool changed; - /* Skip if nothing's happened to this var in this transaction */ - if (gconf->status == 0) + /* + * Skip if nothing's happened to this var in this transaction + */ + if (my_status == 0) + { + Assert(stack == NULL); + continue; + } + /* Assert that we stacked old value before changing it */ + Assert(stack != NULL && (my_status & GUC_HAVE_STACK)); + /* However, the last change may have been at an outer xact level */ + if (stack->nest_level < my_level) continue; + Assert(stack->nest_level == my_level); + + /* + * We will pop the stack entry. Start by restoring outer xact status + * (since we may want to modify it below). Be careful to use + * my_status to reference the inner xact status below this point... + */ + gconf->status = stack->status; + + /* + * We have two cases: + * + * If commit and HAVE_TENTATIVE, set actual value to tentative + * (this is to override a SET LOCAL if one occurred later than SET). + * We keep the tentative value and propagate HAVE_TENTATIVE to + * the parent status, allowing the SET's effect to percolate up. + * (But if we're exiting the outermost transaction, we'll drop the + * HAVE_TENTATIVE bit below.) + * + * Otherwise, we have a transaction that aborted or executed only + * SET LOCAL (or no SET at all). In either case it should have no + * further effect, so restore both tentative and actual values from + * the stack entry. 
+ */ + useTentative = isCommit && (my_status & GUC_HAVE_TENTATIVE) != 0; changed = false; switch (gconf->vartype) @@ -2387,126 +2525,190 @@ AtEOXact_GUC(bool isCommit) case PGC_BOOL: { struct config_bool *conf = (struct config_bool *) gconf; + bool newval; + GucSource newsource; - if (isCommit && (conf->gen.status & GUC_HAVE_TENTATIVE)) + if (useTentative) + { + newval = conf->tentative_val; + newsource = conf->gen.tentative_source; + conf->gen.status |= GUC_HAVE_TENTATIVE; + } + else { - conf->session_val = conf->tentative_val; - conf->gen.session_source = conf->gen.tentative_source; + newval = stack->value.boolval; + newsource = stack->source; + conf->tentative_val = stack->tentative_val.boolval; + conf->gen.tentative_source = stack->tentative_source; } - if (*conf->variable != conf->session_val) + if (*conf->variable != newval) { if (conf->assign_hook) - if (!(*conf->assign_hook) (conf->session_val, + if (!(*conf->assign_hook) (newval, true, PGC_S_OVERRIDE)) elog(LOG, "failed to commit %s", conf->gen.name); - *conf->variable = conf->session_val; + *conf->variable = newval; changed = true; } - conf->gen.source = conf->gen.session_source; - conf->gen.status = 0; + conf->gen.source = newsource; break; } case PGC_INT: { struct config_int *conf = (struct config_int *) gconf; + int newval; + GucSource newsource; - if (isCommit && (conf->gen.status & GUC_HAVE_TENTATIVE)) + if (useTentative) + { + newval = conf->tentative_val; + newsource = conf->gen.tentative_source; + conf->gen.status |= GUC_HAVE_TENTATIVE; + } + else { - conf->session_val = conf->tentative_val; - conf->gen.session_source = conf->gen.tentative_source; + newval = stack->value.intval; + newsource = stack->source; + conf->tentative_val = stack->tentative_val.intval; + conf->gen.tentative_source = stack->tentative_source; } - if (*conf->variable != conf->session_val) + if (*conf->variable != newval) { if (conf->assign_hook) - if (!(*conf->assign_hook) (conf->session_val, + if (!(*conf->assign_hook) 
(newval, true, PGC_S_OVERRIDE)) elog(LOG, "failed to commit %s", conf->gen.name); - *conf->variable = conf->session_val; + *conf->variable = newval; changed = true; } - conf->gen.source = conf->gen.session_source; - conf->gen.status = 0; + conf->gen.source = newsource; break; } case PGC_REAL: { struct config_real *conf = (struct config_real *) gconf; + double newval; + GucSource newsource; - if (isCommit && (conf->gen.status & GUC_HAVE_TENTATIVE)) + if (useTentative) { - conf->session_val = conf->tentative_val; - conf->gen.session_source = conf->gen.tentative_source; + newval = conf->tentative_val; + newsource = conf->gen.tentative_source; + conf->gen.status |= GUC_HAVE_TENTATIVE; + } + else + { + newval = stack->value.realval; + newsource = stack->source; + conf->tentative_val = stack->tentative_val.realval; + conf->gen.tentative_source = stack->tentative_source; } - if (*conf->variable != conf->session_val) + if (*conf->variable != newval) { if (conf->assign_hook) - if (!(*conf->assign_hook) (conf->session_val, + if (!(*conf->assign_hook) (newval, true, PGC_S_OVERRIDE)) elog(LOG, "failed to commit %s", conf->gen.name); - *conf->variable = conf->session_val; + *conf->variable = newval; changed = true; } - conf->gen.source = conf->gen.session_source; - conf->gen.status = 0; + conf->gen.source = newsource; break; } case PGC_STRING: { struct config_string *conf = (struct config_string *) gconf; + char *newval; + GucSource newsource; - if (isCommit && (conf->gen.status & GUC_HAVE_TENTATIVE)) + if (useTentative) { - SET_STRING_SESSION_VAL(conf, conf->tentative_val); - conf->gen.session_source = conf->gen.tentative_source; - conf->tentative_val = NULL; /* transfer ownership */ + newval = conf->tentative_val; + newsource = conf->gen.tentative_source; + conf->gen.status |= GUC_HAVE_TENTATIVE; } else - SET_STRING_TENTATIVE_VAL(conf, NULL); - - if (*conf->variable != conf->session_val) { - char *str = conf->session_val; + newval = stack->value.stringval; + newsource = 
stack->source; + set_string_field(conf, &conf->tentative_val, + stack->tentative_val.stringval); + conf->gen.tentative_source = stack->tentative_source; + } + if (*conf->variable != newval) + { if (conf->assign_hook) { const char *newstr; - newstr = (*conf->assign_hook) (str, true, + newstr = (*conf->assign_hook) (newval, true, PGC_S_OVERRIDE); if (newstr == NULL) elog(LOG, "failed to commit %s", conf->gen.name); - else if (newstr != str) + else if (newstr != newval) { /* + * If newval should now be freed, it'll be + * taken care of below. + * * See notes in set_config_option about * casting */ - str = (char *) newstr; - SET_STRING_SESSION_VAL(conf, str); + newval = (char *) newstr; } } - SET_STRING_VARIABLE(conf, str); + set_string_field(conf, conf->variable, newval); changed = true; } - conf->gen.source = conf->gen.session_source; - conf->gen.status = 0; + conf->gen.source = newsource; + /* Release stacked values if not used anymore */ + set_string_field(conf, &stack->value.stringval, + NULL); + set_string_field(conf, &stack->tentative_val.stringval, + NULL); + /* Don't store tentative value separately after commit */ + if (!isSubXact) + set_string_field(conf, &conf->tentative_val, NULL); break; } } + /* Finish popping the state stack */ + gconf->stack = stack->prev; + pfree(stack); + + /* + * If we're now out of all xact levels, forget TENTATIVE status bit; + * there's nothing tentative about the value anymore. + */ + if (!isSubXact) + { + Assert(gconf->stack == NULL); + gconf->status = 0; + } + + /* Report new value if we changed it */ if (changed && (gconf->flags & GUC_REPORT)) ReportGUCOption(gconf); } - guc_dirty = false; + /* + * If we're now out of all xact levels, we can clear guc_dirty. + * (Note: we cannot reset guc_dirty when exiting a subtransaction, + * because we know that all outer transaction levels will have stacked + * values to deal with.) 
+ */ + if (!isSubXact) + guc_dirty = false; } @@ -2810,7 +3012,7 @@ set_config_option(const char *name, const char *value, } /* - * Should we set reset/session values? (If so, the behavior is not + * Should we set reset/stacked values? (If so, the behavior is not * transactional.) */ makeDefault = changeVal && (source <= PGC_S_OVERRIDE) && (value != NULL); @@ -2820,7 +3022,7 @@ set_config_option(const char *name, const char *value, * However, if changeVal is false then plow ahead anyway since we are * trying to find out if the value is potentially good, not actually * use it. Also keep going if makeDefault is true, since we may want - * to set the reset/session values even if we can't set the variable + * to set the reset/stacked values even if we can't set the variable * itself. */ if (record->source > source) @@ -2901,6 +3103,9 @@ set_config_option(const char *name, const char *value, if (changeVal || makeDefault) { + /* Save old value to support transaction abort */ + if (!makeDefault) + push_old_value(&conf->gen); if (changeVal) { *conf->variable = newval; @@ -2908,15 +3113,20 @@ set_config_option(const char *name, const char *value, } if (makeDefault) { + GucStack *stack; + if (conf->gen.reset_source <= source) { conf->reset_val = newval; conf->gen.reset_source = source; } - if (conf->gen.session_source <= source) + for (stack = conf->gen.stack; stack; stack = stack->prev) { - conf->session_val = newval; - conf->gen.session_source = source; + if (stack->source <= source) + { + stack->value.boolval = newval; + stack->source = source; + } } } else if (isLocal) @@ -3006,6 +3216,9 @@ set_config_option(const char *name, const char *value, if (changeVal || makeDefault) { + /* Save old value to support transaction abort */ + if (!makeDefault) + push_old_value(&conf->gen); if (changeVal) { *conf->variable = newval; @@ -3013,15 +3226,20 @@ set_config_option(const char *name, const char *value, } if (makeDefault) { + GucStack *stack; + if (conf->gen.reset_source <= 
source) { conf->reset_val = newval; conf->gen.reset_source = source; } - if (conf->gen.session_source <= source) + for (stack = conf->gen.stack; stack; stack = stack->prev) { - conf->session_val = newval; - conf->gen.session_source = source; + if (stack->source <= source) + { + stack->value.intval = newval; + stack->source = source; + } } } else if (isLocal) @@ -3101,6 +3319,9 @@ set_config_option(const char *name, const char *value, if (changeVal || makeDefault) { + /* Save old value to support transaction abort */ + if (!makeDefault) + push_old_value(&conf->gen); if (changeVal) { *conf->variable = newval; @@ -3108,15 +3329,20 @@ set_config_option(const char *name, const char *value, } if (makeDefault) { + GucStack *stack; + if (conf->gen.reset_source <= source) { conf->reset_val = newval; conf->gen.reset_source = source; } - if (conf->gen.session_source <= source) + for (stack = conf->gen.stack; stack; stack = stack->prev) { - conf->session_val = newval; - conf->gen.session_source = source; + if (stack->source <= source) + { + stack->value.realval = newval; + stack->source = source; + } } } else if (isLocal) @@ -3261,27 +3487,34 @@ set_config_option(const char *name, const char *value, if (changeVal || makeDefault) { + /* Save old value to support transaction abort */ + if (!makeDefault) + push_old_value(&conf->gen); if (changeVal) { - SET_STRING_VARIABLE(conf, newval); + set_string_field(conf, conf->variable, newval); conf->gen.source = source; } if (makeDefault) { + GucStack *stack; + if (conf->gen.reset_source <= source) { - SET_STRING_RESET_VAL(conf, newval); + set_string_field(conf, &conf->reset_val, newval); conf->gen.reset_source = source; } - if (conf->gen.session_source <= source) + for (stack = conf->gen.stack; stack; stack = stack->prev) { - SET_STRING_SESSION_VAL(conf, newval); - conf->gen.session_source = source; + if (stack->source <= source) + { + set_string_field(conf, &stack->value.stringval, + newval); + stack->source = source; + } } /* Perhaps 
we didn't install newval anywhere */ - if (newval != *conf->variable && - newval != conf->session_val && - newval != conf->reset_val) + if (!string_field_used(conf, newval)) free(newval); } else if (isLocal) @@ -3291,7 +3524,7 @@ set_config_option(const char *name, const char *value, } else { - SET_STRING_TENTATIVE_VAL(conf, newval); + set_string_field(conf, &conf->tentative_val, newval); conf->gen.tentative_source = source; conf->gen.status |= GUC_HAVE_TENTATIVE; guc_dirty = true; @@ -3608,44 +3841,36 @@ define_custom_variable(struct config_generic* variable) /* This better be a placeholder */ if(((*res)->flags & GUC_CUSTOM_PLACEHOLDER) == 0) - { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("attempt to redefine parameter \"%s\"", name))); - } - pHolder = (struct config_string*)*res; + + Assert((*res)->vartype == PGC_STRING); + pHolder = (struct config_string*) *res; - /* We have the same name, no sorting is necessary. - */ + /* We have the same name, no sorting is necessary */ *res = variable; value = *pHolder->variable; - /* Assign the variable stored in the placeholder to the real - * variable. + /* + * Assign the string value stored in the placeholder to the real variable. + * + * XXX this is not really good enough --- it should be a nontransactional + * assignment, since we don't want it to roll back if the current xact + * fails later. */ set_config_option(name, value, pHolder->gen.context, pHolder->gen.source, false, true); - /* Free up stuff occupied by the placeholder variable + /* + * Free up as much as we conveniently can of the placeholder structure + * (this neglects any stack items...) 
*/ - if(value != NULL) - free((void*)value); - - if(pHolder->reset_val != NULL && pHolder->reset_val != value) - free(pHolder->reset_val); - - if(pHolder->session_val != NULL - && pHolder->session_val != value - && pHolder->session_val != pHolder->reset_val) - free(pHolder->session_val); - - if(pHolder->tentative_val != NULL - && pHolder->tentative_val != value - && pHolder->tentative_val != pHolder->reset_val - && pHolder->tentative_val != pHolder->session_val) - free(pHolder->tentative_val); + set_string_field(pHolder, pHolder->variable, NULL); + set_string_field(pHolder, &pHolder->reset_val, NULL); + set_string_field(pHolder, &pHolder->tentative_val, NULL); free(pHolder); } @@ -3754,7 +3979,7 @@ void DefineCustomStringVariable( define_custom_variable(&var->gen); } -extern void EmittWarningsOnPlaceholders(const char* className) +extern void EmitWarningsOnPlaceholders(const char* className) { struct config_generic** vars = guc_variables; struct config_generic** last = vars + num_guc_variables; @@ -5133,5 +5358,14 @@ assign_log_stats(bool newval, bool doit, GucSource source) return true; } +static bool +assign_transaction_read_only(bool newval, bool doit, GucSource source) +{ + if (doit && source >= PGC_S_INTERACTIVE && IsSubTransaction()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot set transaction read only mode inside a subtransaction"))); + return true; +} #include "guc-file.c" diff --git a/src/backend/utils/mmgr/README b/src/backend/utils/mmgr/README index f705827c1b62e0d4f41922d0cb33101e3789b602..490b781cc913a3997b6355321c0f2d415b370d18 100644 --- a/src/backend/utils/mmgr/README +++ b/src/backend/utils/mmgr/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/utils/mmgr/README,v 1.6 2004/06/05 19:48:09 tgl Exp $ +$PostgreSQL: pgsql/src/backend/utils/mmgr/README,v 1.7 2004/07/01 00:51:29 tgl Exp $ Notes about memory allocation redesign -------------------------------------- @@ -90,7 +90,7 @@ context managers as discussed 
below. We could even consider getting rid of CurrentMemoryContext entirely, instead requiring the target memory context for allocation to be specified explicitly. But I think that would be too much notational overhead --- -we'd have to pass an apppropriate memory context to called routines in +we'd have to pass an appropriate memory context to called routines in many places. For example, the copyObject routines would need to be passed a context, as would function execution routines that return a pass-by-reference datatype. And what of routines that temporarily @@ -176,15 +176,30 @@ is kept separate from per-transaction and per-portal contexts because a query string might need to live either a longer or shorter time than any single transaction or portal. -TopTransactionContext --- this holds everything that lives until end of -transaction (longer than one statement within a transaction!). An example -of what has to be here is the list of pending NOTIFY messages to be sent -at xact commit. This context will be reset, and all its children deleted, -at conclusion of each transaction cycle. Note: this context is NOT -cleared immediately upon error; its contents will survive until the -transaction block is exited by COMMIT/ROLLBACK. -(If we ever implement nested transactions, TopTransactionContext may need -to be split into a true "top" pointer and a "current transaction" pointer.) +TopTransactionContext --- this holds everything that lives until end of the +top-level transaction. This context will be reset, and all its children +deleted, at conclusion of each top-level transaction cycle. In most cases +you don't want to allocate memory directly here, but rather in CurTransactionContext; +what does belong here is control information that exists explicitly to manage +status across multiple subtransactions. Note: this context is NOT cleared +immediately upon error; its contents will survive until the transaction block +is exited by COMMIT/ROLLBACK.
+ +CurTransactionContext --- this holds data that has to survive until the end +of the current transaction, and in particular will be needed at top-level +transaction commit. When we are in a top-level transaction this is the same +as TopTransactionContext, but in subtransactions it points to a child context. +It is important to understand that if a subtransaction aborts, its +CurTransactionContext is thrown away after finishing the abort processing; +but a committed subtransaction's CurTransactionContext is kept until top-level +commit (unless of course one of the intermediate levels of subtransaction +aborts). This ensures that we do not keep data from a failed subtransaction +longer than necessary. Because of this behavior, you must be careful to clean +up properly during subtransaction abort --- the subtransaction's state must be +delinked from any pointers or lists kept in upper transactions, or you will +have dangling pointers leading to a crash at top-level commit. An example of +data kept here is pending NOTIFY messages, which are sent at top-level commit, +but only if the generating subtransaction did not abort. 
QueryContext --- this is not actually a separate context, but a global variable pointing to the context that holds the current command's parse diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index c444886e14058f68c7e8fc6cd8772cbeb962df79..96ffb1a8e1ccf1f2d3d730cf3114fae2e3475d25 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.45 2004/06/05 19:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.46 2004/07/01 00:51:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,6 +45,7 @@ MemoryContext PostmasterContext = NULL; MemoryContext CacheMemoryContext = NULL; MemoryContext MessageContext = NULL; MemoryContext TopTransactionContext = NULL; +MemoryContext CurTransactionContext = NULL; /* These two are transient links to contexts owned by other objects: */ MemoryContext QueryContext = NULL; diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index f77125cebf5f0f5e63697e1b787d7107fbc1d903..466b2fc97bf635cb638c147593b718b394eeb524 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.65 2004/05/30 23:40:39 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.66 2004/07/01 00:51:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -511,3 +511,94 @@ AtCleanup_Portals(void) PortalDrop(portal, true); } } + +/* + * Pre-subcommit processing for portals. + * + * Reassign the portals created in the current subtransaction to the parent + * transaction. (XXX perhaps we should reassign only holdable cursors, + * and drop the rest?) 
+ */ +void +AtSubCommit_Portals(TransactionId parentXid) +{ + HASH_SEQ_STATUS status; + PortalHashEnt *hentry; + TransactionId curXid = GetCurrentTransactionId(); + + hash_seq_init(&status, PortalHashTable); + + while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL) + { + Portal portal = hentry->portal; + + if (portal->createXact == curXid) + portal->createXact = parentXid; + } +} + +/* + * Subtransaction abort handling for portals. + * + * Deactivate all portals created during the failed subtransaction. + * Note that per AtSubCommit_Portals, this will catch portals created + * in descendants of the subtransaction too. + */ +void +AtSubAbort_Portals(void) +{ + HASH_SEQ_STATUS status; + PortalHashEnt *hentry; + TransactionId curXid = GetCurrentTransactionId(); + + hash_seq_init(&status, PortalHashTable); + + while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL) + { + Portal portal = hentry->portal; + + if (portal->createXact != curXid) + continue; + + portal->portalActive = false; + + /* let portalcmds.c clean up the state it knows about */ + if (PointerIsValid(portal->cleanup)) + { + (*portal->cleanup) (portal, true); + portal->cleanup = NULL; + } + } +} + +/* + * Post-subabort cleanup for portals. + * + * Drop all portals created in the finishing subtransaction and all + * its descendants. + */ +void +AtSubCleanup_Portals(void) +{ + HASH_SEQ_STATUS status; + PortalHashEnt *hentry; + TransactionId curXid = GetCurrentTransactionId(); + + hash_seq_init(&status, PortalHashTable); + + while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL) + { + Portal portal = hentry->portal; + + if (portal->createXact != curXid) + continue; + + /* + * Let's just make sure no one's active... + */ + portal->portalActive = false; + + /* Zap it with prejudice. 
*/ + PortalDrop(portal, true); + } +} diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index a56e59a3d656b5b5dd674fe7c8981346349d7227..446ee4b72c54e7e37cdfcfc0d8de459ca0dfea2b 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -16,13 +16,14 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.72 2003/11/29 19:52:04 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.73 2004/07/01 00:51:33 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/subtrans.h" #include "storage/sinval.h" #include "utils/tqual.h" @@ -115,6 +116,10 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple) if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; + /* deleting subtransaction aborted */ + if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + return true; + Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) @@ -261,6 +266,10 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple) if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; + /* deleting subtransaction aborted */ + if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + return true; + Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) @@ -441,6 +450,10 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid) if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HeapTupleMayBeUpdated; + /* deleting subtransaction aborted */ + if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + return HeapTupleMayBeUpdated; + Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) @@ -575,6 +588,10 @@ 
HeapTupleSatisfiesDirty(HeapTupleHeader tuple) if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; + /* deleting subtransaction aborted */ + if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + return true; + Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) @@ -712,6 +729,11 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; + /* deleting subtransaction aborted */ + /* FIXME -- is this correct w.r.t. the cmax of the tuple? */ + if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + return true; + Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) @@ -747,7 +769,7 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) for (i = 0; i < snapshot->xcnt; i++) { - if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple), + if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmin(tuple), snapshot->xip[i])) return false; } @@ -792,7 +814,7 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) return true; for (i = 0; i < snapshot->xcnt; i++) { - if (TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i])) + if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i])) return true; } } @@ -868,8 +890,8 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; - Assert(HeapTupleHeaderGetXmin(tuple) == - HeapTupleHeaderGetXmax(tuple)); + Assert(SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmin(tuple), + HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ @@ -943,7 +965,7 @@ 
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin) * Deleter committed, but check special cases. */ - if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple), + if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmin(tuple), HeapTupleHeaderGetXmax(tuple))) { /* diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 11bc08ed977b8626d9cf0578247b3feddaeb5854..8b4ac1e29e70a2260c3655822caecd1d1fc065e3 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -39,7 +39,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * Portions taken from FreeBSD. * - * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.40 2004/06/24 19:26:59 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.41 2004/07/01 00:51:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1828,7 +1828,7 @@ main(int argc, char *argv[]) char *pgdenv; /* PGDATA value got from sent to * environment */ char *subdirs[] = - {"global", "pg_xlog", "pg_clog", "base", "base/1", "pg_tblspc"}; + {"global", "pg_xlog", "pg_clog", "pg_subtrans", "base", "base/1", "pg_tblspc"}; progname = get_progname(argv[0]); set_pglocale_pgservice(argv[0], "initdb"); diff --git a/src/include/access/clog.h b/src/include/access/clog.h index bd7f4152be3b244efe7a115e54429d07eded407f..2df1cedc1c9f21286ce070286e5c12ba0308cc63 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/clog.h,v 1.8 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/clog.h,v 1.9 2004/07/01 00:51:38 tgl Exp $ */ #ifndef CLOG_H #define CLOG_H @@ -16,13 +16,16 @@ /* * Possible transaction statuses --- note that all-zeroes is the initial * state. 
+ * + * A "subcommitted" transaction is a committed subtransaction whose parent + * hasn't committed or aborted yet. */ typedef int XidStatus; #define TRANSACTION_STATUS_IN_PROGRESS 0x00 #define TRANSACTION_STATUS_COMMITTED 0x01 #define TRANSACTION_STATUS_ABORTED 0x02 -/* 0x03 is available without changing commit log space allocation */ +#define TRANSACTION_STATUS_SUB_COMMITTED 0x03 /* exported because lwlock.c needs it */ #define NUM_CLOG_BUFFERS 8 @@ -39,12 +42,6 @@ extern void ShutdownCLOG(void); extern void CheckPointCLOG(void); extern void ExtendCLOG(TransactionId newestXact); extern void TruncateCLOG(TransactionId oldestXact); - -/* XLOG stuff */ -#define CLOG_ZEROPAGE 0x00 - -extern void clog_redo(XLogRecPtr lsn, XLogRecord *record); -extern void clog_undo(XLogRecPtr lsn, XLogRecord *record); -extern void clog_desc(char *buf, uint8 xl_info, char *rec); +extern void clog_zeropage_redo(int pageno); #endif /* CLOG_H */ diff --git a/src/include/access/gistscan.h b/src/include/access/gistscan.h index b8466429959cc91c04fae093ed7b4110f3e46de9..4022f542752fa83e6d50015804cec65fe3bf96bc 100644 --- a/src/include/access/gistscan.h +++ b/src/include/access/gistscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gistscan.h,v 1.22 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/gistscan.h,v 1.23 2004/07/01 00:51:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,5 +23,6 @@ extern Datum gistrestrpos(PG_FUNCTION_ARGS); extern Datum gistendscan(PG_FUNCTION_ARGS); extern void gistadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum); extern void AtEOXact_gist(void); +extern void AtEOSubXact_gist(TransactionId childXid); #endif /* GISTSCAN_H */ diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 
ffeea63417b522794ff232b16787015287f21449..2088cc2f5a6c25d1c0ba6f7a3cff939ce088c46d 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.54 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.55 2004/07/01 00:51:38 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -293,6 +293,7 @@ extern void _hash_regscan(IndexScanDesc scan); extern void _hash_dropscan(IndexScanDesc scan); extern bool _hash_has_active_scan(Relation rel, Bucket bucket); extern void AtEOXact_hash(void); +extern void AtEOSubXact_hash(TransactionId childXid); /* hashsearch.c */ extern bool _hash_next(IndexScanDesc scan, ScanDirection dir); diff --git a/src/include/access/htup.h b/src/include/access/htup.h index 3d48b5f45a3ad5657647881150453152cfb367b4..fdcfc8dc6f1f196071ebcd61ff0eec9b91ea946a 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.65 2004/04/01 21:28:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.66 2004/07/01 00:51:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,18 +109,14 @@ typedef struct HeapTupleFields { TransactionId t_xmin; /* inserting xact ID */ - - union - { - CommandId t_cmin; /* inserting command ID */ - TransactionId t_xmax; /* deleting xact ID */ - } t_field2; + CommandId t_cmin; /* inserting command ID */ + TransactionId t_xmax; /* deleting xact ID */ union { CommandId t_cmax; /* deleting command ID */ TransactionId t_xvac; /* VACUUM FULL xact ID */ - } t_field3; + } t_field4; } 
HeapTupleFields; typedef struct DatumTupleFields @@ -172,9 +168,7 @@ typedef HeapTupleHeaderData *HeapTupleHeader; * attribute(s) */ #define HEAP_HASEXTENDED 0x000C /* the two above combined */ #define HEAP_HASOID 0x0010 /* has an object-id field */ -/* bit 0x0020 is presently unused */ -#define HEAP_XMAX_IS_XMIN 0x0040 /* created and deleted in the same - * transaction */ +/* 0x0020 and 0x0040 are unused */ #define HEAP_XMAX_UNLOGGED 0x0080 /* to lock tuple for update * without logging */ #define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */ @@ -211,62 +205,47 @@ typedef HeapTupleHeaderData *HeapTupleHeader; #define HeapTupleHeaderGetXmax(tup) \ ( \ - ((tup)->t_infomask & HEAP_XMAX_IS_XMIN) ? \ - (tup)->t_choice.t_heap.t_xmin \ - : \ - (tup)->t_choice.t_heap.t_field2.t_xmax \ + (tup)->t_choice.t_heap.t_xmax \ ) #define HeapTupleHeaderSetXmax(tup, xid) \ -do { \ - TransactionId _newxid = (xid); \ - if (TransactionIdEquals((tup)->t_choice.t_heap.t_xmin, _newxid)) \ - (tup)->t_infomask |= HEAP_XMAX_IS_XMIN; \ - else \ - { \ - (tup)->t_infomask &= ~HEAP_XMAX_IS_XMIN; \ - TransactionIdStore(_newxid, &(tup)->t_choice.t_heap.t_field2.t_xmax); \ - } \ -} while (0) +( \ + TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_xmax) \ +) -/* - * Note: GetCmin will produce wrong answers after SetXmax has been executed - * by a transaction other than the inserting one. We could check - * HEAP_XMAX_INVALID and return FirstCommandId if it's clear, but since that - * bit will be set again if the deleting transaction aborts, there'd be no - * real gain in safety from the extra test. So, just rely on the caller not - * to trust the value unless it's meaningful. 
- */ #define HeapTupleHeaderGetCmin(tup) \ ( \ - (tup)->t_choice.t_heap.t_field2.t_cmin \ + (tup)->t_choice.t_heap.t_cmin \ ) #define HeapTupleHeaderSetCmin(tup, cid) \ -do { \ - Assert((tup)->t_infomask & HEAP_XMAX_INVALID); \ - (tup)->t_choice.t_heap.t_field2.t_cmin = (cid); \ -} while (0) +( \ + (tup)->t_choice.t_heap.t_cmin = (cid) \ +) /* - * As with GetCmin, we can't completely ensure that GetCmax can detect whether - * a valid command ID is available, and there's little point in a partial test. + * Note: GetCmax will produce wrong answers after SetXvac has been executed + * by a transaction other than the inserting one. We could check + * HEAP_XMAX_INVALID and return FirstCommandId if it's clear, but since that + * bit will be set again if the deleting transaction aborts, there'd be no + * real gain in safety from the extra test. So, just rely on the caller not + * to trust the value unless it's meaningful. */ #define HeapTupleHeaderGetCmax(tup) \ ( \ - (tup)->t_choice.t_heap.t_field3.t_cmax \ + (tup)->t_choice.t_heap.t_field4.t_cmax \ ) #define HeapTupleHeaderSetCmax(tup, cid) \ do { \ Assert(!((tup)->t_infomask & HEAP_MOVED)); \ - (tup)->t_choice.t_heap.t_field3.t_cmax = (cid); \ + (tup)->t_choice.t_heap.t_field4.t_cmax = (cid); \ } while (0) #define HeapTupleHeaderGetXvac(tup) \ ( \ ((tup)->t_infomask & HEAP_MOVED) ? 
\ - (tup)->t_choice.t_heap.t_field3.t_xvac \ + (tup)->t_choice.t_heap.t_field4.t_xvac \ : \ InvalidTransactionId \ ) @@ -274,7 +253,7 @@ do { \ #define HeapTupleHeaderSetXvac(tup, xid) \ do { \ Assert((tup)->t_infomask & HEAP_MOVED); \ - TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_field3.t_xvac); \ + TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_field4.t_xvac); \ } while (0) #define HeapTupleHeaderGetDatumLength(tup) \ diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h index e63e3fbc31d88873fddcf82efc8dd2e62caaaa3c..7ea3134031d7d72948e90b6e282203cf30329468 100644 --- a/src/include/access/rmgr.h +++ b/src/include/access/rmgr.h @@ -3,7 +3,7 @@ * * Resource managers definition * - * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.10 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.11 2004/07/01 00:51:38 tgl Exp $ */ #ifndef RMGR_H #define RMGR_H @@ -16,7 +16,7 @@ typedef uint8 RmgrId; #define RM_XLOG_ID 0 #define RM_XACT_ID 1 #define RM_SMGR_ID 2 -#define RM_CLOG_ID 3 +#define RM_SLRU_ID 3 #define RM_HEAP_ID 10 #define RM_BTREE_ID 11 #define RM_HASH_ID 12 diff --git a/src/include/access/rtree.h b/src/include/access/rtree.h index fdb33eba06fe929c2bc552a1c912f18785587a4e..5b5347e9a08a080d7cd1bb0476368fc7e0396230 100644 --- a/src/include/access/rtree.h +++ b/src/include/access/rtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.32 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.33 2004/07/01 00:51:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -130,6 +130,7 @@ extern void rtree_desc(char *buf, uint8 xl_info, char *rec); extern void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum); extern void AtEOXact_rtree(void); +extern void 
AtEOSubXact_rtree(TransactionId childXid); /* rtstrat.c */ extern StrategyNumber RTMapToInternalOperator(StrategyNumber strat); diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 213cca5c21654510ba3f78ed90f20ed981bc6e7a..e3245fac6583e3b972a649cacc5eb352389fb463 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -6,11 +6,12 @@ * Portions Copyright (c) 2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.6 2004/05/31 03:48:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.7 2004/07/01 00:51:38 tgl Exp $ */ #ifndef SLRU_H #define SLRU_H +#include "access/xlog.h" #include "storage/lwlock.h" @@ -56,4 +57,12 @@ extern void SimpleLruSetLatestPage(SlruCtl ctl, int pageno); extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint); extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); +/* XLOG stuff */ +#define CLOG_ZEROPAGE 0x00 +#define SUBTRANS_ZEROPAGE 0x10 + +extern void slru_redo(XLogRecPtr lsn, XLogRecord *record); +extern void slru_undo(XLogRecPtr lsn, XLogRecord *record); +extern void slru_desc(char *buf, uint8 xl_info, char *rec); + #endif /* SLRU_H */ diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h new file mode 100644 index 0000000000000000000000000000000000000000..2c601752d123188f4af323478e6f6c1de94541f5 --- /dev/null +++ b/src/include/access/subtrans.h @@ -0,0 +1,35 @@ +/* + * subtrans.h + * + * PostgreSQL subtrans-log manager + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.1 2004/07/01 00:51:38 tgl Exp $ + */ +#ifndef SUBTRANS_H +#define SUBTRANS_H + +#include "access/xlog.h" + +/* exported because lwlock.c needs it */ +/* cannot be different from NUM_CLOG_BUFFERS without slru.c changes */ 
+#define NUM_SUBTRANS_BUFFERS NUM_CLOG_BUFFERS + +extern void SubTransSetParent(TransactionId xid, TransactionId parent); +extern TransactionId SubTransGetParent(TransactionId xid); +extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); +extern bool SubTransXidsHaveCommonAncestor(TransactionId xid1, TransactionId xid2); + +extern int SUBTRANSShmemSize(void); +extern void SUBTRANSShmemInit(void); +extern void BootStrapSUBTRANS(void); +extern void StartupSUBTRANS(void); +extern void ShutdownSUBTRANS(void); +extern void CheckPointSUBTRANS(void); +extern void ExtendSUBTRANS(TransactionId newestXact); +extern void TruncateSUBTRANS(TransactionId oldestXact); +extern void subtrans_zeropage_redo(int pageno); + +#endif /* SUBTRANS_H */ diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 3a2cad7bb03f42a6857055715737959f188d6e40..44e0ff6ea7577e052870d77a9df2c0f710fc79ec 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/transam.h,v 1.48 2003/11/29 22:40:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/transam.h,v 1.49 2004/07/01 00:51:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -107,13 +107,16 @@ extern bool TransactionIdDidCommit(TransactionId transactionId); extern bool TransactionIdDidAbort(TransactionId transactionId); extern void TransactionIdCommit(TransactionId transactionId); extern void TransactionIdAbort(TransactionId transactionId); +extern void TransactionIdSubCommit(TransactionId transactionId); +extern void TransactionIdCommitTree(int nxids, TransactionId *xids); +extern void TransactionIdAbortTree(int nxids, TransactionId *xids); extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); extern bool 
TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); /* in transam/varsup.c */ -extern TransactionId GetNewTransactionId(void); +extern TransactionId GetNewTransactionId(bool isSubXact); extern TransactionId ReadNewTransactionId(void); extern Oid GetNewObjectId(void); extern void CheckMaxObjectId(Oid assigned_oid); diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 53a585ec6947f256f19dacc58ccfd1120fe22419..c5b66afd0df9fb19b9dab2488909dc7fda7411eb 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.63 2004/05/22 23:14:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.64 2004/07/01 00:51:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,7 +63,15 @@ typedef enum TBlockState TBLOCK_INPROGRESS, TBLOCK_END, TBLOCK_ABORT, - TBLOCK_ENDABORT + TBLOCK_ENDABORT, + + TBLOCK_SUBBEGIN, + TBLOCK_SUBBEGINABORT, + TBLOCK_SUBINPROGRESS, + TBLOCK_SUBEND, + TBLOCK_SUBABORT, + TBLOCK_SUBENDABORT_OK, + TBLOCK_SUBENDABORT_ERROR } TBlockState; /* @@ -76,12 +84,15 @@ typedef void (*EOXactCallback) (bool isCommit, void *arg); */ typedef struct TransactionStateData { - TransactionId transactionIdData; - CommandId commandId; - AbsoluteTime startTime; - int startTimeUsec; - TransState state; - TBlockState blockState; + TransactionId transactionIdData; /* my XID */ + CommandId commandId; /* current CID */ + TransState state; /* low-level state */ + TBlockState blockState; /* high-level state */ + int nestingLevel; /* nest depth */ + MemoryContext curTransactionContext; /* my xact-lifetime context */ + List 
*childXids; /* subcommitted child XIDs */ + AclId currentUser; /* subxact start current_user */ + struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; typedef TransactionStateData *TransactionState; @@ -102,9 +113,11 @@ typedef TransactionStateData *TransactionState; typedef struct xl_xact_commit { time_t xtime; + int nrels; /* number of RelFileNodes */ + int nsubxacts; /* number of subtransaction XIDs */ /* Array of RelFileNode(s) to drop at commit */ - /* The XLOG record length determines how many there are */ RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ + /* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */ } xl_xact_commit; #define MinSizeOfXactCommit offsetof(xl_xact_commit, xnodes) @@ -112,9 +125,11 @@ typedef struct xl_xact_commit typedef struct xl_xact_abort { time_t xtime; + int nrels; /* number of RelFileNodes */ + int nsubxacts; /* number of subtransaction XIDs */ /* Array of RelFileNode(s) to drop at abort */ - /* The XLOG record length determines how many there are */ RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ + /* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */ } xl_xact_abort; #define MinSizeOfXactAbort offsetof(xl_xact_abort, xnodes) @@ -126,18 +141,20 @@ typedef struct xl_xact_abort */ extern bool IsTransactionState(void); extern bool IsAbortedTransactionBlockState(void); +extern TransactionId GetTopTransactionId(void); extern TransactionId GetCurrentTransactionId(void); extern CommandId GetCurrentCommandId(void); extern AbsoluteTime GetCurrentTransactionStartTime(void); extern AbsoluteTime GetCurrentTransactionStartTimeUsec(int *usec); +extern int GetCurrentTransactionNestLevel(void); extern bool TransactionIdIsCurrentTransactionId(TransactionId xid); -extern bool CommandIdIsCurrentCommandId(CommandId cid); extern void CommandCounterIncrement(void); extern void StartTransactionCommand(void); extern void CommitTransactionCommand(void); extern void AbortCurrentTransaction(void); extern void 
BeginTransactionBlock(void); extern void EndTransactionBlock(void); +extern bool IsSubTransaction(void); extern bool IsTransactionBlock(void); extern bool IsTransactionOrTransactionBlock(void); extern char TransactionBlockStatusCode(void); @@ -151,6 +168,8 @@ extern void UnregisterEOXactCallback(EOXactCallback callback, void *arg); extern void RecordTransactionCommit(void); +extern int xactGetCommittedChildren(TransactionId **ptr, bool metoo); + extern void XactPushRollback(void (*func) (void *), void *data); extern void XactPopRollback(void); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 0e44e77446d650f98243117fbf26041903c1a378..1c29ab076268cf0ad81a818a7a8efc21698136b8 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.51 2004/05/29 22:48:22 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.52 2004/07/01 00:51:38 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -111,7 +111,7 @@ typedef struct XLogContRecord /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD05A /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD05B /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index ac6f8a1fa0cdddb4218c74ca2093644986d95941..b6f98778628edc1fb971e5533fa89d2ba78b65d9 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.240 2004/06/25 17:20:28 tgl Exp $ + * $PostgreSQL: 
pgsql/src/include/catalog/catversion.h,v 1.241 2004/07/01 00:51:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200406251 +#define CATALOG_VERSION_NO 200406261 #endif diff --git a/src/include/commands/async.h b/src/include/commands/async.h index 6429895fbdca527889ec3b35e263366162d75615..47bd91aaaa4cd57ffceeddea5bb610760f39d85d 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.24 2004/05/23 03:50:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.25 2004/07/01 00:51:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,9 @@ extern void Async_Unlisten(char *relname, int pid); /* perform (or cancel) outbound notify processing at transaction commit */ extern void AtCommit_Notify(void); extern void AtAbort_Notify(void); +extern void AtSubStart_Notify(void); +extern void AtSubCommit_Notify(void); +extern void AtSubAbort_Notify(void); /* signal handler for inbound notifies (SIGUSR2) */ extern void NotifyInterruptHandler(SIGNAL_ARGS); diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index f9f03c1bd0349204118803c0dffa88e8bcfc23f8..73021fbb91f86e02dd593e8df46396a7667c46ca 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/tablecmds.h,v 1.16 2004/05/05 04:48:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/commands/tablecmds.h,v 1.17 2004/07/01 00:51:40 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -42,6 +42,9 @@ extern void register_on_commit_action(Oid relid, OnCommitAction action); extern void remove_on_commit_action(Oid relid); extern void PreCommit_on_commit_actions(void); -extern void AtEOXact_on_commit_actions(bool isCommit); +extern void AtEOXact_on_commit_actions(bool isCommit, TransactionId xid); +extern void AtEOSubXact_on_commit_actions(bool isCommit, + TransactionId childXid, + TransactionId parentXid); #endif /* TABLECMDS_H */ diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 9083c1395f2d37b03cfa90dd984b5445fd905c3f..f9e4b2a396e8e0ec8bd0d3f969c7dec58dc88d82 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.45 2003/11/29 22:40:59 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.46 2004/07/01 00:51:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -151,44 +151,12 @@ extern void ExecARUpdateTriggers(EState *estate, ItemPointer tupleid, HeapTuple newtuple); - -/* - * Deferred trigger stuff - */ -typedef struct DeferredTriggerStatusData -{ - Oid dts_tgoid; - bool dts_tgisdeferred; -} DeferredTriggerStatusData; - -typedef struct DeferredTriggerStatusData *DeferredTriggerStatus; - -typedef struct DeferredTriggerEventItem -{ - Oid dti_tgoid; - int32 dti_state; -} DeferredTriggerEventItem; - -typedef struct DeferredTriggerEventData *DeferredTriggerEvent; - -typedef struct DeferredTriggerEventData -{ - DeferredTriggerEvent dte_next; /* list link */ - int32 dte_event; - Oid dte_relid; - ItemPointerData dte_oldctid; - ItemPointerData dte_newctid; - int32 dte_n_items; - /* dte_item is actually a variable-size array, of length 
dte_n_items */ - DeferredTriggerEventItem dte_item[1]; -} DeferredTriggerEventData; - - -extern void DeferredTriggerInit(void); extern void DeferredTriggerBeginXact(void); extern void DeferredTriggerEndQuery(void); extern void DeferredTriggerEndXact(void); extern void DeferredTriggerAbortXact(void); +extern void DeferredTriggerBeginSubXact(void); +extern void DeferredTriggerEndSubXact(bool isCommit); extern void DeferredTriggerSetState(ConstraintsSetStmt *stmt); diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index 2e477e70f87c2c6953dab682396f37526ab4e981..e283b55cecd194a812905c48945058351edaa59f 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -2,7 +2,7 @@ * * spi.h * - * $PostgreSQL: pgsql/src/include/executor/spi.h,v 1.44 2004/04/01 21:28:46 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/spi.h,v 1.45 2004/07/01 00:51:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -119,5 +119,6 @@ extern void SPI_cursor_move(Portal portal, bool forward, int count); extern void SPI_cursor_close(Portal portal); extern void AtEOXact_SPI(bool isCommit); +extern void AtEOSubXact_SPI(bool isCommit, TransactionId childXid); #endif /* SPI_H */ diff --git a/src/include/executor/spi_priv.h b/src/include/executor/spi_priv.h index dcafa1ccb9a060074db8f11a6f28913688e540c6..2785f6fe28126ff29f27b305eb241806c168d921 100644 --- a/src/include/executor/spi_priv.h +++ b/src/include/executor/spi_priv.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/spi_priv.h,v 1.18 2004/03/21 22:29:11 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/spi_priv.h,v 1.19 2004/07/01 00:51:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,7 @@ typedef struct MemoryContext procCxt; /* procedure context */ 
MemoryContext execCxt; /* executor context */ MemoryContext savedcxt; + TransactionId connectXid; /* Xid of connecting transaction */ } _SPI_connection; typedef struct diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 7defaf93f885c782e213d3316004e5e9c4aa8b5a..e992751f856b01751e79825e982a37a414e16db8 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.82 2004/05/31 19:24:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.83 2004/07/01 00:51:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -148,6 +148,8 @@ extern void InitBufferPoolAccess(void); extern char *ShowBufferUsage(void); extern void ResetBufferUsage(void); extern void AtEOXact_Buffers(bool isCommit); +extern void AtSubStart_Buffers(void); +extern void AtEOSubXact_Buffers(bool isCommit); extern void FlushBufferPool(void); extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 5f8012e0ed62aab66d046287b7d5aa02caf95edc..727ec508a3b1ecd9620d5ea6eea34c0cac7ef876 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.58 2004/06/05 17:42:46 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.59 2004/07/01 00:51:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,10 +126,11 @@ typedef struct PageHeaderData typedef PageHeaderData *PageHeader; 
/* - * Page layout version number 0 is for pre-7.3 Postgres releases. The - * current version number is 1, denoting a new HeapTupleHeader layout. + * Page layout version number 0 is for pre-7.3 Postgres releases. + * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout. + * Release 7.5 changed the HeapTupleHeader layout again. */ -#define PG_PAGE_LAYOUT_VERSION 1 +#define PG_PAGE_LAYOUT_VERSION 2 /* ---------------------------------------------------------------- diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 8c7159c0cb0250e4191d59cdbee22f6b872ee7f8..650b326949733fd0a596ba224227324039f7bc6a 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.77 2004/05/28 05:13:29 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.78 2004/07/01 00:51:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,6 +26,14 @@ typedef struct PROC_QUEUE int size; /* number of entries in list */ } PROC_QUEUE; +/* Release options for LockReleaseAll */ +typedef enum +{ + ReleaseAll, /* All my locks */ + ReleaseAllExceptSession, /* All except session locks (Xid = 0) */ + ReleaseGivenXids /* Only locks with Xids in given array */ +} LockReleaseWhich; + /* struct PGPROC is declared in storage/proc.h, but must forward-reference it */ typedef struct PGPROC PGPROC; @@ -165,11 +173,12 @@ typedef struct LOCK * * There are two possible kinds of proclock tags: a transaction (identified * both by the PGPROC of the backend running it, and the xact's own ID) and - * a session (identified by backend PGPROC, with xid = InvalidTransactionId). + * a session (identified by backend PGPROC, with XID = InvalidTransactionId). 
* * Currently, session proclocks are used for user locks and for cross-xact - * locks obtained for VACUUM. We assume that a session lock never conflicts - * with per-transaction locks obtained by the same backend. + * locks obtained for VACUUM. Note that a single backend can hold locks + * under several different XIDs at once (including session locks). We treat + * such locks as never conflicting (a backend can never block itself). * * The holding[] array counts the granted locks (of each type) represented * by this proclock. Note that there will be a proclock object, possibly with @@ -177,11 +186,11 @@ typedef struct LOCK * Otherwise, proclock objects whose counts have gone to zero are recycled * as soon as convenient. * - * Each PROCLOCK object is linked into lists for both the associated LOCK object - * and the owning PGPROC object. Note that the PROCLOCK is entered into these - * lists as soon as it is created, even if no lock has yet been granted. - * A PGPROC that is waiting for a lock to be granted will also be linked into - * the lock's waitProcs queue. + * Each PROCLOCK object is linked into lists for both the associated LOCK + * object and the owning PGPROC object. Note that the PROCLOCK is entered + * into these lists as soon as it is created, even if no lock has yet been + * granted. A PGPROC that is waiting for a lock to be granted will also be + * linked into the lock's waitProcs queue. 
*/ typedef struct PROCLOCKTAG { @@ -239,7 +248,7 @@ extern bool LockAcquire(LOCKMETHODID lockmethodid, LOCKTAG *locktag, extern bool LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag, TransactionId xid, LOCKMODE lockmode); extern bool LockReleaseAll(LOCKMETHODID lockmethodid, PGPROC *proc, - bool allxids, TransactionId xid); + LockReleaseWhich which, int nxids, TransactionId *xids); extern int LockCheckConflicts(LockMethod lockMethodTable, LOCKMODE lockmode, LOCK *lock, PROCLOCK *proclock, PGPROC *proc, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index c7283f374cfdd594bec3f3d122fbd0ff55f35d82..1551d7568c597718069529077a583b3fd950ef7c 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.67 2003/12/01 21:59:25 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.68 2004/07/01 00:51:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -103,7 +103,8 @@ extern int ProcGlobalSemas(int maxBackends); extern void InitProcGlobal(int maxBackends); extern void InitProcess(void); extern void InitDummyProcess(int proctype); -extern void ProcReleaseLocks(bool isCommit); +extern void ProcReleaseLocks(LockReleaseWhich which, + int nxids, TransactionId *xids); extern void ProcQueueInit(PROC_QUEUE *queue); extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode, diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 52040432dcc9c9992b4930954150d9e49a270aee..e4f0930ef7ab60f94889f8205b7afdadf3bf27ff 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * 
$PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.44 2004/06/02 17:28:18 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.45 2004/07/01 00:51:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,9 @@ extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks, extern void smgrimmedsync(SMgrRelation reln); extern void smgrDoPendingDeletes(bool isCommit); extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr); +extern void AtSubStart_smgr(void); +extern void AtSubCommit_smgr(void); +extern void AtSubAbort_smgr(void); extern void smgrcommit(void); extern void smgrabort(void); extern void smgrsync(void); diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 9c8d3053fec70ea0c5490702a183cd82fd902031..3ce54b99a253880b91fe06d1b5479c491c6cd9ec 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -13,7 +13,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/catcache.h,v 1.48 2003/11/29 22:41:15 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/catcache.h,v 1.49 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,6 +101,9 @@ typedef struct catctup * and negative entries is identical. */ int refcount; /* number of active references */ + int *prev_refcount; /* refcounts for upper subtransactions */ + int numpushes; /* number of used refcounts in the array */ + int numalloc; /* allocated size of array */ bool dead; /* dead but not yet removed? */ bool negative; /* negative cache entry? 
*/ uint32 hash_value; /* hash value for this tuple's keys */ @@ -139,6 +142,9 @@ typedef struct catclist */ Dlelem cache_elem; /* list member of per-catcache list */ int refcount; /* number of active references */ + int *prev_refcount; /* refcounts for upper subtransactions */ + int numpushes; /* number of used refcounts in the array */ + int numalloc; /* allocated size of array */ bool dead; /* dead but not yet removed? */ bool ordered; /* members listed in index order? */ short nkeys; /* number of lookup keys specified */ @@ -163,6 +169,8 @@ extern DLLIMPORT MemoryContext CacheMemoryContext; extern void CreateCacheMemoryContext(void); extern void AtEOXact_CatCache(bool isCommit); +extern void AtSubStart_CatCache(void); +extern void AtEOSubXact_CatCache(bool isCommit); extern CatCache *InitCatCache(int id, const char *relname, const char *indname, int reloidattr, diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index b91682af88f556f8be6c03030b1594b450df9dfb..0a510cd66537c1573bf31fc982177cf4de8c24ad 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -7,7 +7,7 @@ * Copyright (c) 2000-2003, PostgreSQL Global Development Group * Written by Peter Eisentraut <peter_e@gmx.net>. 
* - * $PostgreSQL: pgsql/src/include/utils/guc.h,v 1.47 2004/05/28 05:13:32 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/guc.h,v 1.48 2004/07/01 00:51:44 tgl Exp $ *-------------------------------------------------------------------- */ #ifndef GUC_H @@ -175,14 +175,14 @@ extern void DefineCustomStringVariable( GucStringAssignHook assign_hook, GucShowHook show_hook); -extern void EmittWarningsOnPlaceholders(const char* className); +extern void EmitWarningsOnPlaceholders(const char* className); extern const char *GetConfigOption(const char *name); extern const char *GetConfigOptionResetString(const char *name); extern void ProcessConfigFile(GucContext context); extern void InitializeGUCOptions(void); extern void ResetAllOptions(void); -extern void AtEOXact_GUC(bool isCommit); +extern void AtEOXact_GUC(bool isCommit, bool isSubXact); extern void BeginReportingGUCOptions(void); extern void ParseLongOption(const char *string, char **name, char **value); extern bool set_config_option(const char *name, const char *value, diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 62d2d571b292d1d4423c87c3391b8a9013313ba7..d522f6d5e9418097ae31ff5e4fa1570847559a00 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -7,12 +7,31 @@ * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.11 2004/05/26 15:07:41 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.12 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ -#ifndef GUC_TABLES -#define GUC_TABLES 1 +#ifndef GUC_TABLES_H +#define GUC_TABLES_H 1 + +/* + * GUC supports these types of variables: + */ +enum config_type +{ + PGC_BOOL, + PGC_INT, + PGC_REAL, + PGC_STRING +}; + +union config_var_value +{ + bool boolval; + int intval; + double realval; + char *stringval; +}; /* * Groupings to help organize all the 
run-time options for display @@ -56,15 +75,19 @@ enum config_group }; /* - * GUC supports these types of variables: + * Stack entry for saving the state of a variable prior to the current + * transaction */ -enum config_type +typedef struct guc_stack { - PGC_BOOL, - PGC_INT, - PGC_REAL, - PGC_STRING -}; + struct guc_stack *prev; /* previous stack item, if any */ + int nest_level; /* nesting depth of cur transaction */ + int status; /* previous status bits, see below */ + GucSource tentative_source; /* source of the tentative_value */ + GucSource source; /* source of the actual value */ + union config_var_value tentative_val; /* previous tentative val */ + union config_var_value value; /* previous actual value */ +} GucStack; /* * Generic fields applicable to all types of variables @@ -86,9 +109,9 @@ struct config_generic enum config_type vartype; /* type of variable (set only at startup) */ int status; /* status bits, see below */ GucSource reset_source; /* source of the reset_value */ - GucSource session_source; /* source of the session_value */ GucSource tentative_source; /* source of the tentative_value */ GucSource source; /* source of the current actual value */ + GucStack *stack; /* stacked outside-of-transaction states */ }; /* bit values in flags field */ @@ -104,6 +127,7 @@ struct config_generic /* bit values in status field */ #define GUC_HAVE_TENTATIVE 0x0001 /* tentative value is defined */ #define GUC_HAVE_LOCAL 0x0002 /* a SET LOCAL has been executed */ +#define GUC_HAVE_STACK 0x0004 /* we have stacked prior value(s) */ /* GUC records for specific variable types */ @@ -118,7 +142,6 @@ struct config_bool GucBoolAssignHook assign_hook; GucShowHook show_hook; /* variable fields, initialized at runtime: */ - bool session_val; bool tentative_val; }; @@ -134,7 +157,6 @@ struct config_int GucIntAssignHook assign_hook; GucShowHook show_hook; /* variable fields, initialized at runtime: */ - int session_val; int tentative_val; }; @@ -150,7 +172,6 @@ struct 
config_real GucRealAssignHook assign_hook; GucShowHook show_hook; /* variable fields, initialized at runtime: */ - double session_val; double tentative_val; }; @@ -165,7 +186,6 @@ struct config_string GucShowHook show_hook; /* variable fields, initialized at runtime: */ char *reset_val; - char *session_val; char *tentative_val; }; @@ -180,4 +200,4 @@ extern struct config_generic **get_guc_variables(void); extern void build_guc_variables(void); -#endif +#endif /* GUC_TABLES_H */ diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index a2bad9cd06ce4483e73318ee95d7b932b96192f3..add5ca83c713842f10c2720d9c5170af772d8c6d 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.31 2004/05/06 16:10:57 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.32 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,9 +22,15 @@ typedef void (*CacheCallbackFunction) (Datum arg, Oid relid); extern void AcceptInvalidationMessages(void); -extern void AtEOXactInvalidationMessages(bool isCommit); +extern void AtStart_Inval(void); -extern void CommandEndInvalidationMessages(bool isCommit); +extern void AtSubStart_Inval(void); + +extern void AtEOXact_Inval(bool isCommit); + +extern void AtSubEOXact_Inval(bool isCommit); + +extern void CommandEndInvalidationMessages(void); extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple); diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 7865859c062412c7a85f4970641f7b440553a627..d2d7d4a90939ee459576d612e4d163ca5696b928 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * 
Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/memutils.h,v 1.55 2004/06/05 19:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/memutils.h,v 1.56 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -73,6 +73,7 @@ extern DLLIMPORT MemoryContext PostmasterContext; extern DLLIMPORT MemoryContext CacheMemoryContext; extern DLLIMPORT MemoryContext MessageContext; extern DLLIMPORT MemoryContext TopTransactionContext; +extern DLLIMPORT MemoryContext CurTransactionContext; /* These two are transient links to contexts owned by other objects: */ extern DLLIMPORT MemoryContext QueryContext; diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index 2819295e837e38c601322b8720b46addd8117886..3437dc448a3d7bcbbc5614a9514d4aa67e84e3be 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -39,7 +39,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.48 2003/11/29 22:41:16 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.49 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -167,6 +167,9 @@ extern void EnablePortalManager(void); extern void AtCommit_Portals(void); extern void AtAbort_Portals(void); extern void AtCleanup_Portals(void); +extern void AtSubCommit_Portals(TransactionId parentXid); +extern void AtSubAbort_Portals(void); +extern void AtSubCleanup_Portals(void); extern Portal CreatePortal(const char *name, bool allowDup, bool dupSilent); extern Portal CreateNewPortal(void); extern void PortalDrop(Portal portal, bool isError); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index e5008e56ea5276c42948d02d15eae7bc2a96ee0d..b7f85eda68e07464caae020c0468d76e4134ac82 
100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.74 2004/05/08 19:09:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.75 2004/07/01 00:51:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -110,6 +110,9 @@ typedef struct RelationData BlockNumber rd_targblock; /* current insertion target block, or * InvalidBlockNumber */ int rd_refcnt; /* reference count */ + int *rd_prevrefcnt; /* reference count stack */ + int rd_numalloc; /* stack allocated size */ + int rd_numpushed; /* stack used size */ bool rd_isnew; /* rel was created in current xact */ /* diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index da82f4f6137af58da78f0e687e3a38ede891a182..47f46190df79c4d2c7f44b3f201af539fb2b5343 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.40 2004/06/18 06:14:21 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.41 2004/07/01 00:51:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,7 +65,9 @@ extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode); extern void RelationCacheInvalidate(void); -extern void AtEOXact_RelationCache(bool commit); +extern void AtEOXact_RelationCache(bool isCommit); +extern void AtSubStart_RelationCache(void); +extern void AtEOSubXact_RelationCache(bool isCommit); /* * Routines to help manage rebuilding of relcache init file diff --git a/src/test/regress/expected/transactions.out 
b/src/test/regress/expected/transactions.out index b72ca5f36e58be9656adbd2f371b8f9c005059e7..6cc89b5c5e4a5bde7a0d2ffc00c87b56cbeb2159 100644 --- a/src/test/regress/expected/transactions.out +++ b/src/test/regress/expected/transactions.out @@ -68,3 +68,70 @@ ERROR: transaction is read-only START TRANSACTION READ WRITE; DROP TABLE writetest; -- ok COMMIT; +-- Subtransactions, basic tests +-- create & drop tables +SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE; +CREATE TABLE foobar (a int); +BEGIN; + CREATE TABLE foo (a int); + BEGIN; + DROP TABLE foo; + CREATE TABLE bar (a int); + ROLLBACK; + BEGIN; + CREATE TABLE baz (a int); + COMMIT; + drop TABLE foobar; + CREATE TABLE barbaz (a int); +COMMIT; +-- should exist: barbaz, baz, foo +SELECT * FROM foo; -- should be empty + a +--- +(0 rows) + +SELECT * FROM bar; -- shouldn't exist +ERROR: relation "bar" does not exist +SELECT * FROM barbaz; -- should be empty + a +--- +(0 rows) + +SELECT * FROM baz; -- should be empty + a +--- +(0 rows) + +-- inserts +BEGIN; + INSERT INTO foo VALUES (1); + BEGIN; + INSERT into bar VALUES (1); +ERROR: relation "bar" does not exist + ROLLBACK; + BEGIN; + INSERT into barbaz VALUES (1); + COMMIT; + BEGIN; + BEGIN; + INSERT INTO foo VALUES (2); + COMMIT; + ROLLBACK; + INSERT INTO foo VALUES (3); +COMMIT; +SELECT * FROM foo; -- should have 1 and 3 + a +--- + 1 + 3 +(2 rows) + +SELECT * FROM barbaz; -- should have 1 + a +--- + 1 +(1 row) + +DROP TABLE foo; +DROP TABLE baz; +DROP TABLE barbaz; diff --git a/src/test/regress/expected/without_oid.out b/src/test/regress/expected/without_oid.out index ef373e6e3c35b3b4229cce191315b40079e3631e..708c4c5e94d98dc1e636aeb53a583a547e9ec68f 100644 --- a/src/test/regress/expected/without_oid.out +++ b/src/test/regress/expected/without_oid.out @@ -1,8 +1,18 @@ -- -- WITHOUT OID -- -CREATE TABLE wi (i INT) WITH OIDS; -CREATE TABLE wo (i INT) WITHOUT OIDS; +-- +-- This test tries to verify that WITHOUT OIDS actually saves space. 
+-- On machines where MAXALIGN is 8, WITHOUT OIDS may or may not save any +-- space, depending on the size of the tuple header + null bitmap. +-- As of 7.5 we need a 9-bit null bitmap to force the difference to appear. +-- +CREATE TABLE wi (i INT, + n1 int, n2 int, n3 int, n4 int, + n5 int, n6 int, n7 int, n8 int) WITH OIDS; +CREATE TABLE wo (i INT, + n1 int, n2 int, n3 int, n4 int, + n5 int, n6 int, n7 int, n8 int) WITHOUT OIDS; INSERT INTO wi VALUES (1); -- 1 INSERT INTO wo SELECT i FROM wi; -- 1 INSERT INTO wo SELECT i+1 FROM wi; -- 1+1=2 @@ -24,6 +34,15 @@ INSERT INTO wo SELECT i+896 FROM wi; -- 896+2448=3344 INSERT INTO wo SELECT i+3344 FROM wo; -- 3344+3344=6688 INSERT INTO wi SELECT i+2448 FROM wo; -- 2448+6688=9136 INSERT INTO wo SELECT i+6688 FROM wi WHERE i<=2448; -- 6688+2448=9136 +SELECT count(oid) FROM wi; + count +------- + 9136 +(1 row) + +-- should fail +SELECT count(oid) FROM wo; +ERROR: column "oid" does not exist VACUUM ANALYZE wi; VACUUM ANALYZE wo; SELECT min(relpages) < max(relpages), min(reltuples) - max(reltuples) diff --git a/src/test/regress/sql/transactions.sql b/src/test/regress/sql/transactions.sql index 10ef759998bc610b05d0f70a2fbb8c3bca53de73..a656c393b4fbc119ad239f611c327f6ba457568c 100644 --- a/src/test/regress/sql/transactions.sql +++ b/src/test/regress/sql/transactions.sql @@ -54,3 +54,48 @@ CREATE TABLE test AS SELECT * FROM writetest; -- fail START TRANSACTION READ WRITE; DROP TABLE writetest; -- ok COMMIT; + +-- Subtransactions, basic tests +-- create & drop tables +SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE; +CREATE TABLE foobar (a int); +BEGIN; + CREATE TABLE foo (a int); + BEGIN; + DROP TABLE foo; + CREATE TABLE bar (a int); + ROLLBACK; + BEGIN; + CREATE TABLE baz (a int); + COMMIT; + drop TABLE foobar; + CREATE TABLE barbaz (a int); +COMMIT; +-- should exist: barbaz, baz, foo +SELECT * FROM foo; -- should be empty +SELECT * FROM bar; -- shouldn't exist +SELECT * FROM barbaz; -- should be empty +SELECT * FROM baz; 
-- should be empty + +-- inserts +BEGIN; + INSERT INTO foo VALUES (1); + BEGIN; + INSERT into bar VALUES (1); + ROLLBACK; + BEGIN; + INSERT into barbaz VALUES (1); + COMMIT; + BEGIN; + BEGIN; + INSERT INTO foo VALUES (2); + COMMIT; + ROLLBACK; + INSERT INTO foo VALUES (3); +COMMIT; +SELECT * FROM foo; -- should have 1 and 3 +SELECT * FROM barbaz; -- should have 1 + +DROP TABLE foo; +DROP TABLE baz; +DROP TABLE barbaz; diff --git a/src/test/regress/sql/without_oid.sql b/src/test/regress/sql/without_oid.sql index 4cb961941a9595af9a65901c6ad35390e14b6365..2c176c8e3e205db19a1037e650af61d0d801f7ce 100644 --- a/src/test/regress/sql/without_oid.sql +++ b/src/test/regress/sql/without_oid.sql @@ -2,8 +2,19 @@ -- WITHOUT OID -- -CREATE TABLE wi (i INT) WITH OIDS; -CREATE TABLE wo (i INT) WITHOUT OIDS; +-- +-- This test tries to verify that WITHOUT OIDS actually saves space. +-- On machines where MAXALIGN is 8, WITHOUT OIDS may or may not save any +-- space, depending on the size of the tuple header + null bitmap. +-- As of 7.5 we need a 9-bit null bitmap to force the difference to appear. +-- +CREATE TABLE wi (i INT, + n1 int, n2 int, n3 int, n4 int, + n5 int, n6 int, n7 int, n8 int) WITH OIDS; +CREATE TABLE wo (i INT, + n1 int, n2 int, n3 int, n4 int, + n5 int, n6 int, n7 int, n8 int) WITHOUT OIDS; + INSERT INTO wi VALUES (1); -- 1 INSERT INTO wo SELECT i FROM wi; -- 1 INSERT INTO wo SELECT i+1 FROM wi; -- 1+1=2 @@ -25,8 +36,14 @@ INSERT INTO wo SELECT i+896 FROM wi; -- 896+2448=3344 INSERT INTO wo SELECT i+3344 FROM wo; -- 3344+3344=6688 INSERT INTO wi SELECT i+2448 FROM wo; -- 2448+6688=9136 INSERT INTO wo SELECT i+6688 FROM wi WHERE i<=2448; -- 6688+2448=9136 + +SELECT count(oid) FROM wi; +-- should fail +SELECT count(oid) FROM wo; + VACUUM ANALYZE wi; VACUUM ANALYZE wo; + SELECT min(relpages) < max(relpages), min(reltuples) - max(reltuples) FROM pg_class WHERE relname IN ('wi', 'wo');