diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 525d24feace3d03ae25ce08f564f4c1679d0b91f..68ff092148104d9c8b8557a113934c232d9e5fed 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.156 2007/09/03 00:39:11 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.157 2007/09/05 18:10:47 tgl Exp $ --> <!-- Documentation of the system catalogs, directed toward PostgreSQL developers --> @@ -5147,7 +5147,7 @@ There are several distinct types of lockable objects: whole relations (e.g., tables), individual pages of relations, individual tuples of relations, - transaction IDs, + transaction IDs (both virtual and permanent IDs), and general database objects (identified by class OID and object OID, in the same way as in <structname>pg_description</structname> or <structname>pg_depend</structname>). Also, the right to extend a @@ -5178,6 +5178,7 @@ <literal>page</>, <literal>tuple</>, <literal>transactionid</>, + <literal>virtualxid</>, <literal>object</>, <literal>userlock</>, or <literal>advisory</> @@ -5219,6 +5220,15 @@ Tuple number within the page, or NULL if the object is not a tuple </entry> </row> + <row> + <entry><structfield>virtualxid</structfield></entry> + <entry><type>text</type></entry> + <entry></entry> + <entry> + Virtual ID of a transaction, or NULL if the object is not a + virtual transaction ID + </entry> + </row> <row> <entry><structfield>transactionid</structfield></entry> <entry><type>xid</type></entry> @@ -5257,11 +5267,11 @@ </entry> </row> <row> - <entry><structfield>transaction</structfield></entry> - <entry><type>xid</type></entry> + <entry><structfield>virtualtransaction</structfield></entry> + <entry><type>text</type></entry> <entry></entry> <entry> - ID of the transaction that is holding or awaiting this lock + Virtual ID of the transaction that is holding or awaiting this lock </entry> </row> <row> @@ -5301,10 
+5311,14 @@ </para> <para> - Every transaction holds an exclusive lock on its transaction ID for its - entire duration. If one transaction finds it necessary to wait specifically + Every transaction holds an exclusive lock on its virtual transaction ID for + its entire duration. If a permanent ID is assigned to the transaction + (which normally happens only if the transaction changes the state of the + database), it also holds an exclusive lock on its permanent transaction ID + until it ends. When one transaction finds it necessary to wait specifically for another transaction, it does so by attempting to acquire share lock on - the other transaction ID. That will succeed only when the other transaction + the other transaction ID (either virtual or permanent ID depending on the + situation). That will succeed only when the other transaction terminates and releases its locks. </para> @@ -5314,7 +5328,7 @@ and therefore row-level locks normally do not appear in this view. If a transaction is waiting for a row-level lock, it will usually appear in the view as waiting for the - transaction ID of the current holder of that row lock. + permanent transaction ID of the current holder of that row lock. </para> <para> @@ -5350,11 +5364,10 @@ </para> <para> - If you have enabled the statistics collector, the - <structfield>pid</structfield> column can be joined to the + The <structfield>pid</structfield> column can be joined to the <structfield>procpid</structfield> column of the <structname>pg_stat_activity</structname> view to get more - information on the session holding or waiting to hold the lock. + information on the session holding or waiting to hold each lock. 
Also, if you are using prepared transactions, the <structfield>transaction</> column can be joined to the <structfield>transaction</structfield> column of the diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5ef230a4fe3ac64b8f6da2dfc79cbd3d99485699..d770c4606f2114d7c907fc63681c2099b78fcf12 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.141 2007/08/22 04:45:20 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.142 2007/09/05 18:10:47 tgl Exp $ --> <chapter Id="runtime-config"> <title>Server Configuration</title> @@ -2939,10 +2939,15 @@ SELECT * FROM parent WHERE key = 2400; <entry>Process start time stamp</entry> <entry>no</entry> </row> + <row> + <entry><literal>%v</literal></entry> + <entry>Virtual transaction ID (backendID/localXID)</entry> + <entry>no</entry> + </row> <row> <entry><literal>%x</literal></entry> - <entry>Transaction ID</entry> - <entry>yes</entry> + <entry>Transaction ID (0 if none is assigned)</entry> + <entry>no</entry> </row> <row> <entry><literal>%q</literal></entry> diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 3f44bd7d948a31227d2f13e86e4ff9eb003c8186..3f23378b8fa0de65abe97e0e302cd8adc0a2dc3c 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.237 2007/08/14 17:35:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.238 2007/09/05 18:10:47 tgl Exp $ * * * INTERFACE ROUTINES @@ -1632,12 +1632,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, MarkBufferDirty(buffer); /* XLOG stuff */ - if (relation->rd_istemp) - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } - else if (use_wal) + if (use_wal && !relation->rd_istemp) { xl_heap_insert xlrec; 
xl_heap_header xlhdr; @@ -1947,11 +1942,6 @@ l1: PageSetLSN(dp, recptr); PageSetTLI(dp, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -2403,11 +2393,6 @@ l2: PageSetLSN(BufferGetPage(buffer), recptr); PageSetTLI(BufferGetPage(buffer), ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -2924,11 +2909,6 @@ l3: PageSetLSN(dp, recptr); PageSetTLI(dp, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README index 6e7e132acabb1fe5dd1bcb43a09018ccd2d9eeec..87b405917021f070a43b6d67441e15b392995e53 100644 --- a/src/backend/access/transam/README +++ b/src/backend/access/transam/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.6 2007/08/01 22:45:07 tgl Exp $ +$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.7 2007/09/05 18:10:47 tgl Exp $ The Transaction System ---------------------- @@ -187,16 +187,29 @@ Another difference is that BeginInternalSubtransaction is allowed when no explicit transaction block has been established, while DefineSavepoint is not. -Subtransaction numbering ------------------------- +Transaction and subtransaction numbering +---------------------------------------- -A top-level transaction is always given a TransactionId (XID) as soon as it is -created. This is necessary for a number of reasons, notably XMIN bookkeeping -for VACUUM. However, a subtransaction doesn't need its own XID unless it -(or one of its child subxacts) writes tuples into the database. Therefore, -we postpone assigning XIDs to subxacts until and unless they call -GetCurrentTransactionId. 
The subsidiary actions of obtaining a lock on the -XID and and entering it into pg_subtrans and PG_PROC are done at the same time. +Transactions and subtransactions are assigned permanent XIDs only when/if +they first do something that requires one --- typically, insert/update/delete +a tuple, though there are a few other places that need an XID assigned. +If a subtransaction requires an XID, we always first assign one to its +parent. This maintains the invariant that child transactions have XIDs later +than their parents, which is assumed in a number of places. + +The subsidiary actions of obtaining a lock on the XID and entering it into +pg_subtrans and PG_PROC are done at the time it is assigned. + +A transaction that has no XID still needs to be identified for various +purposes, notably holding locks. For this purpose we assign a "virtual +transaction ID" or VXID to each top-level transaction. VXIDs are formed from +two fields, the backendID and a backend-local counter; this arrangement allows +assignment of a new VXID at transaction start without any contention for +shared memory. To ensure that a VXID isn't re-used too soon after backend +exit, we store the last local counter value into shared memory at backend +exit, and initialize it from the previous value for the same backendID slot +at backend start. All these counters go back to zero at shared memory +re-initialization, but that's OK because VXIDs never appear anywhere on-disk. Internally, a backend needs a way to identify subtransactions whether or not they have XIDs; but this need only lasts as long as the parent top transaction @@ -204,7 +217,8 @@ endures. Therefore, we have SubTransactionId, which is somewhat like CommandId in that it's generated from a counter that we reset at the start of each top transaction. The top-level transaction itself has SubTransactionId 1, and subtransactions have IDs 2 and up. (Zero is reserved for -InvalidSubTransactionId.) +InvalidSubTransactionId.)
Note that subtransactions do not have their +own VXIDs; they use the parent top transaction's VXID. pg_clog and pg_subtrans diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 9665d1295419832151373b2e37404b79d33bed43..419c8656065255f6b253d51a0f324715b30faec8 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -26,7 +26,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.44 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -423,10 +423,6 @@ CLOGPagePrecedes(int page1, int page2) /* * Write a ZEROPAGE xlog record - * - * Note: xlog record is marked as outside transaction control, since we - * want it to be redone whether the invoking transaction commits or not. - * (Besides which, this is normally done just before entering a transaction.) */ static void WriteZeroPageXlogRec(int pageno) @@ -437,7 +433,7 @@ WriteZeroPageXlogRec(int pageno) rdata.len = sizeof(int); rdata.buffer = InvalidBuffer; rdata.next = NULL; - (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); + (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata); } /* @@ -445,9 +441,6 @@ WriteZeroPageXlogRec(int pageno) * * We must flush the xlog record to disk before returning --- see notes * in TruncateCLOG(). - * - * Note: xlog record is marked as outside transaction control, since we - * want it to be redone whether the invoking transaction commits or not. 
*/ static void WriteTruncateXlogRec(int pageno) @@ -459,7 +452,7 @@ WriteTruncateXlogRec(int pageno) rdata.len = sizeof(int); rdata.buffer = InvalidBuffer; rdata.next = NULL; - recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE | XLOG_NO_TRAN, &rdata); + recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata); XLogFlush(recptr); } diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 3ce6f14bcf6546a91f3391bc36da7f3cfaed89b6..b34fa9be78502bb304b7a7460e922f6ef5595b6d 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.24 2007/08/01 22:45:07 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.25 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1842,9 +1842,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) /* * Write an xlog record reflecting the zeroing of either a MEMBERs or * OFFSETs page (info shows which) - * - * Note: xlog record is marked as outside transaction control, since we - * want it to be redone whether the invoking transaction commits or not. 
*/ static void WriteMZeroPageXlogRec(int pageno, uint8 info) @@ -1855,7 +1852,7 @@ WriteMZeroPageXlogRec(int pageno, uint8 info) rdata.len = sizeof(int); rdata.buffer = InvalidBuffer; rdata.next = NULL; - (void) XLogInsert(RM_MULTIXACT_ID, info | XLOG_NO_TRAN, &rdata); + (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata); } /* diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 2ae81e823d5f788c090afd9f90dc8ca395af21f4..3e7e8435029895a7a73679580543cd4abe362630 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.32 2007/08/01 22:45:07 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.33 2007/09/05 18:10:47 tgl Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -274,9 +274,11 @@ MarkAsPreparing(TransactionId xid, const char *gid, MemSet(&gxact->proc, 0, sizeof(PGPROC)); SHMQueueElemInit(&(gxact->proc.links)); gxact->proc.waitStatus = STATUS_OK; + gxact->proc.lxid = InvalidLocalTransactionId; gxact->proc.xid = xid; gxact->proc.xmin = InvalidTransactionId; gxact->proc.pid = 0; + gxact->proc.backendId = InvalidBackendId; gxact->proc.databaseId = databaseid; gxact->proc.roleId = owner; gxact->proc.inCommit = false; @@ -813,8 +815,8 @@ StartPrepare(GlobalTransaction gxact) hdr.prepared_at = gxact->prepared_at; hdr.owner = gxact->owner; hdr.nsubxacts = xactGetCommittedChildren(&children); - hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels); - hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels); + hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels, NULL); + hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels, NULL); StrNCpy(hdr.gid, gxact->gid, GIDSIZE); save_state_data(&hdr, sizeof(TwoPhaseFileHeader)); @@ -1702,9 +1704,7 @@ 
RecordTransactionCommitPrepared(TransactionId xid, } rdata[lastrdata].next = NULL; - recptr = XLogInsert(RM_XACT_ID, - XLOG_XACT_COMMIT_PREPARED | XLOG_NO_TRAN, - rdata); + recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata); /* * We don't currently try to sleep before flush here ... nor is there @@ -1784,9 +1784,7 @@ RecordTransactionAbortPrepared(TransactionId xid, } rdata[lastrdata].next = NULL; - recptr = XLogInsert(RM_XACT_ID, - XLOG_XACT_ABORT_PREPARED | XLOG_NO_TRAN, - rdata); + recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata); /* Always flush, since we're about to remove the 2PC state file */ XLogFlush(recptr); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 18787d17770e522ea23bd72ecd81dc878a500164..2e972d56f60fb0605618e3c7304d6ecc78049ff4 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.247 2007/09/03 00:39:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.248 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "storage/fd.h" #include "storage/lmgr.h" #include "storage/procarray.h" +#include "storage/sinvaladt.h" #include "storage/smgr.h" #include "utils/combocid.h" #include "utils/flatfiles.h" @@ -216,7 +217,7 @@ static SubXactCallbackItem *SubXact_callbacks = NULL; /* local function prototypes */ -static void AssignSubTransactionId(TransactionState s); +static void AssignTransactionId(TransactionState s); static void AbortTransaction(void); static void AtAbort_Memory(void); static void AtCleanup_Memory(void); @@ -232,7 +233,7 @@ static void CallSubXactCallbacks(SubXactEvent event, SubTransactionId parentSubid); static void CleanupTransaction(void); static void CommitTransaction(void); -static void RecordTransactionAbort(void); +static void 
RecordTransactionAbort(bool isSubXact); static void StartTransaction(void); static void RecordSubTransactionCommit(void); @@ -304,25 +305,36 @@ IsAbortedTransactionBlockState(void) /* * GetTopTransactionId * - * Get the ID of the main transaction, even if we are currently inside - * a subtransaction. If we are not in a transaction at all, or if we - * are in transaction startup and haven't yet assigned an XID, - * InvalidTransactionId is returned. + * This will return the XID of the main transaction, assigning one if + * it's not yet set. Be careful to call this only inside a valid xact. */ TransactionId GetTopTransactionId(void) { + if (!TransactionIdIsValid(TopTransactionStateData.transactionId)) + AssignTransactionId(&TopTransactionStateData); return TopTransactionStateData.transactionId; } +/* + * GetTopTransactionIdIfAny + * + * This will return the XID of the main transaction, if one is assigned. + * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't yet been assigned an XID. + */ +TransactionId +GetTopTransactionIdIfAny(void) +{ + return TopTransactionStateData.transactionId; +} /* * GetCurrentTransactionId * - * We do not assign XIDs to subtransactions until/unless this is called. - * When we do assign an XID to a subtransaction, recursively make sure - * its parent has one as well (this maintains the invariant that a child - * transaction has an XID following its parent's). + * This will return the XID of the current transaction (main or sub + * transaction), assigning one if it's not yet set. Be careful to call this + * only inside a valid xact. 
*/ TransactionId GetCurrentTransactionId(void) @@ -330,20 +342,49 @@ GetCurrentTransactionId(void) TransactionState s = CurrentTransactionState; if (!TransactionIdIsValid(s->transactionId)) - AssignSubTransactionId(s); - + AssignTransactionId(s); return s->transactionId; } +/* + * GetCurrentTransactionIdIfAny + * + * This will return the XID of the current sub xact, if one is assigned. + * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't been assigned an XID yet. + */ +TransactionId +GetCurrentTransactionIdIfAny(void) +{ + return CurrentTransactionState->transactionId; +} + + +/* + * AssignTransactionId + * + * Assigns a new permanent XID to the given TransactionState. + * We do not assign XIDs to transactions until/unless this is called. + * Also, any parent TransactionStates that don't yet have XIDs are assigned + * one; this maintains the invariant that a child transaction has an XID + * following its parent's. + */ static void -AssignSubTransactionId(TransactionState s) +AssignTransactionId(TransactionState s) { + bool isSubXact = (s->parent != NULL); ResourceOwner currentOwner; - Assert(s->parent != NULL); + /* Assert that caller didn't screw up */ + Assert(!TransactionIdIsValid(s->transactionId)); Assert(s->state == TRANS_INPROGRESS); - if (!TransactionIdIsValid(s->parent->transactionId)) - AssignSubTransactionId(s->parent); + + /* + * Ensure parent(s) have XIDs, so that a child always has an XID later + * than its parent. + */ + if (isSubXact && !TransactionIdIsValid(s->parent->transactionId)) + AssignTransactionId(s->parent); /* * Generate a new Xid and record it in PG_PROC and pg_subtrans. @@ -353,20 +394,20 @@ AssignSubTransactionId(TransactionState s) * PG_PROC, the subtrans entry is needed to ensure that other backends see * the Xid as "running". See GetNewTransactionId. 
*/ - s->transactionId = GetNewTransactionId(true); + s->transactionId = GetNewTransactionId(isSubXact); - SubTransSetParent(s->transactionId, s->parent->transactionId); + if (isSubXact) + SubTransSetParent(s->transactionId, s->parent->transactionId); /* - * Acquire lock on the transaction XID. (We assume this cannot block.) We - * have to be sure that the lock is assigned to the transaction's - * ResourceOwner. + * Acquire lock on the transaction XID. (We assume this cannot block.) + * We have to ensure that the lock is assigned to the transaction's + * own ResourceOwner. */ currentOwner = CurrentResourceOwner; PG_TRY(); { CurrentResourceOwner = s->curTransactionOwner; - XactLockTableInsert(s->transactionId); } PG_CATCH(); @@ -380,22 +421,6 @@ AssignSubTransactionId(TransactionState s) } -/* - * GetCurrentTransactionIdIfAny - * - * Unlike GetCurrentTransactionId, this will return InvalidTransactionId - * if we are currently not in a transaction, or in a transaction or - * subtransaction that has not yet assigned itself an XID. - */ -TransactionId -GetCurrentTransactionIdIfAny(void) -{ - TransactionState s = CurrentTransactionState; - - return s->transactionId; -} - - /* * GetCurrentSubTransactionId */ @@ -726,192 +751,188 @@ AtSubStart_ResourceOwner(void) void RecordTransactionCommit(void) { + TransactionId xid = GetTopTransactionIdIfAny(); + bool markXidCommitted = TransactionIdIsValid(xid); int nrels; RelFileNode *rels; + bool haveNonTemp; int nchildren; TransactionId *children; /* Get data needed for commit record */ - nrels = smgrGetPendingDeletes(true, &rels); + nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp); nchildren = xactGetCommittedChildren(&children); /* - * If we made neither any XLOG entries nor any temp-rel updates, and have - * no files to be deleted, we can omit recording the transaction commit at - * all. (This test includes the effects of subtransactions, so the - * presence of committed subxacts need not alone force a write.) 
+ * If we haven't been assigned an XID yet, we neither can, nor do we + * want to write a COMMIT record. */ - if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0) + if (!markXidCommitted) { - TransactionId xid = GetCurrentTransactionId(); - bool madeTCentries; - bool isAsyncCommit = false; - XLogRecPtr recptr; + /* + * We expect that every smgrscheduleunlink is followed by a catalog + * update, and hence XID assignment, so we shouldn't get here with + * any pending deletes. Use a real test not just an Assert to check + * this, since it's a bit fragile. + */ + if (nrels != 0) + elog(ERROR, "cannot commit a transaction that deleted files but has no xid"); + + /* Can't have child XIDs either; AssignTransactionId enforces this */ + Assert(nchildren == 0); + + /* + * If we didn't create XLOG entries, we're done here; otherwise we + * should flush those entries the same as a commit record. (An + * example of a possible record that wouldn't cause an XID to be + * assigned is a sequence advance record due to nextval() --- we + * want to flush that to disk before reporting commit.) + */ + if (XactLastRecEnd.xrecoff == 0) + goto cleanup; + } + else + { + /* + * Begin commit critical section and insert the commit XLOG record. + */ + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_commit xlrec; /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); - START_CRIT_SECTION(); - /* - * We only need to log the commit in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. + * Mark ourselves as within our "commit critical section". This + * forces any concurrent checkpoint to wait until we've updated + * pg_clog. Without this, it is possible for the checkpoint to + * set REDO after the XLOG record but fail to flush the pg_clog + * update to disk, leading to loss of the transaction commit if + * the system crashes a little later. 
+ * + * Note: we could, but don't bother to, set this flag in + * RecordTransactionAbort. That's because loss of a transaction + * abort is noncritical; the presumption would be that it aborted, + * anyway. + * + * It's safe to change the inCommit flag of our own backend + * without holding the ProcArrayLock, since we're the only one + * modifying it. This makes checkpoint's determination of which + * xacts are inCommit a bit fuzzy, but it doesn't matter. */ - madeTCentries = (MyLastRecPtr.xrecoff != 0); - if (madeTCentries || nrels > 0) + START_CRIT_SECTION(); + MyProc->inCommit = true; + + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactCommit; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_commit xlrec; - - /* - * Mark ourselves as within our "commit critical section". This - * forces any concurrent checkpoint to wait until we've updated - * pg_clog. Without this, it is possible for the checkpoint to - * set REDO after the XLOG record but fail to flush the pg_clog - * update to disk, leading to loss of the transaction commit if - * the system crashes a little later. - * - * Note: we could, but don't bother to, set this flag in - * RecordTransactionAbort. That's because loss of a transaction - * abort is noncritical; the presumption would be that it aborted, - * anyway. - * - * It's safe to change the inCommit flag of our own backend - * without holding the ProcArrayLock, since we're the only one - * modifying it. This makes checkpoint's determination of which - * xacts are inCommit a bit fuzzy, but it doesn't matter. 
- */ - MyProc->inCommit = true; - - SetCurrentTransactionStopTimestamp(); - xlrec.xact_time = xactStopTimestamp; - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactCommit; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; - - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; } - else + /* dump committed child Xids */ + if (nchildren > 0) { - /* Just flush through last record written by me */ - recptr = ProcLastRecEnd; + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; } + rdata[lastrdata].next = NULL; + + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + } + /* + * Check if we want to commit asynchronously. If the user has set + * synchronous_commit = off, and we're not doing cleanup of any non-temp + * rels nor committing any command that wanted to force sync commit, then + * we can defer flushing XLOG. (We must not allow asynchronous commit if + * there are any non-temp tables to be deleted, because we might delete + * the files before the COMMIT record is flushed to disk. We do allow + * asynchronous commit if all to-be-deleted tables are temporary though, + * since they are lost anyway if we crash.) 
+ */ + if (XactSyncCommit || forceSyncCommit || haveNonTemp) + { /* - * We must flush our XLOG entries to disk if we made any XLOG entries, - * whether in or out of transaction control. For example, if we - * reported a nextval() result to the client, this ensures that any - * XLOG record generated by nextval will hit the disk before we report - * the transaction committed. + * Synchronous commit case. * - * Note: if we generated a commit record above, MyXactMadeXLogEntry - * will certainly be set now. + * Sleep before flush! So we can flush more than one commit + * record per single fsync. (The idea is some other backend + * may do the XLogFlush while we're sleeping. This needs work + * still, because on most Unixen, the minimum select() delay + * is 10msec or more, which is way too long.) + * + * We do not sleep if enableFsync is not turned on, nor if + * there are fewer than CommitSiblings other backends with + * active transactions. */ - if (MyXactMadeXLogEntry) - { - /* - * If the user has set synchronous_commit = off, and we're - * not doing cleanup of any rels nor committing any command - * that wanted to force sync commit, then we can defer fsync. - */ - if (XactSyncCommit || forceSyncCommit || nrels > 0) - { - /* - * Synchronous commit case. - * - * Sleep before flush! So we can flush more than one commit - * records per single fsync. (The idea is some other backend - * may do the XLogFlush while we're sleeping. This needs work - * still, because on most Unixen, the minimum select() delay - * is 10msec or more, which is way too long.) - * - * We do not sleep if enableFsync is not turned on, nor if - * there are fewer than CommitSiblings other backends with - * active transactions.
- */ - if (CommitDelay > 0 && enableFsync && - CountActiveBackends() >= CommitSiblings) - pg_usleep(CommitDelay); + if (CommitDelay > 0 && enableFsync && + CountActiveBackends() >= CommitSiblings) + pg_usleep(CommitDelay); - XLogFlush(recptr); - } - else - { - /* - * Asynchronous commit case. - */ - isAsyncCommit = true; + XLogFlush(XactLastRecEnd); - /* - * Report the latest async commit LSN, so that - * the WAL writer knows to flush this commit. - */ - XLogSetAsyncCommitLSN(recptr); - } + /* + * Now we may update the CLOG, if we wrote a COMMIT record above + */ + if (markXidCommitted) + { + TransactionIdCommit(xid); + /* to avoid race conditions, the parent must commit first */ + TransactionIdCommitTree(nchildren, children); } - + } + else + { /* - * We must mark the transaction committed in clog if its XID appears - * either in permanent rels or in local temporary rels. We test this - * by seeing if we made transaction-controlled entries *OR* local-rel - * tuple updates. Note that if we made only the latter, we have not - * emitted an XLOG record for our commit, and so in the event of a - * crash the clog update might be lost. This is okay because no one - * else will ever care whether we committed. + * Asynchronous commit case. * - * The recptr here refers to the last xlog entry by this transaction - * so is the correct value to use for setting the clog. + * Report the latest async commit LSN, so that + * the WAL writer knows to flush this commit. */ - if (madeTCentries || MyXactMadeTempRelUpdate) + XLogSetAsyncCommitLSN(XactLastRecEnd); + + /* + * We must not immediately update the CLOG, since we didn't + * flush the XLOG. Instead, we store the LSN up to which + * the XLOG must be flushed before the CLOG may be updated. 
+ */ + if (markXidCommitted) { - if (isAsyncCommit) - { - TransactionIdAsyncCommit(xid, recptr); - /* to avoid race conditions, the parent must commit first */ - TransactionIdAsyncCommitTree(nchildren, children, recptr); - } - else - { - TransactionIdCommit(xid); - /* to avoid race conditions, the parent must commit first */ - TransactionIdCommitTree(nchildren, children); - } + TransactionIdAsyncCommit(xid, XactLastRecEnd); + /* to avoid race conditions, the parent must commit first */ + TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd); } + } - /* Checkpoint can proceed now */ + /* + * If we entered a commit critical section, leave it now, and + * let checkpoints proceed. + */ + if (markXidCommitted) + { MyProc->inCommit = false; - END_CRIT_SECTION(); } - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; - /* And clean up local data */ +cleanup: + /* Clean up local data */ if (rels) pfree(rels); if (children) @@ -1030,23 +1051,20 @@ AtSubCommit_childXids(void) static void RecordSubTransactionCommit(void) { + TransactionId xid = GetCurrentTransactionIdIfAny(); + /* * We do not log the subcommit in XLOG; it doesn't matter until the * top-level transaction commits. * - * We must mark the subtransaction subcommitted in clog if its XID appears - * either in permanent rels or in local temporary rels. We test this by - * seeing if we made transaction-controlled entries *OR* local-rel tuple - * updates. (The test here actually covers the entire transaction tree so - * far, so it may mark subtransactions that don't really need it, but it's - * probably not worth being tenser. Note that if a prior subtransaction - * dirtied these variables, then RecordTransactionCommit will have to do - * the full pushup anyway...) 
+ * We must mark the subtransaction subcommitted in the CLOG if + * it had a valid XID assigned. If it did not, nobody else will + * ever know about the existence of this subxact. We don't + * have to deal with deletions scheduled for on-commit here, since + * they'll be reassigned to our parent (who might still abort). */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + if (TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); - /* XXX does this really need to be a critical section? */ START_CRIT_SECTION(); @@ -1066,108 +1084,118 @@ RecordSubTransactionCommit(void) * RecordTransactionAbort */ static void -RecordTransactionAbort(void) +RecordTransactionAbort(bool isSubXact) { + TransactionId xid = GetCurrentTransactionIdIfAny(); int nrels; RelFileNode *rels; int nchildren; TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rels); - nchildren = xactGetCommittedChildren(&children); + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_abort xlrec; /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can omit - * recording the transaction abort at all. No one will ever care that it - * aborted. (These tests cover our whole transaction tree.) + * If we haven't been assigned an XID, nobody will care whether we + * aborted or not. Hence, we're done in that case. It does not matter + * if we have rels to delete (note that this routine is not responsible + * for actually deleting 'em). We cannot have any child XIDs, either. 
*/ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) + if (!TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; + return; + } - /* - * Catch the scenario where we aborted partway through - * RecordTransactionCommit ... - */ - if (TransactionIdDidCommit(xid)) - elog(PANIC, "cannot abort transaction %u, it was already committed", xid); + /* + * We have a valid XID, so we should write an ABORT record for it. + * + * We do not flush XLOG to disk here, since the default assumption after a + * crash would be that we aborted, anyway. For the same reason, we don't + * need to worry about interlocking against checkpoint start. + */ - START_CRIT_SECTION(); + /* + * Check that we haven't aborted halfway through RecordTransactionCommit. + */ + if (TransactionIdDidCommit(xid)) + elog(PANIC, "cannot abort transaction %u, it was already committed", + xid); - /* - * We only need to log the abort in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. (If it - * made no transaction-controlled XLOG entries, its XID appears - * nowhere in permanent storage, so no one else will ever care if it - * committed.) - * - * We do not flush XLOG to disk unless deleting files, since the - * default assumption after a crash would be that we aborted, anyway. - * For the same reason, we don't need to worry about interlocking - * against checkpoint start. 
- */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - SetCurrentTransactionStopTimestamp(); - xlrec.xact_time = xactStopTimestamp; - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; + /* Fetch the data we need for the abort record */ + nrels = smgrGetPendingDeletes(false, &rels, NULL); + nchildren = xactGetCommittedChildren(&children); - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); + /* XXX do we really need a critical section here? */ + START_CRIT_SECTION(); - /* Must flush if we are deleting files... 
*/ - if (nrels > 0) - XLogFlush(recptr); - } + /* Write the ABORT record */ + if (isSubXact) + xlrec.xact_time = GetCurrentTimestamp(); + else + { + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + } + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactAbort; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) + { + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; + } + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; + } + rdata[lastrdata].next = NULL; - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but we may as well do it while we are here. - * - * The ordering here isn't critical but it seems best to mark the - * parent first. This assures an atomic transition of all the - * subtransactions to aborted state from the point of view of - * concurrent TransactionIdDidAbort calls. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); - END_CRIT_SECTION(); - } + /* + * Mark the transaction aborted in clog. This is not absolutely necessary + * but we may as well do it while we are here; also, in the subxact case + * it is helpful because XactLockTableWait makes use of it to avoid + * waiting for already-aborted subtransactions. It is OK to do it without + * having flushed the ABORT record to disk, because in event of a crash + * we'd be assumed to have aborted anyway. + * + * The ordering here isn't critical but it seems best to mark the + * parent first. 
This assures an atomic transition of all the + * subtransactions to aborted state from the point of view of + * concurrent TransactionIdDidAbort calls. + */ + TransactionIdAbort(xid); + TransactionIdAbortTree(nchildren, children); - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + END_CRIT_SECTION(); + + /* + * If we're aborting a subtransaction, we can immediately remove failed + * XIDs from PGPROC's cache of running child XIDs. We do that here for + * subxacts, because we already have the child XID array at hand. For + * main xacts, the equivalent happens just after this function returns. + */ + if (isSubXact) + XidCacheRemoveRunningXids(xid, nchildren, children); + + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; /* And clean up local data */ if (rels) @@ -1251,108 +1279,6 @@ AtSubAbort_childXids(void) s->childXids = NIL; } -/* - * RecordSubTransactionAbort - */ -static void -RecordSubTransactionAbort(void) -{ - int nrels; - RelFileNode *rels; - TransactionId xid = GetCurrentTransactionId(); - int nchildren; - TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rels); - nchildren = xactGetCommittedChildren(&children); - - /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can omit - * recording the transaction abort at all. No one will ever care that it - * aborted. (These tests cover our whole transaction tree, and therefore - * may mark subxacts that don't really need it, but it's probably not - * worth being tenser.) - * - * In this case we needn't worry about marking subcommitted children as - * aborted, because they didn't mark themselves as subcommitted in the - * first place; see the optimization in RecordSubTransactionCommit. 
- */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) - { - START_CRIT_SECTION(); - - /* - * We only need to log the abort in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. - */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - xlrec.xact_time = GetCurrentTimestamp(); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; - - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); - - /* Must flush if we are deleting files... */ - if (nrels > 0) - XLogFlush(recptr); - } - - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but XactLockTableWait makes use of it to avoid waiting - * for already-aborted subtransactions. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); - - END_CRIT_SECTION(); - } - - /* - * We can immediately remove failed XIDs from PGPROC's cache of running - * child XIDs. It's easiest to do it here while we have the child XID - * array at hand, even though in the main-transaction case the equivalent - * work happens just after return from RecordTransactionAbort. 
- */ - XidCacheRemoveRunningXids(xid, nchildren, children); - - /* And clean up local data */ - if (rels) - pfree(rels); - if (children) - pfree(children); -} - /* ---------------------------------------------------------------- * CleanupTransaction stuff * ---------------------------------------------------------------- @@ -1436,6 +1362,7 @@ static void StartTransaction(void) { TransactionState s; + VirtualTransactionId vxid; /* * Let's just make sure the state stack is empty @@ -1479,13 +1406,25 @@ StartTransaction(void) AtStart_ResourceOwner(); /* - * generate a new transaction id + * Assign a new LocalTransactionId, and combine it with the backendId to + * form a virtual transaction id. + */ + vxid.backendId = MyBackendId; + vxid.localTransactionId = GetNextLocalTransactionId(); + + /* + * Lock the virtual transaction id before we announce it in the proc array */ - s->transactionId = GetNewTransactionId(false); + VirtualXactLockTableInsert(vxid); - XactLockTableInsert(s->transactionId); + /* + * Advertise it in the proc array. We assume assignment of + * LocalTransactionID is atomic, and the backendId should be set already. + */ + Assert(MyProc->backendId == vxid.backendId); + MyProc->lxid = vxid.localTransactionId; - PG_TRACE1(transaction__start, s->transactionId); + PG_TRACE1(transaction__start, vxid.localTransactionId); /* * set transaction_timestamp() (a/k/a now()). We want this to be the same @@ -1631,9 +1570,17 @@ CommitTransaction(void) */ if (MyProc != NULL) { - /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ + /* + * Lock ProcArrayLock because that's what GetSnapshotData uses. + * You might assume that we can skip this step if we had no + * transaction id assigned, because the failure case outlined + * in GetSnapshotData cannot happen in that case. This is true, + * but we *still* need the lock guarantee that two concurrent + * computations of the *oldest* xmin will get the same result. 
+ */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->xid = InvalidTransactionId; + MyProc->lxid = InvalidLocalTransactionId; MyProc->xmin = InvalidTransactionId; MyProc->inVacuum = false; /* must be cleared with xid/xmin */ @@ -1861,10 +1808,8 @@ PrepareTransaction(void) * Now we clean up backend-internal state and release internal resources. */ - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; /* * Let others know about no transaction in progress by me. This has to be @@ -1872,9 +1817,17 @@ PrepareTransaction(void) * someone may think it is unlocked and recyclable. */ - /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ + /* + * Lock ProcArrayLock because that's what GetSnapshotData uses. + * You might assume that we can skip this step if we have no + * transaction id assigned, because the failure case outlined + * in GetSnapshotData cannot happen in that case. This is true, + * but we *still* need the lock guarantee that two concurrent + * computations of the *oldest* xmin will get the same result. + */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->xid = InvalidTransactionId; + MyProc->lxid = InvalidLocalTransactionId; MyProc->xmin = InvalidTransactionId; MyProc->inVacuum = false; /* must be cleared with xid/xmin */ @@ -2032,8 +1985,7 @@ AbortTransaction(void) * Advertise the fact that we aborted in pg_clog (assuming that we got as * far as assigning an XID to advertise). */ - if (TransactionIdIsValid(s->transactionId)) - RecordTransactionAbort(); + RecordTransactionAbort(false); /* * Let others know about no transaction in progress by me. Note that this @@ -2042,9 +1994,17 @@ AbortTransaction(void) */ if (MyProc != NULL) { - /* Lock ProcArrayLock because that's what GetSnapshotData uses. 
*/ + /* + * Lock ProcArrayLock because that's what GetSnapshotData uses. + * You might assume that we can skip this step if we have no + * transaction id assigned, because the failure case outlined + * in GetSnapshotData cannot happen in that case. This is true, + * but we *still* need the lock guarantee that two concurrent + * computations of the *oldest* xmin will get the same result. + */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->xid = InvalidTransactionId; + MyProc->lxid = InvalidLocalTransactionId; MyProc->xmin = InvalidTransactionId; MyProc->inVacuum = false; /* must be cleared with xid/xmin */ MyProc->inCommit = false; /* be sure this gets cleared */ @@ -3752,13 +3712,11 @@ CommitSubTransaction(void) CommandCounterIncrement(); /* Mark subtransaction as subcommitted */ - if (TransactionIdIsValid(s->transactionId)) - { - RecordSubTransactionCommit(); - AtSubCommit_childXids(); - } + RecordSubTransactionCommit(); /* Post-commit cleanup */ + if (TransactionIdIsValid(s->transactionId)) + AtSubCommit_childXids(); AfterTriggerEndSubXact(true); AtSubCommit_Portals(s->subTransactionId, s->parent->subTransactionId, @@ -3884,13 +3842,12 @@ AbortSubTransaction(void) s->parent->subTransactionId); /* Advertise the fact that we aborted in pg_clog. 
*/ + RecordTransactionAbort(true); + + /* Post-abort cleanup */ if (TransactionIdIsValid(s->transactionId)) - { - RecordSubTransactionAbort(); AtSubAbort_childXids(); - } - /* Post-abort cleanup */ CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId, s->parent->subTransactionId); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1db33fb26da5a01708099a8bcfa42030bba12dd0..5474a91c247966905e55928dd871bbeb4e3b333f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.279 2007/08/28 23:17:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.280 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -154,38 +154,16 @@ static TimeLineID recoveryTargetTLI; static List *expectedTLIs; static TimeLineID curFileTLI; -/* - * MyLastRecPtr points to the start of the last XLOG record inserted by the - * current transaction. If MyLastRecPtr.xrecoff == 0, then the current - * xact hasn't yet inserted any transaction-controlled XLOG records. - * - * Note that XLOG records inserted outside transaction control are not - * reflected into MyLastRecPtr. They do, however, cause MyXactMadeXLogEntry - * to be set true. The latter can be used to test whether the current xact - * made any loggable changes (including out-of-xact changes, such as - * sequence updates). - * - * When we insert/update/delete a tuple in a temporary relation, we do not - * make any XLOG record, since we don't care about recovering the state of - * the temp rel after a crash. However, we will still need to remember - * whether our transaction committed or aborted in that case. 
So, we must - * set MyXactMadeTempRelUpdate true to indicate that the XID will be of - * interest later. - */ -XLogRecPtr MyLastRecPtr = {0, 0}; - -bool MyXactMadeXLogEntry = false; - -bool MyXactMadeTempRelUpdate = false; - /* * ProcLastRecPtr points to the start of the last XLOG record inserted by the - * current backend. It is updated for all inserts, transaction-controlled - * or not. ProcLastRecEnd is similar but points to end+1 of last record. + * current backend. It is updated for all inserts. XactLastRecEnd points to + * end+1 of the last record, and is reset when we end a top-level transaction, + * or start a new one; so it can be used to tell if the current transaction has + * created any XLOG records. */ static XLogRecPtr ProcLastRecPtr = {0, 0}; -XLogRecPtr ProcLastRecEnd = {0, 0}; +XLogRecPtr XactLastRecEnd = {0, 0}; /* * RedoRecPtr is this backend's local copy of the REDO record pointer @@ -488,15 +466,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) bool updrqst; bool doPageWrites; bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH); - bool no_tran = (rmid == RM_XLOG_ID); + /* info's high bits are reserved for use by me */ if (info & XLR_INFO_MASK) - { - if ((info & XLR_INFO_MASK) != XLOG_NO_TRAN) - elog(PANIC, "invalid xlog info mask %02X", (info & XLR_INFO_MASK)); - no_tran = true; - info &= ~XLR_INFO_MASK; - } + elog(PANIC, "invalid xlog info mask %02X", info); /* * In bootstrap mode, we don't actually log anything but XLOG resources; @@ -856,11 +829,8 @@ begin:; #endif /* Record begin of record in appropriate places */ - if (!no_tran) - MyLastRecPtr = RecPtr; ProcLastRecPtr = RecPtr; Insert->PrevRecord = RecPtr; - MyXactMadeXLogEntry = true; Insert->currpos += SizeOfXLogRecord; freespace -= SizeOfXLogRecord; @@ -1018,7 +988,7 @@ begin:; SpinLockRelease(&xlogctl->info_lck); } - ProcLastRecEnd = RecPtr; + XactLastRecEnd = RecPtr; END_CRIT_SECTION(); diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql index 7473524865947158df8295d8ac824c00b64dba18..30ea87d5b7ac62b870b889b52183d86d9fa07800 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -3,7 +3,7 @@ * * Copyright (c) 1996-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.41 2007/08/25 17:47:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.42 2007/09/05 18:10:47 tgl Exp $ */ CREATE VIEW pg_roles AS @@ -145,8 +145,8 @@ CREATE VIEW pg_locks AS SELECT * FROM pg_lock_status() AS L (locktype text, database oid, relation oid, page int4, tuple int2, - transactionid xid, classid oid, objid oid, objsubid int2, - transaction xid, pid int4, mode text, granted boolean); + virtualxid text, transactionid xid, classid oid, objid oid, objsubid int2, + virtualtransaction text, pid int4, mode text, granted boolean); CREATE VIEW pg_cursors AS SELECT C.name, C.statement, C.is_holdable, C.is_binary, diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index d79e73f59d8c29c4afb5f86517d34041bb3e219f..ac56b583f17d8726986fba0975bd696e16e308b8 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.162 2007/08/25 19:08:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.163 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ #include "parser/parse_expr.h" #include "parser/parse_func.h" #include "parser/parsetree.h" +#include "storage/procarray.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -126,9 +127,8 @@ DefineIndex(RangeVar *heapRelation, int16 *coloptions; IndexInfo *indexInfo; int numberOfAttributes; - List *old_xact_list; - ListCell *lc; - uint32 ixcnt; + VirtualTransactionId 
*old_lockholders; + VirtualTransactionId *old_snapshots; LockRelId heaprelid; LOCKTAG heaplocktag; Snapshot snapshot; @@ -484,24 +484,36 @@ DefineIndex(RangeVar *heapRelation, * xacts that open the table for writing after this point; they will see * the new index when they open it. * + * Note: the reason we use actual lock acquisition here, rather than + * just checking the ProcArray and sleeping, is that deadlock is possible + * if one of the transactions in question is blocked trying to acquire + * an exclusive lock on our table. The lock code will detect deadlock + * and error out properly. + * * Note: GetLockConflicts() never reports our own xid, hence we need not - * check for that. + * check for that. Also, prepared xacts are not reported, which is + * fine since they certainly aren't going to do anything more. */ SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId); - old_xact_list = GetLockConflicts(&heaplocktag, ShareLock); + old_lockholders = GetLockConflicts(&heaplocktag, ShareLock); - foreach(lc, old_xact_list) + while (VirtualTransactionIdIsValid(*old_lockholders)) { - TransactionId xid = lfirst_xid(lc); - - XactLockTableWait(xid); + VirtualXactLockTableWait(*old_lockholders); + old_lockholders++; } /* * Now take the "reference snapshot" that will be used by validate_index() - * to filter candidate tuples. All other transactions running at this - * time will have to be out-waited before we can commit, because we can't - * guarantee that tuples deleted just before this will be in the index. + * to filter candidate tuples. Beware! There might be still snapshots + * in use that treat some transaction as in-progress that our reference + * snapshot treats as committed. If such a recently-committed transaction + * deleted tuples in the table, we will not include them in the index; yet + * those transactions which see the deleting one as still-in-progress will + * expect them to be there once we mark the index as valid. 
+ * + * We solve this by waiting for all endangered transactions to exit before + * we mark the index as valid. * * We also set ActiveSnapshot to this snap, since functions in indexes may * need a snapshot. @@ -518,14 +530,21 @@ DefineIndex(RangeVar *heapRelation, * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted just * before the reference snap was taken, we have to wait out any - * transactions older than the reference snap. We can do this by waiting - * for each xact explicitly listed in the snap. + * transactions that might have older snapshots. Obtain a list of + * VXIDs of such transactions, and wait for them individually. * - * Note: GetSnapshotData() never stores our own xid into a snap, hence we - * need not check for that. + * We can exclude any running transactions that have xmin >= the xmax of + * our reference snapshot, since they are clearly not interested in any + * missing older tuples. Also, GetCurrentVirtualXIDs never reports our + * own vxid, so we need not check for that. 
*/ - for (ixcnt = 0; ixcnt < snapshot->xcnt; ixcnt++) - XactLockTableWait(snapshot->xip[ixcnt]); + old_snapshots = GetCurrentVirtualXIDs(ActiveSnapshot->xmax); + + while (VirtualTransactionIdIsValid(*old_snapshots)) + { + VirtualXactLockTableWait(*old_snapshots); + old_snapshots++; + } /* * Index can now be marked valid -- update its pg_index entry diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index bd06bfb5da3f63fef607f90ec1cb2f7a20284024..cb2a1380caf20f1e467fbb61adc4d852d64e2136 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.143 2007/02/01 19:10:26 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.144 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,7 @@ #include "commands/tablecmds.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "storage/proc.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -63,7 +64,7 @@ typedef struct SeqTableData { struct SeqTableData *next; /* link to next SeqTable object */ Oid relid; /* pg_class OID of this sequence */ - TransactionId xid; /* xact in which we last did a seq op */ + LocalTransactionId lxid; /* xact in which we last did a seq op */ int64 last; /* value last returned by nextval */ int64 cached; /* last value already cached for nextval */ /* if last != cached, we have not used up all the cached values */ @@ -282,7 +283,7 @@ DefineSequence(CreateSeqStmt *seq) rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); @@ -366,7 +367,7 @@ AlterSequence(AlterSeqStmt *stmt) rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; - recptr = 
XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); @@ -594,7 +595,7 @@ nextval_internal(Oid relid) rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); @@ -764,7 +765,7 @@ do_setval(Oid relid, int64 next, bool iscalled) rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); @@ -825,10 +826,10 @@ setval3_oid(PG_FUNCTION_ARGS) static Relation open_share_lock(SeqTable seq) { - TransactionId thisxid = GetTopTransactionId(); + LocalTransactionId thislxid = MyProc->lxid; /* Get the lock if not already held in this xact */ - if (seq->xid != thisxid) + if (seq->lxid != thislxid) { ResourceOwner currentOwner; @@ -848,7 +849,7 @@ open_share_lock(SeqTable seq) CurrentResourceOwner = currentOwner; /* Flag that we have a lock in the current xact */ - seq->xid = thisxid; + seq->lxid = thislxid; } /* We now know we have AccessShareLock, and can safely open the rel */ @@ -891,7 +892,7 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); elm->relid = relid; - elm->xid = InvalidTransactionId; + elm->lxid = InvalidLocalTransactionId; /* increment is set to 0 until we do read_info (see currval) */ elm->last = elm->cached = elm->increment = 0; elm->next = seqtab; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 358e9a5ad99839eaedd40e5ff8a4e4f425932349..87cf57daec3e298053d9f2faa23593fe84900d25 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * 
IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.355 2007/08/13 19:08:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.356 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2601,14 +2601,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } - else - { - /* - * No XLOG record, but still need to flag that XID exists on - * disk - */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -2761,13 +2753,6 @@ move_chain_tuple(Relation rel, PageSetLSN(dst_page, recptr); PageSetTLI(dst_page, ThisTimeLineID); } - else - { - /* - * No XLOG record, but still need to flag that XID exists on disk - */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -2868,13 +2853,6 @@ move_plain_tuple(Relation rel, PageSetLSN(dst_page, recptr); PageSetTLI(dst_page, ThisTimeLineID); } - else - { - /* - * No XLOG record, but still need to flag that XID exists on disk - */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -3070,11 +3048,6 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 3ac097388b2ef42674a344c1c97fbdac089448a8..ecc0ee78074cc3b863cc92796f8b606b7f2010cb 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -36,7 +36,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.91 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -658,11 +658,6 @@ 
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 51da9679f35a7874512e4e706574e368188cfaff..577f73a31f199ec70b7c924d8b013c1eae87d784 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -23,7 +23,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.28 2007/07/01 02:22:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.29 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -404,7 +404,7 @@ TransactionIdIsActive(TransactionId xid) * This is also used to determine where to truncate pg_subtrans. allDbs * must be TRUE for that case, and ignoreVacuum FALSE. * - * Note: we include the currently running xids in the set of considered xids. + * Note: we include all currently running xids in the set of considered xids. * This ensures that if a just-started xact has not yet set its snapshot, * when it does set the snapshot it cannot set xmin less than what we compute. */ @@ -416,15 +416,19 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) int index; /* - * Normally we start the min() calculation with our own XID. But if - * called by checkpointer, we will not be inside a transaction, so use - * next XID as starting point for min() calculation. (Note that if there - * are no xacts running at all, that will be the subtrans truncation - * point!) + * We need to initialize the MIN() calculation with something. + * ReadNewTransactionId() is guaranteed to work, but is relatively + * expensive due to locking; so first we try a couple of shortcuts. 
+ * If we have a valid xmin in our own PGPROC entry, that will do; + * or if we have assigned ourselves an XID, that will do. */ - result = GetTopTransactionId(); + result = MyProc ? MyProc->xmin : InvalidTransactionId; if (!TransactionIdIsValid(result)) - result = ReadNewTransactionId(); + { + result = GetTopTransactionIdIfAny(); + if (!TransactionIdIsValid(result)) + result = ReadNewTransactionId(); + } LWLockAcquire(ProcArrayLock, LW_SHARED); @@ -440,23 +444,22 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* Fetch xid just once - see GetNewTransactionId */ TransactionId xid = proc->xid; - if (TransactionIdIsNormal(xid)) - { - /* First consider the transaction own's Xid */ - if (TransactionIdPrecedes(xid, result)) - result = xid; - - /* - * Also consider the transaction's Xmin, if set. - * - * We must check both Xid and Xmin because there is a window - * where an xact's Xid is set but Xmin isn't yet. - */ - xid = proc->xmin; - if (TransactionIdIsNormal(xid)) - if (TransactionIdPrecedes(xid, result)) - result = xid; - } + /* First consider the transaction's own Xid, if any */ + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, result)) + result = xid; + + /* + * Also consider the transaction's Xmin, if set. + * + * We must check both Xid and Xmin because a transaction might + * have an Xmin but not (yet) an Xid; conversely, if it has + * an Xid, that could determine some not-yet-set Xmin. + */ + xid = proc->xmin; /* Fetch just once */ + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, result)) + result = xid; } } @@ -545,8 +548,6 @@ GetSnapshotData(Snapshot snapshot, bool serializable) errmsg("out of memory"))); } - globalxmin = xmin = GetTopTransactionId(); - /* * It is sufficient to get shared lock on ProcArrayLock, even if we are * computing a serializable snapshot and therefore will be setting @@ -557,6 +558,19 @@ GetSnapshotData(Snapshot snapshot, bool serializable) * discussion just below). 
So it doesn't matter whether another backend * concurrently doing GetSnapshotData or GetOldestXmin sees our xmin as * set or not; he'd compute the same xmin for himself either way. + * (We are assuming here that xmin can be set and read atomically, + * just like xid.) + * + * There is a corner case in which the above argument doesn't work: if + * there isn't any oldest xact, ie, all xids in the array are invalid. + * In that case we will compute xmin as the result of ReadNewTransactionId, + * and since GetNewTransactionId doesn't take the ProcArrayLock, it's not + * so obvious that two backends with overlapping shared locks will get + * the same answer. But GetNewTransactionId is required to store the XID + * it assigned into the ProcArray before releasing XidGenLock. Therefore + * the backend that did ReadNewTransactionId later will see that XID in + * the array, and will compute the same xmin as the earlier one that saw + * no XIDs in the array. */ LWLockAcquire(ProcArrayLock, LW_SHARED); @@ -589,6 +603,9 @@ GetSnapshotData(Snapshot snapshot, bool serializable) xmax = ReadNewTransactionId(); + /* initialize xmin calculation with xmax */ + globalxmin = xmin = xmax; + /* * Spin over procArray checking xid, xmin, and subxids. 
The goal is * to gather all active xids, find the lowest xmin, and try to record @@ -597,34 +614,40 @@ GetSnapshotData(Snapshot snapshot, bool serializable) for (index = 0; index < arrayP->numProcs; index++) { PGPROC *proc = arrayP->procs[index]; + TransactionId xid; + + /* Ignore procs running LAZY VACUUM */ + if (proc->inVacuum) + continue; + + /* Update globalxmin to be the smallest valid xmin */ + xid = proc->xmin; /* fetch just once */ + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = proc->xid; + xid = proc->xid; /* - * Ignore my own proc (dealt with my xid above), procs not running a - * transaction, xacts started since we read the next transaction ID, - * and xacts executing LAZY VACUUM. There's no need to store XIDs - * above what we got from ReadNewTransactionId, since we'll treat them - * as running anyway. We also assume that such xacts can't compute an - * xmin older than ours, so they needn't be considered in computing - * globalxmin. + * If the transaction has been assigned an xid < xmax we add it to the + * snapshot, and update xmin if necessary. There's no need to store + * XIDs above what we got from ReadNewTransactionId, since we'll treat + * them as running anyway. We don't bother to examine their subxids + * either. + * + * We don't include our own XID (if any) in the snapshot, but we must + * include it into xmin. 
*/ - if (proc == MyProc || - !TransactionIdIsNormal(xid) || - TransactionIdFollowsOrEquals(xid, xmax) || - proc->inVacuum) - continue; - - if (TransactionIdPrecedes(xid, xmin)) - xmin = xid; - snapshot->xip[count++] = xid; - - /* Update globalxmin to be the smallest valid xmin */ - xid = proc->xmin; if (TransactionIdIsNormal(xid)) - if (TransactionIdPrecedes(xid, globalxmin)) - globalxmin = xid; + { + if (TransactionIdFollowsOrEquals(xid, xmax)) + continue; + if (proc != MyProc) + snapshot->xip[count++] = xid; + if (TransactionIdPrecedes(xid, xmin)) + xmin = xid; + } /* * Save subtransaction XIDs if possible (if we've already overflowed, @@ -635,8 +658,10 @@ GetSnapshotData(Snapshot snapshot, bool serializable) * remove any. Hence it's important to fetch nxids just once. Should * be safe to use memcpy, though. (We needn't worry about missing any * xids added concurrently, because they must postdate xmax.) + * + * Again, our own XIDs are not included in the snapshot. */ - if (subcount >= 0) + if (subcount >= 0 && proc != MyProc) { if (proc->subxids.overflowed) subcount = -1; /* overflowed */ @@ -818,6 +843,9 @@ BackendPidGetProc(int pid) * * Only main transaction Ids are considered. This function is mainly * useful for determining what backend owns a lock. + * + * Beware that not every xact has an XID assigned. However, as long as you + * only call this using an XID found on disk, you're safe. */ int BackendXidGetPid(TransactionId xid) @@ -856,6 +884,63 @@ IsBackendPid(int pid) return (BackendPidGetProc(pid) != NULL); } + +/* + * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs. + * + * The array is palloc'd and is terminated with an invalid VXID. + * + * If limitXmin is not InvalidTransactionId, we skip any backends + * with xmin >= limitXmin. Also, our own process is always skipped. 
+ */ +VirtualTransactionId * +GetCurrentVirtualXIDs(TransactionId limitXmin) +{ + VirtualTransactionId *vxids; + ProcArrayStruct *arrayP = procArray; + int count = 0; + int index; + + /* allocate result space with room for a terminator */ + vxids = (VirtualTransactionId *) + palloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1)); + + LWLockAcquire(ProcArrayLock, LW_SHARED); + + for (index = 0; index < arrayP->numProcs; index++) + { + PGPROC *proc = arrayP->procs[index]; + /* Fetch xmin just once - might change on us? */ + TransactionId pxmin = proc->xmin; + + if (proc == MyProc) + continue; + + /* + * Note that InvalidTransactionId precedes all other XIDs, so a + * proc that hasn't set xmin yet will always be included. + */ + if (!TransactionIdIsValid(limitXmin) || + TransactionIdPrecedes(pxmin, limitXmin)) + { + VirtualTransactionId vxid; + + GET_VXID_FROM_PGPROC(vxid, *proc); + if (VirtualTransactionIdIsValid(vxid)) + vxids[count++] = vxid; + } + } + + LWLockRelease(ProcArrayLock); + + /* add the terminator */ + vxids[count].backendId = InvalidBackendId; + vxids[count].localTransactionId = InvalidLocalTransactionId; + + return vxids; +} + + /* * CountActiveBackends --- count backends (other than myself) that are in * active transactions. 
This is used as a heuristic to decide if @@ -885,7 +970,7 @@ CountActiveBackends(void) if (proc->pid == 0) continue; /* do not count prepared xacts */ if (proc->xid == InvalidTransactionId) - continue; /* do not count if not in a transaction */ + continue; /* do not count if no XID assigned */ if (proc->waitLock != NULL) continue; /* do not count if blocked on a lock */ count++; diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 31c4a2dfad1158d0aeee123e9d744da51a6edeab..99690d8b36b23ea9be9ef2ced0f8db936d428b2b 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.63 2007/01/05 22:19:38 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.64 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,12 +19,15 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" +#include "storage/proc.h" #include "storage/shmem.h" #include "storage/sinvaladt.h" SISeg *shmInvalBuffer; +static LocalTransactionId nextLocalTransactionId; + static void CleanupInvalidationState(int status, Datum arg); static void SISetProcStateInvalid(SISeg *segP); @@ -40,6 +43,8 @@ SInvalShmemSize(void) size = offsetof(SISeg, procState); size = add_size(size, mul_size(sizeof(ProcState), MaxBackends)); + size = add_size(size, mul_size(sizeof(LocalTransactionId), MaxBackends)); + return size; } @@ -51,15 +56,21 @@ void SIBufferInit(void) { SISeg *segP; + Size size; int i; bool found; /* Allocate space in shared memory */ + size = offsetof(SISeg, procState); + size = add_size(size, mul_size(sizeof(ProcState), MaxBackends)); + shmInvalBuffer = segP = (SISeg *) - ShmemInitStruct("shmInvalBuffer", SInvalShmemSize(), &found); + ShmemInitStruct("shmInvalBuffer", size, &found); if (found) return; + segP->nextLXID = 
ShmemAlloc(sizeof(LocalTransactionId) * MaxBackends); + /* Clear message counters, save size of procState array */ segP->minMsgNum = 0; segP->maxMsgNum = 0; @@ -69,11 +80,12 @@ SIBufferInit(void) /* The buffer[] array is initially all unused, so we need not fill it */ - /* Mark all backends inactive */ + /* Mark all backends inactive, and initialize nextLXID */ for (i = 0; i < segP->maxBackends; i++) { segP->procState[i].nextMsgNum = -1; /* inactive */ segP->procState[i].resetState = false; + segP->nextLXID[i] = InvalidLocalTransactionId; } } @@ -128,9 +140,15 @@ SIBackendInit(SISeg *segP) elog(DEBUG2, "my backend id is %d", MyBackendId); #endif /* INVALIDDEBUG */ + /* Advertise assigned backend ID in MyProc */ + MyProc->backendId = MyBackendId; + /* Reduce free slot count */ segP->freeBackends--; + /* Fetch next local transaction ID into local memory */ + nextLocalTransactionId = segP->nextLXID[MyBackendId - 1]; + /* mark myself active, with all extant messages already read */ stateP->nextMsgNum = segP->maxMsgNum; stateP->resetState = false; @@ -160,6 +178,9 @@ CleanupInvalidationState(int status, Datum arg) LWLockAcquire(SInvalLock, LW_EXCLUSIVE); + /* Update next local transaction ID for next holder of this backendID */ + segP->nextLXID[MyBackendId - 1] = nextLocalTransactionId; + /* Mark myself inactive */ segP->procState[MyBackendId - 1].nextMsgNum = -1; segP->procState[MyBackendId - 1].resetState = false; @@ -352,3 +373,30 @@ SIDelExpiredDataEntries(SISeg *segP) } } } + + +/* + * GetNextLocalTransactionId --- allocate a new LocalTransactionId + * + * We split VirtualTransactionIds into two parts so that it is possible + * to allocate a new one without any contention for shared memory, except + * for a bit of additional overhead during backend startup/shutdown. + * The high-order part of a VirtualTransactionId is a BackendId, and the + * low-order part is a LocalTransactionId, which we assign from a local + * counter. 
To avoid the risk of a VirtualTransactionId being reused + * within a short interval, successive procs occupying the same backend ID + * slot should use a consecutive sequence of local IDs, which is implemented + * by copying nextLocalTransactionId as seen above. + */ +LocalTransactionId +GetNextLocalTransactionId(void) +{ + LocalTransactionId result; + + /* loop to avoid returning InvalidLocalTransactionId at wraparound */ + do { + result = nextLocalTransactionId++; + } while (!LocalTransactionIdIsValid(result)); + + return result; +} diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 1c5db363203345ffd4beb7c6113577d411718dac..f947d226fea1c318e335363c872669aecc4ce581 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.92 2007/07/25 22:16:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.93 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -421,8 +421,8 @@ UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) * XactLockTableInsert * * Insert a lock showing that the given transaction ID is running --- - * this is done during xact startup. The lock can then be used to wait - * for the transaction to finish. + * this is done when an XID is acquired by a transaction or subtransaction. + * The lock can then be used to wait for the transaction to finish. */ void XactLockTableInsert(TransactionId xid) @@ -439,8 +439,7 @@ XactLockTableInsert(TransactionId xid) * * Delete the lock showing that the given transaction ID is running. * (This is never used for main transaction IDs; those locks are only - * released implicitly at transaction end. But we do use it for subtrans - * IDs.) + * released implicitly at transaction end. But we do use it for subtrans IDs.) 
*/ void XactLockTableDelete(TransactionId xid) @@ -472,7 +471,7 @@ XactLockTableWait(TransactionId xid) for (;;) { Assert(TransactionIdIsValid(xid)); - Assert(!TransactionIdEquals(xid, GetTopTransactionId())); + Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); SET_LOCKTAG_TRANSACTION(tag, xid); @@ -500,7 +499,7 @@ ConditionalXactLockTableWait(TransactionId xid) for (;;) { Assert(TransactionIdIsValid(xid)); - Assert(!TransactionIdEquals(xid, GetTopTransactionId())); + Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); SET_LOCKTAG_TRANSACTION(tag, xid); @@ -517,6 +516,70 @@ ConditionalXactLockTableWait(TransactionId xid) return true; } + +/* + * VirtualXactLockTableInsert + * + * Insert a lock showing that the given virtual transaction ID is running --- + * this is done at main transaction start when its VXID is assigned. + * The lock can then be used to wait for the transaction to finish. + */ +void +VirtualXactLockTableInsert(VirtualTransactionId vxid) +{ + LOCKTAG tag; + + Assert(VirtualTransactionIdIsValid(vxid)); + + SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); + + (void) LockAcquire(&tag, ExclusiveLock, false, false); +} + +/* + * VirtualXactLockTableWait + * + * Waits until the lock on the given VXID is released, which shows that + * the top-level transaction owning the VXID has ended. + */ +void +VirtualXactLockTableWait(VirtualTransactionId vxid) +{ + LOCKTAG tag; + + Assert(VirtualTransactionIdIsValid(vxid)); + + SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); + + (void) LockAcquire(&tag, ShareLock, false, false); + + LockRelease(&tag, ShareLock, false); +} + +/* + * ConditionalVirtualXactLockTableWait + * + * As above, but only lock if we can get the lock without blocking. + * Returns TRUE if the lock was acquired. 
+ */ +bool +ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid) +{ + LOCKTAG tag; + + Assert(VirtualTransactionIdIsValid(vxid)); + + SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); + + if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL) + return false; + + LockRelease(&tag, ShareLock, false); + + return true; +} + + /* * LockDatabaseObject * diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index a4a0910d393026cc27156be489dc4e8c719a1aa6..06a4f7adae5ba21d429316783921bd73e9404b8d 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.177 2007/07/16 21:09:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.178 2007/09/05 18:10:47 tgl Exp $ * * NOTES * A lock table is a shared memory hash table. When @@ -1681,20 +1681,24 @@ LockReassignCurrentOwner(void) /* * GetLockConflicts - * Get a list of TransactionIds of xacts currently holding locks + * Get an array of VirtualTransactionIds of xacts currently holding locks * that would conflict with the specified lock/lockmode. * xacts merely awaiting such a lock are NOT reported. * + * The result array is palloc'd and is terminated with an invalid VXID. + * * Of course, the result could be out of date by the time it's returned, * so use of this function has to be thought about carefully. * - * Only top-level XIDs are reported. Note we never include the current xact - * in the result list, since an xact never blocks itself. + * Note we never include the current xact's vxid in the result array, + * since an xact never blocks itself. Also, prepared transactions are + * ignored, which is a bit more debatable but is appropriate for current + * uses of the result. 
*/ -List * +VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) { - List *result = NIL; + VirtualTransactionId *vxids; LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; LockMethod lockMethodTable; LOCK *lock; @@ -1703,6 +1707,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) PROCLOCK *proclock; uint32 hashcode; LWLockId partitionLock; + int count = 0; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); @@ -1710,6 +1715,14 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes) elog(ERROR, "unrecognized lock mode: %d", lockmode); + /* + * Allocate memory to store results, and fill with InvalidVXID. We + * only need enough space for MaxBackends + a terminator, since + * prepared xacts don't count. + */ + vxids = (VirtualTransactionId *) + palloc0(sizeof(VirtualTransactionId) * (MaxBackends + 1)); + /* * Look up the lock object matching the tag. */ @@ -1730,7 +1743,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) * on this lockable object. */ LWLockRelease(partitionLock); - return NIL; + return vxids; } /* @@ -1752,18 +1765,17 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) /* A backend never blocks itself */ if (proc != MyProc) { - /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = proc->xid; + VirtualTransactionId vxid; + + GET_VXID_FROM_PGPROC(vxid, *proc); /* - * Race condition: during xact commit/abort we zero out - * PGPROC's xid before we mark its locks released. If we see - * zero in the xid field, assume the xact is in process of - * shutting down and act as though the lock is already - * released. + * If we see an invalid VXID, then either the xact has already + * committed (or aborted), or it's a prepared xact. In + * either case we may ignore it. 
*/ - if (TransactionIdIsValid(xid)) - result = lappend_xid(result, xid); + if (VirtualTransactionIdIsValid(vxid)) + vxids[count++] = vxid; } } @@ -1773,7 +1785,10 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) LWLockRelease(partitionLock); - return result; + if (count > MaxBackends) /* should never happen */ + elog(PANIC, "too many conflicting locks found"); + + return vxids; } @@ -1782,7 +1797,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) * Do the preparatory work for a PREPARE: make 2PC state file records * for all locks currently held. * - * Non-transactional locks are ignored. + * Non-transactional locks are ignored, as are VXID locks. * * There are some special cases that we error out on: we can't be holding * any session locks (should be OK since only VACUUM uses those) and we @@ -1812,6 +1827,13 @@ AtPrepare_Locks(void) if (!LockMethods[LOCALLOCK_LOCKMETHOD(*locallock)]->transactional) continue; + /* + * Ignore VXID locks. We don't want those to be held by prepared + * transactions, since they aren't meaningful after a restart. 
+ */ + if (locallock->tag.lock.locktag_type == LOCKTAG_VIRTUALTRANSACTION) + continue; + /* Ignore it if we don't actually hold the lock */ if (locallock->nLocks <= 0) continue; @@ -1899,6 +1921,10 @@ PostPrepare_Locks(TransactionId xid) if (!LockMethods[LOCALLOCK_LOCKMETHOD(*locallock)]->transactional) continue; + /* Ignore VXID locks */ + if (locallock->tag.lock.locktag_type == LOCKTAG_VIRTUALTRANSACTION) + continue; + /* We already checked there are no session locks */ /* Mark the proclock to show we need to release this lockmode */ @@ -1944,6 +1970,10 @@ PostPrepare_Locks(TransactionId xid) if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional) goto next_item; + /* Ignore VXID locks */ + if (lock->tag.locktag_type == LOCKTAG_VIRTUALTRANSACTION) + goto next_item; + PROCLOCK_PRINT("PostPrepare_Locks", proclock); LOCK_PRINT("PostPrepare_Locks", lock, 0); Assert(lock->nRequested >= 0); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 048fa31bccd9e44f86a9f7f82c33f4ece824650b..5441dd322de7925150ff8e5d13153800daedd550 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.192 2007/08/28 03:23:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.193 2007/09/05 18:10:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -282,10 +282,12 @@ InitProcess(void) */ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; + MyProc->lxid = InvalidLocalTransactionId; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; MyProc->pid = MyProcPid; - /* databaseId and roleId will be filled in later */ + /* backendId, databaseId and roleId will be filled in later */ + MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyProc->inCommit = false; @@ -359,7 +361,9 @@ InitProcessPhase2(void) 
* * Auxiliary processes are presently not expected to wait for real (lockmgr) * locks, so we need not set up the deadlock checker. They are never added - * to the ProcArray or the sinval messaging mechanism, either. + * to the ProcArray or the sinval messaging mechanism, either. They also + * don't get a VXID assigned, since this is only useful when we actually + * hold lockmgr locks. */ void InitAuxiliaryProcess(void) @@ -418,8 +422,10 @@ InitAuxiliaryProcess(void) */ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; + MyProc->lxid = InvalidLocalTransactionId; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; + MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyProc->inCommit = false; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7137d2dc08c854f432e4182554f26f9500a8c509..22ac13146c8fb1b00d2fd1ea53873d01f2b04200 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.105 2007/07/20 16:29:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.106 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -347,9 +347,8 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) return; /* - * Make a non-transactional XLOG entry showing the file creation. It's - * non-transactional because we should replay it whether the transaction - * commits or not; if not, the file will be dropped at abort time. + * Make an XLOG entry showing the file creation. If we abort, the file + * will be dropped at abort time. 
*/ xlrec.rnode = reln->smgr_rnode; @@ -358,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) rdata.buffer = InvalidBuffer; rdata.next = NULL; - lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata); + lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata); /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) @@ -554,10 +553,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) if (!isTemp) { /* - * Make a non-transactional XLOG entry showing the file truncation. - * It's non-transactional because we should replay it whether the - * transaction commits or not; the underlying file change is certainly - * not reversible. + * Make an XLOG entry showing the file truncation. */ XLogRecPtr lsn; XLogRecData rdata; @@ -571,8 +567,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) rdata.buffer = InvalidBuffer; rdata.next = NULL; - lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN, - &rdata); + lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata); } } @@ -679,11 +674,14 @@ smgrDoPendingDeletes(bool isCommit) * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. * If there are no relations to be deleted, *ptr is set to NULL. * + * If haveNonTemp isn't NULL, the bool it points to gets set to true if + * there is any non-temp table pending to be deleted; false if not. + * * Note that the list does not include anything scheduled for termination * by upper-level transactions. 
*/ int -smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) +smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) { int nestLevel = GetCurrentTransactionNestLevel(); int nrels; @@ -691,6 +689,8 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) PendingRelDelete *pending; nrels = 0; + if (haveNonTemp) + *haveNonTemp = false; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) @@ -707,6 +707,8 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) { if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) *rptr++ = pending->relnode; + if (haveNonTemp && !pending->isTemp) + *haveNonTemp = true; } return nrels; } diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 2263a946039b7c3a47b211be415d23bbce36f811..e78d74f9efe0459865241c57f201ec950bf7a220 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -6,7 +6,7 @@ * Copyright (c) 2002-2007, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.28 2007/01/05 22:19:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.29 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,6 +27,7 @@ static const char *const LockTagTypeNames[] = { "page", "tuple", "transactionid", + "virtualxid", "object", "userlock", "advisory" @@ -39,6 +40,27 @@ typedef struct int currIdx; /* current PROCLOCK index */ } PG_Lock_Status; + +/* + * VXIDGetDatum - Construct a text representation of a VXID + * + * This is currently only used in pg_lock_status, so we put it here. + */ +static Datum +VXIDGetDatum(BackendId bid, LocalTransactionId lxid) +{ + /* + * The representation is "<bid>/<lxid>", decimal and unsigned decimal + * respectively. Note that elog.c also knows how to format a vxid. 
+ */ + char vxidstr[32]; + + snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid); + + return DirectFunctionCall1(textin, CStringGetDatum(vxidstr)); +} + + /* * pg_lock_status - produce a view with one row per held or awaited lock mode */ @@ -64,7 +86,7 @@ pg_lock_status(PG_FUNCTION_ARGS) /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ - tupdesc = CreateTemplateTupleDesc(13, false); + tupdesc = CreateTemplateTupleDesc(14, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", @@ -75,21 +97,23 @@ pg_lock_status(PG_FUNCTION_ARGS) INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", - XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); @@ -120,8 +144,8 @@ 
pg_lock_status(PG_FUNCTION_ARGS) LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; - Datum values[13]; - char nulls[13]; + Datum values[14]; + char nulls[14]; HeapTuple tuple; Datum result; @@ -193,7 +217,6 @@ pg_lock_status(PG_FUNCTION_ARGS) values[0] = DirectFunctionCall1(textin, CStringGetDatum(locktypename)); - switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: @@ -206,6 +229,7 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; + nulls[9] = 'n'; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); @@ -216,6 +240,7 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; + nulls[9] = 'n'; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); @@ -226,9 +251,22 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; + nulls[9] = 'n'; break; case LOCKTAG_TRANSACTION: - values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); + values[6] = TransactionIdGetDatum(lock->tag.locktag_field1); + nulls[1] = 'n'; + nulls[2] = 'n'; + nulls[3] = 'n'; + nulls[4] = 'n'; + nulls[5] = 'n'; + nulls[7] = 'n'; + nulls[8] = 'n'; + nulls[9] = 'n'; + break; + case LOCKTAG_VIRTUALTRANSACTION: + values[5] = VXIDGetDatum(lock->tag.locktag_field1, + lock->tag.locktag_field2); nulls[1] = 'n'; nulls[2] = 'n'; nulls[3] = 'n'; @@ -236,31 +274,33 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; + nulls[9] = 'n'; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); - values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); - values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); - values[8] = Int16GetDatum(lock->tag.locktag_field4); + values[7] = ObjectIdGetDatum(lock->tag.locktag_field2); + values[8] = ObjectIdGetDatum(lock->tag.locktag_field3); + values[9] = 
Int16GetDatum(lock->tag.locktag_field4); nulls[2] = 'n'; nulls[3] = 'n'; nulls[4] = 'n'; nulls[5] = 'n'; + nulls[6] = 'n'; break; } - values[9] = TransactionIdGetDatum(proc->xid); + values[10] = VXIDGetDatum(proc->backendId, proc->lxid); if (proc->pid != 0) - values[10] = Int32GetDatum(proc->pid); + values[11] = Int32GetDatum(proc->pid); else - nulls[10] = 'n'; - values[11] = DirectFunctionCall1(textin, + nulls[11] = 'n'; + values[12] = DirectFunctionCall1(textin, CStringGetDatum(GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); - values[12] = BoolGetDatum(granted); + values[13] = BoolGetDatum(granted); tuple = heap_formtuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index d0d024e075a4078dd70c679b64272b2408d0d6d5..e8a3ed3db0e06b7a74d083fd88ff403f5f5ae617 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -42,7 +42,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.195 2007/08/23 01:24:43 adunstan Exp $ + * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.196 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,7 @@ #include "postmaster/postmaster.h" #include "postmaster/syslogger.h" #include "storage/ipc.h" +#include "storage/proc.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -1592,9 +1593,14 @@ log_line_prefix(StringInfo buf) if (MyProcPort == NULL) i = format_len; break; + case 'v': + /* keep VXID format in sync with lockfuncs.c */ + if (MyProc != NULL) + appendStringInfo(buf, "%d/%u", + MyProc->backendId, MyProc->lxid); + break; case 'x': - if (MyProcPort) - appendStringInfo(buf, "%u", GetTopTransactionId()); + appendStringInfo(buf, "%u", GetTopTransactionIdIfAny()); break; case '%': appendStringInfoChar(buf, '%'); @@ -1785,15 +1791,8 @@ write_csvlog(ErrorData *edata) 
appendStringInfoString(&buf, formatted_start_time); appendStringInfoChar(&buf, ','); - /* Transaction id */ - if (MyProcPort) - { - if (IsTransactionState()) - appendStringInfo(&buf, "%u", GetTopTransactionId()); - else - appendStringInfo(&buf, "%u", InvalidTransactionId); - } + appendStringInfo(&buf, "%u", GetTopTransactionIdIfAny()); appendStringInfoChar(&buf, ','); diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b22099c2fd7c88bf11586a9ee73fb0791adca5fe..7de3145aa01c59093d3efa3e1aaf98d720351bf7 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -338,7 +338,8 @@ # %c = session id # %l = session line number # %s = session start timestamp - # %x = transaction id + # %v = virtual transaction id + # %x = transaction id (0 if none) # %q = stop here in non-session # processes # %% = '%' diff --git a/src/include/access/xact.h b/src/include/access/xact.h index e8e2b08de42b018ddf2399127f7c6fe657a12dc6..731269af9a073dee29720812b89abb4d70367888 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.88 2007/08/01 22:45:09 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.89 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -139,6 +139,7 @@ typedef struct xl_xact_abort_prepared extern bool IsTransactionState(void); extern bool IsAbortedTransactionBlockState(void); extern TransactionId GetTopTransactionId(void); +extern TransactionId GetTopTransactionIdIfAny(void); extern TransactionId GetCurrentTransactionId(void); extern TransactionId GetCurrentTransactionIdIfAny(void); extern SubTransactionId GetCurrentSubTransactionId(void); diff --git 
a/src/include/access/xlog.h b/src/include/access/xlog.h index 2e1928dace06d5548ddcb643d484178574c02d47..372a43797a4f2f3347bd9b91c5b30a7276cd905c 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.82 2007/08/01 22:45:09 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.83 2007/09/05 18:10:48 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -85,12 +85,6 @@ typedef struct XLogRecord */ #define XLR_BKP_REMOVABLE 0x01 -/* - * Sometimes we log records which are out of transaction control. - * Rmgr may "or" XLOG_NO_TRAN into info passed to XLogInsert to indicate this. - */ -#define XLOG_NO_TRAN XLR_INFO_MASK - /* Sync methods */ #define SYNC_METHOD_FSYNC 0 #define SYNC_METHOD_FDATASYNC 1 @@ -139,10 +133,7 @@ typedef struct XLogRecData extern TimeLineID ThisTimeLineID; /* current TLI */ extern bool InRecovery; -extern XLogRecPtr MyLastRecPtr; -extern bool MyXactMadeXLogEntry; -extern bool MyXactMadeTempRelUpdate; -extern XLogRecPtr ProcLastRecEnd; +extern XLogRecPtr XactLastRecEnd; /* these variables are GUC parameters related to XLOG */ extern int CheckPointSegments; diff --git a/src/include/c.h b/src/include/c.h index 35e7bb9150fea278bb203a4df37585e30d00335e..d808609ab01786de4ddb6556a45a0ed2671affe1 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -12,7 +12,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/c.h,v 1.220 2007/07/25 12:22:52 mha Exp $ + * $PostgreSQL: pgsql/src/include/c.h,v 1.221 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -370,6 +370,8 @@ typedef regproc RegProcedure; typedef uint32 TransactionId; +typedef 
uint32 LocalTransactionId; + typedef uint32 SubTransactionId; #define InvalidSubTransactionId ((SubTransactionId) 0) diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e229f161f945a9d68da7388cca2d1ce5af47fd7d..dcd9c90ecbbac2ed2ecb79d1934a5b680776a715 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.422 2007/09/04 16:41:42 adunstan Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.423 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200709041 +#define CATALOG_VERSION_NO 200709042 #endif diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 36474cd2781fce01139276d1df6c5662c768afe8..fedf6b1fffb3f36a98f1f0c7349509c04e426400 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.58 2007/06/19 20:13:22 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.59 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -55,6 +55,11 @@ extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid); extern bool ConditionalXactLockTableWait(TransactionId xid); +/* Lock a VXID (used to wait for a transaction to finish) */ +extern void VirtualXactLockTableInsert(VirtualTransactionId vxid); +extern void VirtualXactLockTableWait(VirtualTransactionId vxid); +extern bool 
ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid); + /* Lock a general object (other than a relation) of the current database */ extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index e2a5bc7b6f54e4bd94c521ae11182ec53a29d413..30c8a3fa2bc208f71a59f354651772082450fa29 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.106 2007/06/19 20:13:22 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.107 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,6 +15,7 @@ #define LOCK_H_ #include "nodes/pg_list.h" +#include "storage/backendid.h" #include "storage/itemptr.h" #include "storage/lwlock.h" #include "storage/shmem.h" @@ -41,6 +42,37 @@ extern bool Debug_deadlocks; #endif /* LOCK_DEBUG */ +/* + * Top-level transactions are identified by VirtualTransactionIDs comprising + * the BackendId of the backend running the xact, plus a locally-assigned + * LocalTransactionId. These are guaranteed unique over the short term, + * but will be reused after a database restart; hence they should never + * be stored on disk. + * + * Note that struct VirtualTransactionId can not be assumed to be atomically + * assignable as a whole. However, type LocalTransactionId is assumed to + * be atomically assignable, and the backend ID doesn't change often enough + * to be a problem, so we can fetch or assign the two fields separately. + * We deliberately refrain from using the struct within PGPROC, to prevent + * coding errors from trying to use struct assignment with it; instead use + * GET_VXID_FROM_PGPROC(). 
+ */ +typedef struct +{ + BackendId backendId; /* determined at backend startup */ + LocalTransactionId localTransactionId; /* backend-local transaction id */ +} VirtualTransactionId; + +#define InvalidLocalTransactionId 0 +#define LocalTransactionIdIsValid(lxid) ((lxid) != InvalidLocalTransactionId) +#define VirtualTransactionIdIsValid(vxid) \ + (((vxid).backendId != InvalidBackendId) && \ + LocalTransactionIdIsValid((vxid).localTransactionId)) +#define GET_VXID_FROM_PGPROC(vxid, proc) \ + ((vxid).backendId = (proc).backendId, \ + (vxid).localTransactionId = (proc).lxid) + + /* * LOCKMODE is an integer (1..N) indicating a lock type. LOCKMASK is a bit * mask indicating a set of held or requested lock types (the bit 1<<mode @@ -139,6 +171,8 @@ typedef enum LockTagType /* ID info for a tuple is PAGE info + OffsetNumber */ LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */ /* ID info for a transaction is its TransactionId */ + LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */ + /* ID info for a virtual transaction is its VirtualTransactionId */ LOCKTAG_OBJECT, /* non-relation database object */ /* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */ @@ -214,6 +248,14 @@ typedef struct LOCKTAG (locktag).locktag_type = LOCKTAG_TRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) +#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ + ((locktag).locktag_field1 = (vxid).backendId, \ + (locktag).locktag_field2 = (vxid).localTransactionId, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + #define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \ ((locktag).locktag_field1 = (dboid), \ (locktag).locktag_field2 = (classoid), \ @@ -431,7 +473,8 @@ extern bool LockRelease(const LOCKTAG *locktag, extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks); extern void 
LockReleaseCurrentOwner(void); extern void LockReassignCurrentOwner(void); -extern List *GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode); +extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, + LOCKMODE lockmode); extern void AtPrepare_Locks(void); extern void PostPrepare_Locks(TransactionId xid); extern int LockCheckConflicts(LockMethod lockMethodTable, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 756b0ffb0e7dd60e6dc8eb60bd860ecfd473e959..9fefa0a5a93e5bd2dfe4505caca7935e8d8cc222 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.99 2007/07/25 12:22:53 mha Exp $ + * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.100 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,8 +62,13 @@ struct PGPROC PGSemaphoreData sem; /* ONE semaphore to sleep on */ int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */ - TransactionId xid; /* transaction currently being executed by - * this proc */ + LocalTransactionId lxid; /* local id of top-level transaction currently + * being executed by this proc, if running; + * else InvalidLocalTransactionId */ + + TransactionId xid; /* id of top-level transaction currently being + * executed by this proc, if running and XID + * is assigned; else InvalidTransactionId */ TransactionId xmin; /* minimal running XID as it was when we were * starting our xact, excluding LAZY VACUUM: @@ -71,6 +76,7 @@ struct PGPROC * xid >= xmin ! 
*/ int pid; /* This backend's process id, or 0 */ + BackendId backendId; /* This backend's backend ID (if assigned) */ Oid databaseId; /* OID of database this backend is using */ Oid roleId; /* OID of role using this backend */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index dafb83a9658f9503d4de39ac1543592b5ad47f81..524710506a7dfc78ad1d21f8990b54a1b3249ccd 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.14 2007/06/01 19:38:07 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.15 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ extern PGPROC *BackendPidGetProc(int pid); extern int BackendXidGetPid(TransactionId xid); extern bool IsBackendPid(int pid); +extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin); extern int CountActiveBackends(void); extern int CountDBBackends(Oid databaseid); extern int CountUserBackends(Oid roleid); diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index 778d7a4a2ba77bddae8288753027842c1a6eaf5d..ff0a68e25a2c9f052f98b0deb139d61295ca54a7 100644 --- a/src/include/storage/sinvaladt.h +++ b/src/include/storage/sinvaladt.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/sinvaladt.h,v 1.42 2007/01/05 22:19:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/sinvaladt.h,v 1.43 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,6 +85,13 @@ typedef struct SISeg int maxBackends; /* size of 
procState array */ int freeBackends; /* number of empty procState slots */ + /* + * Next LocalTransactionId to use for each idle backend slot. We keep + * this here because it is indexed by BackendId and it is convenient to + * copy the value to and from local memory when MyBackendId is set. + */ + LocalTransactionId *nextLXID; /* array of maxBackends entries */ + /* * Circular buffer holding shared-inval messages */ @@ -114,4 +121,6 @@ extern int SIGetDataEntry(SISeg *segP, int backendId, SharedInvalidationMessage *data); extern void SIDelExpiredDataEntries(SISeg *segP); +extern LocalTransactionId GetNextLocalTransactionId(void); + #endif /* SINVALADT_H */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 3beb14febaf13bca85a620287a921167942efa55..bc071e7ef052d85c8aed73ac8c288c89800e0fd3 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.59 2007/09/05 18:10:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -76,7 +76,8 @@ extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp); extern void smgrimmedsync(SMgrRelation reln); extern void smgrDoPendingDeletes(bool isCommit); -extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr); +extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, + bool *haveNonTemp); extern void AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); extern void PostPrepare_smgr(void); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 39e27a749241f29766b99f5e9978fab37608d2cd..ebbf8d1626210d9e9e86e5c8f2432e4900764fb7 100644 --- a/src/test/regress/expected/rules.out 
+++ b/src/test/regress/expected/rules.out @@ -1282,7 +1282,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem pg_cursors | SELECT c.name, c.statement, c.is_holdable, c.is_binary, c.is_scrollable, c.creation_time FROM pg_cursor() c(name text, statement text, is_holdable boolean, is_binary boolean, is_scrollable boolean, creation_time timestamp with time zone); pg_group | SELECT pg_authid.rolname AS groname, pg_authid.oid AS grosysid, ARRAY(SELECT pg_auth_members.member FROM pg_auth_members WHERE (pg_auth_members.roleid = pg_authid.oid)) AS grolist FROM pg_authid WHERE (NOT pg_authid.rolcanlogin); pg_indexes | SELECT n.nspname AS schemaname, c.relname AS tablename, i.relname AS indexname, t.spcname AS tablespace, pg_get_indexdef(i.oid) AS indexdef FROM ((((pg_index x JOIN pg_class c ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = i.reltablespace))) WHERE ((c.relkind = 'r'::"char") AND (i.relkind = 'i'::"char")); - pg_locks | SELECT l.locktype, l.database, l.relation, l.page, l.tuple, l.transactionid, l.classid, l.objid, l.objsubid, l.transaction, l.pid, l.mode, l.granted FROM pg_lock_status() l(locktype text, database oid, relation oid, page integer, tuple smallint, transactionid xid, classid oid, objid oid, objsubid smallint, transaction xid, pid integer, mode text, granted boolean); + pg_locks | SELECT l.locktype, l.database, l.relation, l.page, l.tuple, l.virtualxid, l.transactionid, l.classid, l.objid, l.objsubid, l.virtualtransaction, l.pid, l.mode, l.granted FROM pg_lock_status() l(locktype text, database oid, relation oid, page integer, tuple smallint, virtualxid text, transactionid xid, classid oid, objid oid, objsubid smallint, virtualtransaction text, pid integer, mode text, granted boolean); pg_prepared_statements | SELECT p.name, p.statement, p.prepare_time, p.parameter_types, p.from_sql FROM 
pg_prepared_statement() p(name text, statement text, prepare_time timestamp with time zone, parameter_types regtype[], from_sql boolean); pg_prepared_xacts | SELECT p.transaction, p.gid, p.prepared, u.rolname AS owner, d.datname AS database FROM ((pg_prepared_xact() p(transaction xid, gid text, prepared timestamp with time zone, ownerid oid, dbid oid) LEFT JOIN pg_authid u ON ((p.ownerid = u.oid))) LEFT JOIN pg_database d ON ((p.dbid = d.oid))); pg_roles | SELECT pg_authid.rolname, pg_authid.rolsuper, pg_authid.rolinherit, pg_authid.rolcreaterole, pg_authid.rolcreatedb, pg_authid.rolcatupdate, pg_authid.rolcanlogin, pg_authid.rolconnlimit, '********'::text AS rolpassword, pg_authid.rolvaliduntil, pg_authid.rolconfig, pg_authid.oid FROM pg_authid;