From 5df307c7782518c4a3c19ffd05c7cb591b97e23c Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Tue, 6 Aug 2002 02:36:35 +0000 Subject: [PATCH] Restructure local-buffer handling per recent pghackers discussion. The local buffer manager is no longer used for newly-created relations (unless they are TEMP); a new non-TEMP relation goes through the shared bufmgr and thus will participate normally in checkpoints. But TEMP relations use the local buffer manager throughout their lifespan. Also, operations in TEMP relations are not logged in WAL, thus improving performance. Since it's no longer necessary to fsync relations as they move out of the local buffers into shared buffers, quite a lot of smgr.c/md.c/fd.c code is no longer needed and has been removed: there's no concept of a dirty relation anymore in md.c/fd.c, and we never fsync anything but WAL. Still TODO: improve local buffer management algorithms so that it would be reasonable to increase NLocBuffer. --- src/backend/access/heap/heapam.c | 34 ++- src/backend/access/heap/hio.c | 12 +- src/backend/access/heap/tuptoaster.c | 4 +- src/backend/access/nbtree/nbtinsert.c | 14 +- src/backend/access/nbtree/nbtpage.c | 7 +- src/backend/access/transam/xact.c | 180 +++++++++------- src/backend/access/transam/xlog.c | 18 +- src/backend/catalog/heap.c | 4 +- src/backend/catalog/indexing.c | 4 +- src/backend/commands/sequence.c | 20 +- src/backend/commands/vacuum.c | 41 +++- src/backend/commands/vacuumlazy.c | 10 +- src/backend/executor/execUtils.c | 4 +- src/backend/storage/buffer/buf_init.c | 4 +- src/backend/storage/buffer/bufmgr.c | 292 +++++++++----------------- src/backend/storage/buffer/localbuf.c | 129 ++++-------- src/backend/storage/file/fd.c | 140 +----------- src/backend/storage/smgr/md.c | 166 ++------------- src/backend/storage/smgr/mm.c | 35 +-- src/backend/storage/smgr/smgr.c | 119 +++-------- src/backend/utils/cache/relcache.c | 196 +++++++---------- src/include/access/xlog.h | 3 +- src/include/storage/buf_internals.h | 15 +- src/include/storage/bufmgr.h | 8 +- src/include/storage/fd.h | 4 +- src/include/storage/smgr.h | 23 +- src/include/utils/rel.h | 6 +- src/include/utils/relcache.h | 6 +- 28 files changed, 543 insertions(+), 955 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1abb938fdf8..e9f69476283 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.143 2002/07/30 16:08:33 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.144 2002/08/06 02:36:33 tgl Exp $ * * * INTERFACE ROUTINES @@ -1155,6 +1155,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) pgstat_count_heap_insert(&relation->pgstat_info); /* XLOG stuff */ + if (!relation->rd_istemp) { xl_heap_insert xlrec; xl_heap_header xlhdr; @@ -1204,6 +1205,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -1323,12 +1330,15 @@ l1: } START_CRIT_SECTION(); + /* store transaction information of xact deleting the tuple */ tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE); HeapTupleHeaderSetXmax(tp.t_data, GetCurrentTransactionId()); HeapTupleHeaderSetCmax(tp.t_data, cid); + /* XLOG stuff */ + if (!relation->rd_istemp) { xl_heap_delete xlrec; XLogRecPtr recptr; @@ -1351,12 +1361,17 @@ l1: PageSetLSN(dp, recptr); PageSetSUI(dp, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); #ifdef TUPLE_TOASTER_ACTIVE - /* * If the relation has toastable attributes, we need to delete no * longer needed items there too. We have to do this before @@ -1659,6 +1674,7 @@ l2: oldtup.t_data->t_ctid = newtup->t_self; /* XLOG stuff */ + if (!relation->rd_istemp) { XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self, newbuf, newtup, false); @@ -1671,6 +1687,11 @@ l2: PageSetLSN(BufferGetPage(buffer), recptr); PageSetSUI(BufferGetPage(buffer), ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } END_CRIT_SECTION(); @@ -1927,6 +1948,9 @@ log_heap_clean(Relation reln, Buffer buffer, char *unused, int unlen) XLogRecPtr recptr; XLogRecData rdata[3]; + /* Caller should not call me on a temp relation */ + Assert(!reln->rd_istemp); + xlrec.node = reln->rd_node; xlrec.block = BufferGetBlockNumber(buffer); rdata[0].buffer = InvalidBuffer; @@ -1978,6 +2002,9 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, Page page = BufferGetPage(newbuf); uint8 info = (move) ? XLOG_HEAP_MOVE : XLOG_HEAP_UPDATE; + /* Caller should not call me on a temp relation */ + Assert(!reln->rd_istemp); + xlrec.target.node = reln->rd_node; xlrec.target.tid = from; xlrec.newtid = newtup->t_self; @@ -2012,7 +2039,8 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, xid[0] = HeapTupleHeaderGetXmax(newtup->t_data); xid[1] = HeapTupleHeaderGetXmin(newtup->t_data); memcpy((char *) &xlhdr + hsize, - (char *) xid, 2 * sizeof(TransactionId)); + (char *) xid, + 2 * sizeof(TransactionId)); hsize += 2 * sizeof(TransactionId); } rdata[2].buffer = newbuf; diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 602ad748d9b..67eb4ad7e24 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Id: hio.c,v 1.45 2002/06/20 20:29:25 momjian Exp $ + * $Id: hio.c,v 1.46 2002/08/06 02:36:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -102,6 +102,7 @@ RelationGetBufferForTuple(Relation relation, Size len, Size pageFreeSpace; BlockNumber targetBlock, otherBlock; + bool needLock; len = MAXALIGN(len); /* be conservative */ @@ -231,9 +232,12 @@ RelationGetBufferForTuple(Relation relation, Size len, * * We have to use a lock to ensure no one else is extending the rel at * the same time, else we will both try to initialize the same new - * page. + * page. We can skip locking for new or temp relations, however, + * since no one else could be accessing them. */ - if (!relation->rd_myxactonly) + needLock = !(relation->rd_isnew || relation->rd_istemp); + + if (needLock) LockPage(relation, 0, ExclusiveLock); /* @@ -249,7 +253,7 @@ RelationGetBufferForTuple(Relation relation, Size len, * Release the file-extension lock; it's now OK for someone else to * extend the relation some more. */ - if (!relation->rd_myxactonly) + if (needLock) UnlockPage(relation, 0, ExclusiveLock); /* diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 2945cf3458c..1c09af2b308 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.33 2002/07/20 05:16:56 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.34 2002/08/06 02:36:33 tgl Exp $ * * * INTERFACE ROUTINES @@ -915,7 +915,7 @@ toast_save_datum(Relation rel, Datum value) */ idxres = index_insert(toastidx, t_values, t_nulls, &(toasttup->t_self), - toastrel, toastidx->rd_uniqueindex); + toastrel, toastidx->rd_index->indisunique); if (idxres == NULL) elog(ERROR, "Failed to insert index entry for TOAST tuple"); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index c0190859b7b..16d63e03c99 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.94 2002/07/02 05:48:44 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.95 2002/08/06 02:36:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -623,8 +623,11 @@ _bt_insertuple(Relation rel, Buffer buf, BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); START_CRIT_SECTION(); + _bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page"); + /* XLOG stuff */ + if (!rel->rd_istemp) { xl_btree_insert xlrec; uint8 flag = XLOG_BTREE_INSERT; @@ -866,6 +869,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, * NO ELOG(ERROR) till right sibling is updated. */ START_CRIT_SECTION(); + + /* XLOG stuff */ + if (!rel->rd_istemp) { xl_btree_split xlrec; int flag = (newitemonleft) ? @@ -891,7 +897,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, BlockIdSet(&(xlrec.rightblk), ropaque->btpo_next); /* - * Dirrect access to page is not good but faster - we should + * Direct access to page is not good but faster - we should * implement some new func in page API. */ xlrec.leftlen = ((PageHeader) leftpage)->pd_special - @@ -1352,6 +1358,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) (metad->btm_level)++; /* XLOG stuff */ + if (!rel->rd_istemp) { xl_btree_newroot xlrec; XLogRecPtr recptr; @@ -1366,7 +1373,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) rdata[0].next = &(rdata[1]); /* - * Dirrect access to page is not good but faster - we should + * Direct access to page is not good but faster - we should * implement some new func in page API. */ rdata[1].buffer = InvalidBuffer; @@ -1388,6 +1395,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) PageSetLSN(rpage, recptr); PageSetSUI(rpage, ThisStartUpID); } + END_CRIT_SECTION(); /* write and let go of metapage buffer */ diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 386cb6a07a5..110de694066 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.57 2002/06/20 20:29:25 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.58 2002/08/06 02:36:33 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -173,6 +173,7 @@ _bt_getroot(Relation rel, int access) rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); /* XLOG stuff */ + if (!rel->rd_istemp) { xl_btree_newroot xlrec; XLogRecPtr recptr; @@ -187,7 +188,8 @@ _bt_getroot(Relation rel, int access) rdata.next = NULL; recptr = XLogInsert(RM_BTREE_ID, - XLOG_BTREE_NEWROOT | XLOG_BTREE_LEAF, &rdata); + XLOG_BTREE_NEWROOT | XLOG_BTREE_LEAF, + &rdata); PageSetLSN(rootpage, recptr); PageSetSUI(rootpage, ThisStartUpID); @@ -457,6 +459,7 @@ _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid) PageIndexTupleDelete(page, offno); /* XLOG stuff */ + if (!rel->rd_istemp) { xl_btree_delete xlrec; XLogRecPtr recptr; diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 3a992f6ccfe..c9b60daef56 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.129 2002/08/02 22:36:05 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.130 2002/08/06 02:36:33 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -505,44 +505,32 @@ AtStart_Memory(void) * ---------------------------------------------------------------- */ -/* -------------------------------- +/* * RecordTransactionCommit - * - * Note: the two calls to BufferManagerFlush() exist to ensure - * that data pages are written before log pages. These - * explicit calls should be replaced by a more efficient - * ordered page write scheme in the buffer manager - * -cim 3/18/90 - * -------------------------------- */ void RecordTransactionCommit(void) { - TransactionId xid; - bool leak; - - leak = BufferPoolCheckLeak(); - - xid = GetCurrentTransactionId(); - /* - * We only need to log the commit in xlog and clog if the transaction made - * any transaction-controlled XLOG entries. (Otherwise, its XID appears - * nowhere in permanent storage, so no one will ever care if it - * committed.) However, we must flush XLOG to disk if we made any XLOG - * entries, whether in or out of transaction control. For example, if we - * reported a nextval() result to the client, this ensures that any XLOG - * record generated by nextval will hit the disk before we report the - * transaction committed. + * If we made neither any XLOG entries nor any temp-rel updates, + * we can omit recording the transaction commit at all. */ - if (MyXactMadeXLogEntry) + if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate) { + TransactionId xid = GetCurrentTransactionId(); XLogRecPtr recptr; + /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); START_CRIT_SECTION(); + /* + * We only need to log the commit in xlog if the transaction made any + * transaction-controlled XLOG entries. (Otherwise, its XID appears + * nowhere in permanent storage, so no one else will ever care if it + * committed.) + */ if (MyLastRecPtr.xrecoff != 0) { /* Need to emit a commit record */ @@ -567,30 +555,48 @@ RecordTransactionCommit(void) } /* - * Sleep before flush! So we can flush more than one commit - * records per single fsync. (The idea is some other backend may - * do the XLogFlush while we're sleeping. This needs work still, - * because on most Unixen, the minimum select() delay is 10msec or - * more, which is way too long.) - * - * We do not sleep if enableFsync is not turned on, nor if there are - * fewer than CommitSiblings other backends with active - * transactions. + * We must flush our XLOG entries to disk if we made any XLOG entries, + * whether in or out of transaction control. For example, if we + * reported a nextval() result to the client, this ensures that any + * XLOG record generated by nextval will hit the disk before we report + * the transaction committed. */ - if (CommitDelay > 0 && enableFsync && - CountActiveBackends() >= CommitSiblings) + if (MyXactMadeXLogEntry) { - struct timeval delay; + /* + * Sleep before flush! So we can flush more than one commit + * records per single fsync. (The idea is some other backend may + * do the XLogFlush while we're sleeping. This needs work still, + * because on most Unixen, the minimum select() delay is 10msec or + * more, which is way too long.) + * + * We do not sleep if enableFsync is not turned on, nor if there + * are fewer than CommitSiblings other backends with active + * transactions. + */ + if (CommitDelay > 0 && enableFsync && + CountActiveBackends() >= CommitSiblings) + { + struct timeval delay; - delay.tv_sec = 0; - delay.tv_usec = CommitDelay; - (void) select(0, NULL, NULL, NULL, &delay); - } + delay.tv_sec = 0; + delay.tv_usec = CommitDelay; + (void) select(0, NULL, NULL, NULL, &delay); + } - XLogFlush(recptr); + XLogFlush(recptr); + } - /* Mark the transaction committed in clog, if needed */ - if (MyLastRecPtr.xrecoff != 0) + /* + * We must mark the transaction committed in clog if its XID appears + * either in permanent rels or in local temporary rels. We test + * this by seeing if we made transaction-controlled entries *OR* + * local-rel tuple updates. Note that if we made only the latter, + * we have not emitted an XLOG record for our commit, and so in the + * event of a crash the clog update might be lost. This is okay + * because no one else will ever care whether we committed. + */ + if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) TransactionIdCommit(xid); END_CRIT_SECTION(); @@ -599,12 +605,10 @@ RecordTransactionCommit(void) /* Break the chain of back-links in the XLOG records I output */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; + MyXactMadeTempRelUpdate = false; /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; - - if (leak) - ResetBufferPool(true); } @@ -615,8 +619,10 @@ RecordTransactionCommit(void) static void AtCommit_Cache(void) { - /* Check for relcache reference-count leaks */ - AtEOXactRelationCache(true); + /* + * Clean up the relation cache. + */ + AtEOXact_RelationCache(true); /* * Make catalog changes visible to all backends. */ @@ -679,45 +685,60 @@ AtCommit_Memory(void) * ---------------------------------------------------------------- */ -/* -------------------------------- +/* * RecordTransactionAbort - * -------------------------------- */ static void RecordTransactionAbort(void) { - TransactionId xid = GetCurrentTransactionId(); - /* - * We only need to log the abort in xlog and clog if the transaction made - * any transaction-controlled XLOG entries. (Otherwise, its XID appears - * nowhere in permanent storage, so no one will ever care if it - * committed.) We do not flush XLOG to disk in any case, since the - * default assumption after a crash would be that we aborted, anyway. - * - * Extra check here is to catch case that we aborted partway through - * RecordTransactionCommit ... + * If we made neither any transaction-controlled XLOG entries nor any + * temp-rel updates, we can omit recording the transaction abort at all. + * No one will ever care that it aborted. */ - if (MyLastRecPtr.xrecoff != 0 && !TransactionIdDidCommit(xid)) + if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) { - XLogRecData rdata; - xl_xact_abort xlrec; - XLogRecPtr recptr; + TransactionId xid = GetCurrentTransactionId(); - xlrec.xtime = time(NULL); - rdata.buffer = InvalidBuffer; - rdata.data = (char *) (&xlrec); - rdata.len = SizeOfXactAbort; - rdata.next = NULL; + /* + * Catch the scenario where we aborted partway through + * RecordTransactionCommit ... + */ + if (TransactionIdDidCommit(xid)) + elog(PANIC, "RecordTransactionAbort: xact %u already committed", + xid); START_CRIT_SECTION(); /* - * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP + * We only need to log the abort in XLOG if the transaction made any + * transaction-controlled XLOG entries. (Otherwise, its XID appears + * nowhere in permanent storage, so no one else will ever care if it + * committed.) We do not flush XLOG to disk in any case, since the + * default assumption after a crash would be that we aborted, anyway. */ - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata); + if (MyLastRecPtr.xrecoff != 0) + { + XLogRecData rdata; + xl_xact_abort xlrec; + XLogRecPtr recptr; + + xlrec.xtime = time(NULL); + rdata.buffer = InvalidBuffer; + rdata.data = (char *) (&xlrec); + rdata.len = SizeOfXactAbort; + rdata.next = NULL; + + /* + * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP + */ + recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata); + } - /* Mark the transaction aborted in clog */ + /* + * Mark the transaction aborted in clog. This is not absolutely + * necessary but we may as well do it while we are here. + */ TransactionIdAbort(xid); END_CRIT_SECTION(); @@ -726,14 +747,10 @@ RecordTransactionAbort(void) /* Break the chain of back-links in the XLOG records I output */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; + MyXactMadeTempRelUpdate = false; /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; - - /* - * Tell bufmgr and smgr to release resources. - */ - ResetBufferPool(false); /* false -> is abort */ } /* -------------------------------- @@ -743,7 +760,7 @@ RecordTransactionAbort(void) static void AtAbort_Cache(void) { - AtEOXactRelationCache(false); + AtEOXact_RelationCache(false); AtEOXactInvalidationMessages(false); } @@ -975,7 +992,6 @@ CommitTransaction(void) * noncritical resource releasing. */ - RelationPurgeLocalRelation(true); smgrDoPendingDeletes(true); AtEOXact_GUC(true); @@ -989,6 +1005,8 @@ CommitTransaction(void) AtCommit_Locks(); AtEOXact_CatCache(true); AtCommit_Memory(); + AtEOXact_Buffers(true); + smgrabort(); AtEOXact_Files(); /* Count transaction commit in statistics collector */ @@ -1076,7 +1094,6 @@ AbortTransaction(void) LWLockRelease(SInvalLock); } - RelationPurgeLocalRelation(false); smgrDoPendingDeletes(false); AtEOXact_GUC(false); @@ -1089,6 +1106,7 @@ AbortTransaction(void) AtAbort_Cache(); AtEOXact_CatCache(false); AtAbort_Memory(); + AtEOXact_Buffers(false); AtEOXact_Files(); AtAbort_Locks(); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 872722b856c..fbe61e5691c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.100 2002/08/05 01:24:13 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.101 2002/08/06 02:36:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -136,11 +136,20 @@ bool InRecovery = false; * to be set true. The latter can be used to test whether the current xact * made any loggable changes (including out-of-xact changes, such as * sequence updates). + * + * When we insert/update/delete a tuple in a temporary relation, we do not + * make any XLOG record, since we don't care about recovering the state of + * the temp rel after a crash. However, we will still need to remember + * whether our transaction committed or aborted in that case. So, we must + * set MyXactMadeTempRelUpdate true to indicate that the XID will be of + * interest later. */ XLogRecPtr MyLastRecPtr = {0, 0}; bool MyXactMadeXLogEntry = false; +bool MyXactMadeTempRelUpdate = false; + /* * ProcLastRecPtr points to the start of the last XLOG record inserted by the * current backend. It is updated for all inserts, transaction-controlled @@ -2923,6 +2932,7 @@ ShutdownXLOG(void) /* suppress in-transaction check in CreateCheckPoint */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; + MyXactMadeTempRelUpdate = false; CritSectionCount++; CreateDummyCaches(); @@ -3084,12 +3094,10 @@ CreateCheckPoint(bool shutdown) /* * Having constructed the checkpoint record, ensure all shmem disk - * buffers are flushed to disk. + * buffers and commit-log buffers are flushed to disk. */ - FlushBufferPool(); - - /* And commit-log buffers, too */ CheckPointCLOG(); + FlushBufferPool(); /* * Now insert the checkpoint record into XLOG. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index b61ad732127..6bf905c6ea2 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.218 2002/08/05 03:29:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.219 2002/08/06 02:36:33 tgl Exp $ * * * INTERFACE ROUTINES @@ -1919,7 +1919,7 @@ heap_truncate(Oid rid) * a rel created in the current xact (which would be deleted on abort, * anyway). */ - if (IsTransactionBlock() && !rel->rd_myxactonly) + if (IsTransactionBlock() && !rel->rd_isnew) elog(ERROR, "TRUNCATE TABLE cannot run inside a transaction block"); /* diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 7f558b4d9de..4206c33edb3 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.100 2002/08/05 03:29:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.101 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -121,7 +121,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) nullv, /* info on nulls */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, - relationDescs[i]->rd_uniqueindex); + relationDescs[i]->rd_index->indisunique); if (result) pfree(result); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index f8a05b619de..a33fcd24a40 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/sequence.c,v 1.83 2002/07/16 22:12:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/sequence.c,v 1.84 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -237,6 +237,7 @@ DefineSequence(CreateSeqStmt *seq) * means two log records instead of one :-( */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + START_CRIT_SECTION(); { @@ -260,6 +261,8 @@ DefineSequence(CreateSeqStmt *seq) tuple->t_data->t_infomask |= HEAP_XMIN_COMMITTED; } + /* XLOG stuff */ + if (!rel->rd_istemp) { xl_seq_rec xlrec; XLogRecPtr recptr; @@ -287,6 +290,7 @@ DefineSequence(CreateSeqStmt *seq) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + END_CRIT_SECTION(); LockBuffer(buf, BUFFER_LOCK_UNLOCK); @@ -437,7 +441,9 @@ nextval(PG_FUNCTION_ARGS) elm->cached = last; /* last fetched number */ START_CRIT_SECTION(); - if (logit) + + /* XLOG stuff */ + if (logit && !seqrel->rd_istemp) { xl_seq_rec xlrec; XLogRecPtr recptr; @@ -449,9 +455,11 @@ nextval(PG_FUNCTION_ARGS) rdata[0].len = sizeof(xl_seq_rec); rdata[0].next = &(rdata[1]); + /* set values that will be saved in xlog */ seq->last_value = next; seq->is_called = true; seq->log_cnt = 0; + rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - @@ -468,6 +476,7 @@ nextval(PG_FUNCTION_ARGS) seq->last_value = last; /* last fetched number */ seq->is_called = true; seq->log_cnt = log; /* how much is logged */ + END_CRIT_SECTION(); LockBuffer(buf, BUFFER_LOCK_UNLOCK); @@ -550,6 +559,9 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) * values) */ START_CRIT_SECTION(); + + /* XLOG stuff */ + if (!seqrel->rd_istemp) { xl_seq_rec xlrec; XLogRecPtr recptr; @@ -562,9 +574,11 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) rdata[0].len = sizeof(xl_seq_rec); rdata[0].next = &(rdata[1]); + /* set values that will be saved in xlog */ seq->last_value = next; seq->is_called = true; seq->log_cnt = 0; + rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - @@ -576,10 +590,12 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + /* save info in sequence relation */ seq->last_value = next; /* last fetched number */ seq->is_called = iscalled; seq->log_cnt = (iscalled) ? 0 : 1; + END_CRIT_SECTION(); LockBuffer(buf, BUFFER_LOCK_UNLOCK); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c893ea86a09..8d2cd4da58c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.232 2002/07/20 05:16:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.233 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1899,6 +1899,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid); ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff); + /* XLOG stuff */ + if (!onerel->rd_istemp) { XLogRecPtr recptr = log_heap_move(onerel, Cbuf, tuple.t_self, @@ -1912,6 +1914,12 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, PageSetLSN(ToPage, recptr); PageSetSUI(ToPage, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); if (destvacpage->blkno > last_move_dest_block) @@ -2042,6 +2050,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, tuple.t_data->t_infomask |= HEAP_MOVED_OFF; HeapTupleHeaderSetXvac(tuple.t_data, myXID); + /* XLOG stuff */ + if (!onerel->rd_istemp) { XLogRecPtr recptr = log_heap_move(onerel, buf, tuple.t_self, @@ -2052,6 +2062,12 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, PageSetLSN(ToPage, recptr); PageSetSUI(ToPage, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); cur_page->offsets_used++; @@ -2321,8 +2337,13 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, } Assert(vacpage->offsets_free == num_tuples); + START_CRIT_SECTION(); + uncnt = PageRepairFragmentation(page, unused); + + /* XLOG stuff */ + if (!onerel->rd_istemp) { XLogRecPtr recptr; @@ -2331,7 +2352,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); WriteBuffer(buf); } @@ -2450,12 +2478,17 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) Assert(vacpage->offsets_used == 0); START_CRIT_SECTION(); + for (i = 0; i < vacpage->offsets_free; i++) { itemid = PageGetItemId(page, vacpage->offsets[i]); itemid->lp_flags &= ~LP_USED; } + uncnt = PageRepairFragmentation(page, unused); + + /* XLOG stuff */ + if (!onerel->rd_istemp) { XLogRecPtr recptr; @@ -2464,6 +2497,12 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index bbf9e39ae80..4fb613cc67e 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.17 2002/07/20 05:16:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.18 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -523,6 +523,8 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, uncnt = PageRepairFragmentation(page, unused); + /* XLOG stuff */ + if (!onerel->rd_istemp) { XLogRecPtr recptr; @@ -531,6 +533,12 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } + else + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + END_CRIT_SECTION(); return tupindex; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index c2bd2de48ab..24f232469b0 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.87 2002/07/20 05:16:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.88 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -691,7 +691,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot, nullv, /* info on nulls */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, - relationDescs[i]->rd_uniqueindex && !is_vacuum); + relationDescs[i]->rd_index->indisunique && !is_vacuum); /* * keep track of index inserts for debugging diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 6132b732f86..a8c56562f2d 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.49 2002/06/20 20:29:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.50 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -258,7 +258,7 @@ ShutdownBufferPoolAccess(void) /* Release any buffer context locks we are holding */ UnlockBuffers(); /* Release any buffer reference counts we are holding */ - ResetBufferPool(false); + AtEOXact_Buffers(false); } /* ----------------------------------------------------- diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b2c19e99f47..1ca7af3b775 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.127 2002/07/02 05:47:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.128 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,16 +57,9 @@ #include "pgstat.h" #define BufferGetLSN(bufHdr) \ - (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data))) + (*((XLogRecPtr*) MAKE_PTR((bufHdr)->data))) -extern long int ReadBufferCount; -extern long int ReadLocalBufferCount; -extern long int BufferHitCount; -extern long int LocalBufferHitCount; -extern long int BufferFlushCount; -extern long int LocalBufferFlushCount; - static void WaitIO(BufferDesc *buf); static void StartBufferIO(BufferDesc *buf, bool forInput); static void TerminateBufferIO(BufferDesc *buf); @@ -82,16 +75,12 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum, bool bufferLockHeld); static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); -static int ReleaseBufferWithBufferLock(Buffer buffer); static int BufferReplace(BufferDesc *bufHdr); #ifdef NOT_USED void PrintBufferDescs(void); #endif static void write_buffer(Buffer buffer, bool unpin); -static void drop_relfilenode_buffers(RelFileNode rnode, - bool do_local, bool do_both); -static int release_buffer(Buffer buffer, bool havelock); /* * ReadBuffer -- returns a buffer containing the requested @@ -140,7 +129,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, bool isLocalBuf; isExtend = (blockNum == P_NEW); - isLocalBuf = reln->rd_myxactonly; + isLocalBuf = reln->rd_istemp; if (isLocalBuf) { @@ -684,10 +673,10 @@ ReleaseAndReadBuffer(Buffer buffer, /* * BufferSync -- Write all dirty buffers in the pool. * - * This is called at checkpoint time and write out all dirty buffers. + * This is called at checkpoint time and writes out all dirty shared buffers. */ void -BufferSync() +BufferSync(void) { int i; BufferDesc *bufHdr; @@ -780,8 +769,7 @@ BufferSync() status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data), - true); /* must fsync */ + (char *) MAKE_PTR(bufHdr->data)); } else { @@ -908,19 +896,16 @@ ResetBufferUsage(void) NDirectFileWrite = 0; } -/* ---------------------------------------------- - * ResetBufferPool - * - * This routine is supposed to be called when a transaction aborts. - * It will release all the buffer pins held by the transaction. - * Currently, we also call it during commit if BufferPoolCheckLeak - * detected a problem --- in that case, isCommit is TRUE, and we - * only clean up buffer pin counts. +/* + * AtEOXact_Buffers - clean up at end of transaction. * - * ---------------------------------------------- + * During abort, we need to release any buffer pins we're holding + * (this cleans up in case elog interrupted a routine that pins a + * buffer). During commit, we shouldn't need to do that, but check + * anyway to see if anyone leaked a buffer reference count. */ void -ResetBufferPool(bool isCommit) +AtEOXact_Buffers(bool isCommit) { int i; @@ -928,7 +913,16 @@ ResetBufferPool(bool isCommit) { if (PrivateRefCount[i] != 0) { - BufferDesc *buf = &BufferDescriptors[i]; + BufferDesc *buf = &(BufferDescriptors[i]); + + if (isCommit) + elog(WARNING, + "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, " + "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", + i, buf->freeNext, buf->freePrev, + buf->tag.rnode.tblNode, buf->tag.rnode.relNode, + buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[i]); PrivateRefCount[i] = 1; /* make sure we release shared pin */ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); @@ -938,48 +932,15 @@ ResetBufferPool(bool isCommit) } } - ResetLocalBufferPool(); - - if (!isCommit) - smgrabort(); + AtEOXact_LocalBuffers(isCommit); } /* - * BufferPoolCheckLeak - * - * check if there is buffer leak - */ -bool -BufferPoolCheckLeak(void) -{ - int i; - bool result = false; - - for (i = 0; i < NBuffers; i++) - { - if (PrivateRefCount[i] != 0) - { - BufferDesc *buf = &(BufferDescriptors[i]); - - elog(WARNING, - "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \ -rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", - i, buf->freeNext, buf->freePrev, - buf->tag.rnode.tblNode, buf->tag.rnode.relNode, - buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - result = true; - } - } - return result; -} - -/* ------------------------------------------------ * FlushBufferPool * - * Flush all dirty blocks in buffer pool to disk - * at the checkpoint time - * ------------------------------------------------ + * Flush all dirty blocks in buffer pool to disk at the checkpoint time. + * Local relations do not participate in checkpoints, so they don't need to be + * flushed. */ void FlushBufferPool(void) @@ -989,16 +950,13 @@ FlushBufferPool(void) } /* - * At the commit time we have to flush local buffer pool only + * Do whatever is needed to prepare for commit at the bufmgr and smgr levels */ void BufmgrCommit(void) { - LocalBufferSync(); + /* Nothing to do in bufmgr anymore... */ - /* - * All files created in current transaction will be fsync-ed - */ smgrcommit(); } @@ -1051,15 +1009,15 @@ BufferReplace(BufferDesc *bufHdr) if (reln != (Relation) NULL) { - status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum, + status = smgrwrite(DEFAULT_SMGR, reln, + bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); } else { status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data), - false); /* no fsync */ + (char *) MAKE_PTR(bufHdr->data)); } /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ @@ -1091,31 +1049,55 @@ RelationGetNumberOfBlocks(Relation relation) { /* * relation->rd_nblocks should be accurate already if the relation is - * myxactonly. (XXX how safe is that really?) Don't call smgr on a - * view, either. + * new or temp, because no one else should be modifying it. Otherwise + * we need to ask the smgr for the current physical file length. + * + * Don't call smgr on a view, either. */ if (relation->rd_rel->relkind == RELKIND_VIEW) relation->rd_nblocks = 0; - else if (!relation->rd_myxactonly) + else if (!relation->rd_isnew && !relation->rd_istemp) relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); return relation->rd_nblocks; } -/* - * drop_relfilenode_buffers -- common functionality for - * DropRelationBuffers and - * DropRelFileNodeBuffers +/* --------------------------------------------------------------------- + * DropRelationBuffers * - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. + * This function removes all the buffered pages for a relation + * from the buffer pool. Dirty pages are simply dropped, without + * bothering to write them out first. This is NOT rollback-able, + * and so should be used only with extreme caution! + * + * We assume that the caller holds an exclusive lock on the relation, + * which should assure that no new buffers will be acquired for the rel + * meanwhile. + * -------------------------------------------------------------------- */ -static void -drop_relfilenode_buffers(RelFileNode rnode, bool do_local, bool do_both) +void +DropRelationBuffers(Relation rel) +{ + DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp); +} + +/* --------------------------------------------------------------------- + * DropRelFileNodeBuffers + * + * This is the same as DropRelationBuffers, except that the target + * relation is specified by RelFileNode and temp status. + * + * This is NOT rollback-able. One legitimate use is to clear the + * buffer cache of buffers for a relation that is being deleted + * during transaction abort. + * -------------------------------------------------------------------- + */ +void +DropRelFileNodeBuffers(RelFileNode rnode, bool istemp) { int i; BufferDesc *bufHdr; - if (do_local) + if (istemp) { for (i = 0; i < NLocBuffer; i++) { @@ -1128,8 +1110,7 @@ drop_relfilenode_buffers(RelFileNode rnode, bool do_local, bool do_both) bufHdr->tag.rnode.relNode = InvalidOid; } } - if (!do_both) - return; + return; } LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); @@ -1160,18 +1141,19 @@ recheck: bufHdr->cntxDirty = false; /* - * Release any refcount we may have. - * - * This is very probably dead code, and if it isn't then it's - * probably wrong. I added the Assert to find out --- tgl - * 11/99. + * Release any refcount we may have. If someone else has a + * pin on the buffer, we got trouble. */ if (!(bufHdr->flags & BM_FREE)) { - /* Assert checks that buffer will actually get freed! */ - Assert(PrivateRefCount[i - 1] == 1 && - bufHdr->refcount == 1); - ReleaseBufferWithBufferLock(i); + /* the sole pin should be ours */ + if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0) + elog(FATAL, "DropRelFileNodeBuffers: block %u is referenced (private %ld, global %d)", + bufHdr->tag.blockNum, + PrivateRefCount[i - 1], bufHdr->refcount); + /* Make sure it will be released */ + PrivateRefCount[i - 1] = 1; + UnpinBuffer(bufHdr); } /* @@ -1184,43 +1166,6 @@ recheck: LWLockRelease(BufMgrLock); } -/* --------------------------------------------------------------------- - * DropRelationBuffers - * - * This function removes all the buffered pages for a relation - * from the buffer pool. Dirty pages are simply dropped, without - * bothering to write them out first. This is NOT rollback-able, - * and so should be used only with extreme caution! - * - * We assume that the caller holds an exclusive lock on the relation, - * which should assure that no new buffers will be acquired for the rel - * meanwhile. - * -------------------------------------------------------------------- - */ -void -DropRelationBuffers(Relation rel) -{ - drop_relfilenode_buffers(rel->rd_node, rel->rd_myxactonly, false); -} - -/* --------------------------------------------------------------------- - * DropRelFileNodeBuffers - * - * This is the same as DropRelationBuffers, except that the target - * relation is specified by RelFileNode. - * - * This is NOT rollback-able. One legitimate use is to clear the - * buffer cache of buffers for a relation that is being deleted - * during transaction abort. - * -------------------------------------------------------------------- - */ -void -DropRelFileNodeBuffers(RelFileNode rnode) -{ - /* We have to search both local and shared buffers... */ - drop_relfilenode_buffers(rnode, true, true); -} - /* --------------------------------------------------------------------- * DropBuffers * @@ -1296,7 +1241,7 @@ recheck: */ #ifdef NOT_USED void -PrintBufferDescs() +PrintBufferDescs(void) { int i; BufferDesc *buf = BufferDescriptors; @@ -1331,7 +1276,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", #ifdef NOT_USED void -PrintPinnedBufs() +PrintPinnedBufs(void) { int i; BufferDesc *buf = BufferDescriptors; @@ -1351,33 +1296,6 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", } #endif -/* - * BufferPoolBlowaway - * - * this routine is solely for the purpose of experiments -- sometimes - * you may want to blowaway whatever is left from the past in buffer - * pool and start measuring some performance with a clean empty buffer - * pool. - */ -#ifdef NOT_USED -void -BufferPoolBlowaway() -{ - int i; - - BufferSync(); - for (i = 1; i <= NBuffers; i++) - { - if (BufferIsValid(i)) - { - while (BufferIsValid(i)) - ReleaseBuffer(i); - } - BufTableDelete(&BufferDescriptors[i - 1]); - } -} -#endif - /* --------------------------------------------------------------------- * FlushRelationBuffers * @@ -1428,7 +1346,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) XLogRecPtr recptr; int status; - if (rel->rd_myxactonly) + if (rel->rd_istemp) { for (i = 0; i < NLocBuffer; i++) { @@ -1544,12 +1462,14 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) return 0; } +#undef ReleaseBuffer + /* - * release_buffer -- common functionality for - * ReleaseBuffer and ReleaseBufferWithBufferLock + * ReleaseBuffer -- remove the pin on a buffer without + * marking it dirty. */ -static int -release_buffer(Buffer buffer, bool havelock) +int +ReleaseBuffer(Buffer buffer) { BufferDesc *bufHdr; @@ -1570,41 +1490,14 @@ release_buffer(Buffer buffer, bool havelock) PrivateRefCount[buffer - 1]--; else { - if (!havelock) - LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); UnpinBuffer(bufHdr); - - if (!havelock) - LWLockRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } return STATUS_OK; } -#undef ReleaseBuffer - -/* - * ReleaseBuffer -- remove the pin on a buffer without - * marking it dirty. - */ -int -ReleaseBuffer(Buffer buffer) -{ - return release_buffer(buffer, false); -} - -/* - * ReleaseBufferWithBufferLock - * Same as ReleaseBuffer except we hold the bufmgr lock - */ -static int -ReleaseBufferWithBufferLock(Buffer buffer) -{ - return release_buffer(buffer, true); -} - - #ifdef NOT_USED void IncrBufferRefCount_Debug(char *file, int line, Buffer buffer) @@ -1847,10 +1740,13 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) BufferDesc *bufHdr; if (BufferIsLocal(buffer)) + { + WriteLocalBuffer(buffer, false); return; + } if (BAD_BUFFER_ID(buffer)) - return; + elog(ERROR, "SetBufferCommitInfoNeedsSave: bad buffer %d", buffer); bufHdr = &BufferDescriptors[buffer - 1]; diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index d5edc570b6e..50168c8b306 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -1,48 +1,37 @@ /*------------------------------------------------------------------------- * * localbuf.c - * local buffer manager. Fast buffer manager for temporary tables - * or special cases when the operation is not visible to other backends. - * - * When a relation is being created, the descriptor will have rd_islocal - * set to indicate that the local buffer manager should be used. During - * the same transaction the relation is being created, any inserts or - * selects from the newly created relation will use the local buffer - * pool. rd_islocal is reset at the end of a transaction (commit/abort). - * This is useful for queries like SELECT INTO TABLE and create index. + * local buffer manager. Fast buffer manager for temporary tables, + * which never need to be WAL-logged or checkpointed, etc. * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.44 2002/06/20 20:29:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.45 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <sys/types.h> -#include <sys/file.h> -#include <math.h> -#include <signal.h> - -#include "executor/execdebug.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/smgr.h" #include "utils/relcache.h" -extern long int LocalBufferFlushCount; +/*#define LBDEBUG*/ + +/* should be a GUC parameter some day */ int NLocBuffer = 64; + BufferDesc *LocalBufferDescriptors = NULL; Block *LocalBufferBlockPointers = NULL; long *LocalRefCount = NULL; static int nextFreeLocalBuf = 0; -/*#define LBDEBUG*/ /* * LocalBufferAlloc - @@ -61,11 +50,11 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) reln->rd_node.relNode && LocalBufferDescriptors[i].tag.blockNum == blockNum) { - #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d) %d\n", RelationGetRelid(reln), blockNum, -i - 1); #endif + LocalRefCount[i]++; *foundPtr = TRUE; return &LocalBufferDescriptors[i]; @@ -94,14 +83,17 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) elog(ERROR, "no empty local buffer."); /* - * this buffer is not referenced but it might still be dirty (the last - * transaction to touch it doesn't need its contents but has not - * flushed it). if that's the case, write it out before reusing it! + * this buffer is not referenced but it might still be dirty. + * if that's the case, write it out before reusing it! */ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); + /* + * The relcache is not supposed to throw away temp rels, so this + * should always succeed. + */ Assert(bufrel != NULL); /* flush this page */ @@ -113,26 +105,19 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) RelationDecrementReferenceCount(bufrel); } - /* - * it's all ours now. - * - * We need not in tblNode currently but will in future I think, when - * we'll give up rel->rd_fd to fmgr cache. - */ - bufHdr->tag.rnode = reln->rd_node; - bufHdr->tag.blockNum = blockNum; - bufHdr->flags &= ~BM_DIRTY; - bufHdr->cntxDirty = false; - /* * lazy memory allocation: allocate space on first use of a buffer. + * + * Note this path cannot be taken for a buffer that was previously + * in use, so it's okay to do it (and possibly error out) before + * marking the buffer as valid. */ if (bufHdr->data == (SHMEM_OFFSET) 0) { char *data = (char *) malloc(BLCKSZ); if (data == NULL) - elog(FATAL, "Out of memory in LocalBufferAlloc"); + elog(ERROR, "Out of memory in LocalBufferAlloc"); /* * This is a bit of a hack: bufHdr->data needs to be a shmem @@ -147,13 +132,24 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data; } + /* + * it's all ours now. + * + * We need not in tblNode currently but will in future I think, when + * we'll give up rel->rd_fd to fmgr cache. + */ + bufHdr->tag.rnode = reln->rd_node; + bufHdr->tag.blockNum = blockNum; + bufHdr->flags &= ~BM_DIRTY; + bufHdr->cntxDirty = false; + *foundPtr = FALSE; return bufHdr; } /* * WriteLocalBuffer - - * writes out a local buffer + * writes out a local buffer (actually, just marks it dirty) */ void WriteLocalBuffer(Buffer buffer, bool release) @@ -180,7 +176,7 @@ WriteLocalBuffer(Buffer buffer, bool release) * InitLocalBuffer - * init the local buffer cache. Since most queries (esp. multi-user ones) * don't involve local buffers, we delay allocating actual memory for the - * buffer until we need it. + * buffers until we need them; just make the buffer headers here. */ void InitLocalBuffer(void) @@ -211,65 +207,30 @@ InitLocalBuffer(void) } /* - * LocalBufferSync - * - * Flush all dirty buffers in the local buffer cache at commit time. - * Since the buffer cache is only used for keeping relations visible - * during a transaction, we will not need these buffers again. + * AtEOXact_LocalBuffers - clean up at end of transaction. * - * Note that we have to *flush* local buffers because of them are not - * visible to checkpoint makers. But we can skip XLOG flush check. + * This is just like AtEOXact_Buffers, but for local buffers. */ void -LocalBufferSync(void) +AtEOXact_LocalBuffers(bool isCommit) { int i; for (i = 0; i < NLocBuffer; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; - Relation bufrel; - - if (buf->flags & BM_DIRTY || buf->cntxDirty) + if (LocalRefCount[i] != 0) { -#ifdef LBDEBUG - fprintf(stderr, "LB SYNC %d\n", -i - 1); -#endif - bufrel = RelationNodeCacheGetRelation(buf->tag.rnode); - - Assert(bufrel != NULL); + BufferDesc *buf = &(LocalBufferDescriptors[i]); - smgrwrite(DEFAULT_SMGR, bufrel, buf->tag.blockNum, - (char *) MAKE_PTR(buf->data)); - smgrmarkdirty(DEFAULT_SMGR, bufrel, buf->tag.blockNum); - LocalBufferFlushCount++; + if (isCommit) + elog(WARNING, + "Local Buffer Leak: [%03d] (rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", + i, + buf->tag.rnode.tblNode, buf->tag.rnode.relNode, + buf->tag.blockNum, buf->flags, + buf->refcount, LocalRefCount[i]); - /* drop relcache refcount from RelationNodeCacheGetRelation */ - RelationDecrementReferenceCount(bufrel); - - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; + LocalRefCount[i] = 0; } } - - MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; -} - -void -ResetLocalBufferPool(void) -{ - int i; - - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; - - buf->tag.rnode.relNode = InvalidOid; - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } - - MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 391a078e602..8be2ed219b9 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.92 2002/06/20 20:29:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.93 2002/08/06 02:36:34 tgl Exp $ * * NOTES: * @@ -119,8 +119,7 @@ typedef struct vfd unsigned short fdstate; /* bitflags for VFD's state */ /* these are the assigned bits in fdstate: */ -#define FD_DIRTY (1 << 0) /* written to, but not yet fsync'd */ -#define FD_TEMPORARY (1 << 1) /* should be unlinked when closed */ +#define FD_TEMPORARY (1 << 0) /* should be unlinked when closed */ File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ @@ -396,15 +395,6 @@ LruDelete(File file) vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR); Assert(vfdP->seekPos != -1L); - /* if we have written to the file, sync it before closing */ - if (vfdP->fdstate & FD_DIRTY) - { - if (pg_fsync(vfdP->fd)) - elog(LOG, "LruDelete: failed to fsync %s: %m", - vfdP->fileName); - vfdP->fdstate &= ~FD_DIRTY; - } - /* close the file */ if (close(vfdP->fd)) elog(LOG, "LruDelete: failed to close %s: %m", @@ -725,17 +715,8 @@ fileNameOpenFile(FileName fileName, /* Saved flags are adjusted to be OK for re-opening file */ vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL); vfdP->fileMode = fileMode; - vfdP->seekPos = 0; - - /* - * Have to fsync file on commit. Alternative way - log file creation - * and fsync log before actual file creation. - */ - if (fileFlags & O_CREAT) - vfdP->fdstate = FD_DIRTY; - else - vfdP->fdstate = 0x0; + vfdP->fdstate = 0x0; return file; } @@ -841,15 +822,6 @@ FileClose(File file) /* remove the file from the lru ring */ Delete(file); - /* if we did any writes, sync the file before closing */ - if (vfdP->fdstate & FD_DIRTY) - { - if (pg_fsync(vfdP->fd)) - elog(LOG, "FileClose: failed to fsync %s: %m", - vfdP->fileName); - vfdP->fdstate &= ~FD_DIRTY; - } - /* close the file */ if (close(vfdP->fd)) elog(LOG, "FileClose: failed to close %s: %m", @@ -1022,108 +994,11 @@ FileTruncate(File file, long offset) DO_DB(elog(LOG, "FileTruncate %d (%s)", file, VfdCache[file].fileName)); - FileSync(file); FileAccess(file); returnCode = ftruncate(VfdCache[file].fd, (size_t) offset); return returnCode; } -/* - * FileSync --- if a file is marked as dirty, fsync it. - * - * The FD_DIRTY bit is slightly misnamed: it doesn't mean that we need to - * write the file, but that we *have* written it and need to execute an - * fsync() to ensure the changes are down on disk before we mark the current - * transaction committed. - * - * FD_DIRTY is set by FileWrite or by an explicit FileMarkDirty() call. - * It is cleared after successfully fsync'ing the file. FileClose() will - * fsync a dirty File that is about to be closed, since there will be no - * other place to remember the need to fsync after the VFD is gone. - * - * Note that the DIRTY bit is logically associated with the actual disk file, - * not with any particular kernel FD we might have open for it. We assume - * that fsync will force out any dirty buffers for that file, whether or not - * they were written through the FD being used for the fsync call --- they - * might even have been written by some other backend! - * - * Note also that LruDelete currently fsyncs a dirty file that it is about - * to close the kernel file descriptor for. The idea there is to avoid - * having to re-open the kernel descriptor later. But it's not real clear - * that this is a performance win; we could end up fsyncing the same file - * multiple times in a transaction, which would probably cost more time - * than is saved by avoiding an open() call. This should be studied. - * - * This routine used to think it could skip the fsync if the file is - * physically closed, but that is now WRONG; see comments for FileMarkDirty. - */ -int -FileSync(File file) -{ - int returnCode; - - Assert(FileIsValid(file)); - - if (!(VfdCache[file].fdstate & FD_DIRTY)) - { - /* Need not sync if file is not dirty. */ - returnCode = 0; - } - else if (!enableFsync) - { - /* Don't force the file open if pg_fsync isn't gonna sync it. */ - returnCode = 0; - VfdCache[file].fdstate &= ~FD_DIRTY; - } - else - { - /* - * We don't use FileAccess() because we don't want to force the - * file to the front of the LRU ring; we aren't expecting to - * access it again soon. - */ - if (FileIsNotOpen(file)) - { - returnCode = LruInsert(file); - if (returnCode != 0) - return returnCode; - } - returnCode = pg_fsync(VfdCache[file].fd); - if (returnCode == 0) - VfdCache[file].fdstate &= ~FD_DIRTY; - } - - return returnCode; -} - -/* - * FileMarkDirty --- mark a file as needing fsync at transaction commit. - * - * Since FileWrite marks the file dirty, this routine is not needed in - * normal use. It is called when the buffer manager detects that some other - * backend has written out a shared buffer that this backend dirtied (but - * didn't write) in the current xact. In that scenario, we need to fsync - * the file before we can commit. We cannot assume that the other backend - * has fsync'd the file yet; we need to do our own fsync to ensure that - * (a) the disk page is written and (b) this backend's commit is delayed - * until the write is complete. - * - * Note we are assuming that an fsync issued by this backend will write - * kernel disk buffers that were dirtied by another backend. Furthermore, - * it doesn't matter whether we currently have the file physically open; - * we must fsync even if we have to re-open the file to do it. - */ -void -FileMarkDirty(File file) -{ - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileMarkDirty: %d (%s)", - file, VfdCache[file].fileName)); - - VfdCache[file].fdstate |= FD_DIRTY; -} - /* * Routines that want to use stdio (ie, FILE*) should use AllocateFile @@ -1142,7 +1017,6 @@ FileMarkDirty(File file) * * Ideally this should be the *only* direct call of fopen() in the backend. */ - FILE * AllocateFile(char *name, char *mode) { @@ -1229,12 +1103,6 @@ closeAllVfds(void) * exit (it doesn't particularly care which). All still-open temporary-file * VFDs are closed, which also causes the underlying files to be deleted. * Furthermore, all "allocated" stdio files are closed. - * - * This routine is not involved in fsync'ing non-temporary files at xact - * commit; that is done by FileSync under control of the buffer manager. - * During a commit, that is done *before* control gets here. If we still - * have any needs-fsync bits set when we get here, we assume this is abort - * and clear them. */ void AtEOXact_Files(void) @@ -1249,8 +1117,6 @@ AtEOXact_Files(void) if ((VfdCache[i].fdstate & FD_TEMPORARY) && VfdCache[i].fileName != NULL) FileClose(i); - else - VfdCache[i].fdstate &= ~FD_DIRTY; } } diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 978d85d4868..25051a9799c 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.91 2002/06/20 20:29:35 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.92 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -381,16 +381,7 @@ mdclose_fd(int fd) /* if not closed already */ if (v->mdfd_vfd >= 0) - { - /* - * We sync the file descriptor so that we don't need to reopen - * it at transaction commit to force changes to disk. (This - * is not really optional, because we are about to forget that - * the file even exists...) - */ - FileSync(v->mdfd_vfd); FileClose(v->mdfd_vfd); - } /* Now free vector */ v = v->mdfd_chain; if (ov != &Md_fdvec[fd]) @@ -403,16 +394,7 @@ mdclose_fd(int fd) if (v != (MdfdVec *) NULL) { if (v->mdfd_vfd >= 0) - { - /* - * We sync the file descriptor so that we don't need to reopen - * it at transaction commit to force changes to disk. (This - * is not really optional, because we are about to forget that - * the file even exists...) - */ - FileSync(v->mdfd_vfd); FileClose(v->mdfd_vfd); - } } #endif @@ -497,56 +479,16 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer) return SM_SUCCESS; } -/* - * mdflush() -- Synchronously write a block to disk. - * - * This is exactly like mdwrite(), but doesn't return until the file - * system buffer cache has been flushed. - */ -int -mdflush(Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - long seekpos; - MdfdVec *v; - - v = _mdfd_getseg(reln, blocknum); - -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); -#ifdef DIAGNOSTIC - if (seekpos >= BLCKSZ * RELSEG_SIZE) - elog(FATAL, "seekpos too big!"); -#endif -#else - seekpos = (long) (BLCKSZ * (blocknum)); -#endif - - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; - - /* write and sync the block */ - status = SM_SUCCESS; - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ - || FileSync(v->mdfd_vfd) < 0) - status = SM_FAIL; - - return status; -} - /* * mdblindwrt() -- Write a block to disk blind. * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. Otherwise - * this is much like mdwrite(). If dofsync is TRUE, then we fsync - * the file, making it more like mdflush(). + * We have to be able to do this using only the rnode of the relation + * in which the block belongs. Otherwise this is much like mdwrite(). */ int mdblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { int status; long seekpos; @@ -568,7 +510,6 @@ mdblindwrt(RelFileNode rnode, #endif errno = 0; - if (lseek(fd, seekpos, SEEK_SET) != seekpos) { elog(LOG, "mdblindwrt: lseek(%ld) failed: %m", seekpos); @@ -578,7 +519,7 @@ mdblindwrt(RelFileNode rnode, status = SM_SUCCESS; - /* write and optionally sync the block */ + /* write the block */ errno = 0; if (write(fd, buffer, BLCKSZ) != BLCKSZ) { @@ -598,54 +539,6 @@ mdblindwrt(RelFileNode rnode, return status; } -/* - * mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync). - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mdmarkdirty(Relation reln, BlockNumber blkno) -{ - MdfdVec *v; - - v = _mdfd_getseg(reln, blkno); - - FileMarkDirty(v->mdfd_vfd); - - return SM_SUCCESS; -} - -/* - * mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync). - * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. Otherwise - * this is much like mdmarkdirty(). However, we do the fsync immediately - * rather than building md/fd datastructures to postpone it till later. - */ -int -mdblindmarkdirty(RelFileNode rnode, - BlockNumber blkno) -{ - int status; - int fd; - - fd = _mdfd_blind_getseg(rnode, blkno); - - if (fd < 0) - return SM_FAIL; - - status = SM_SUCCESS; - - if (pg_fsync(fd) < 0) - status = SM_FAIL; - - if (close(fd) < 0) - status = SM_FAIL; - - return status; -} - /* * mdnblocks() -- Get the number of blocks stored in a relation. * @@ -796,61 +689,36 @@ mdtruncate(Relation reln, BlockNumber nblocks) /* * mdcommit() -- Commit a transaction. * - * All changes to magnetic disk relations must be forced to stable - * storage. This routine makes a pass over the private table of - * file descriptors. Any descriptors to which we have done writes, - * but not synced, are synced here. - * * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int -mdcommit() +mdcommit(void) { - int i; - MdfdVec *v; - - for (i = 0; i < CurFd; i++) - { - v = &Md_fdvec[i]; - if (v->mdfd_flags & MDFD_FREE) - continue; - /* Sync the file entry */ -#ifndef LET_OS_MANAGE_FILESIZE - for (; v != (MdfdVec *) NULL; v = v->mdfd_chain) -#else - if (v != (MdfdVec *) NULL) -#endif - { - if (FileSync(v->mdfd_vfd) < 0) - return SM_FAIL; - } - } - + /* + * We don't actually have to do anything here... + */ return SM_SUCCESS; } /* * mdabort() -- Abort a transaction. * - * Changes need not be forced to disk at transaction abort. We mark - * all file descriptors as clean here. Always returns SM_SUCCESS. + * Changes need not be forced to disk at transaction abort. */ int -mdabort() +mdabort(void) { /* - * We don't actually have to do anything here. fd.c will discard - * fsync-needed bits in its AtEOXact_Files() routine. + * We don't actually have to do anything here... */ return SM_SUCCESS; } /* - * mdsync() -- Sync storage. - * + * mdsync() -- Sync previous writes to stable storage. */ int -mdsync() +mdsync(void) { sync(); if (IsUnderPostmaster) @@ -861,11 +729,9 @@ mdsync() /* * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. - * */ -static -int -_fdvec_alloc() +static int +_fdvec_alloc(void) { MdfdVec *nvec; int fdvec, diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c index 89396d173c9..739e938fe28 100644 --- a/src/backend/storage/smgr/mm.c +++ b/src/backend/storage/smgr/mm.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.31 2002/06/20 20:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.32 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -81,7 +81,7 @@ static HTAB *MMCacheHT; static HTAB *MMRelCacheHT; int -mminit() +mminit(void) { char *mmcacheblk; int mmsize = 0; @@ -151,7 +151,7 @@ mminit() } int -mmshutdown() +mmshutdown(void) { return SM_SUCCESS; } @@ -442,31 +442,16 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer) return SM_SUCCESS; } -/* - * mmflush() -- Synchronously write a block to stable storage. - * - * For main-memory relations, this is exactly equivalent to mmwrite(). - */ -int -mmflush(Relation reln, BlockNumber blocknum, char *buffer) -{ - return mmwrite(reln, blocknum, buffer); -} - /* * mmblindwrt() -- Write a block to stable storage blind. * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. + * We have to be able to do this using only the rnode of the relation + * in which the block belongs. Otherwise this is much like mmwrite(). */ int -mmblindwrt(char *dbstr, - char *relstr, - Oid dbid, - Oid relid, +mmblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { return SM_FAIL; } @@ -512,7 +497,7 @@ mmnblocks(Relation reln) * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int -mmcommit() +mmcommit(void) { return SM_SUCCESS; } @@ -522,7 +507,7 @@ mmcommit() */ int -mmabort() +mmabort(void) { return SM_SUCCESS; } @@ -536,7 +521,7 @@ mmabort() * manager will use. */ int -MMShmemSize() +MMShmemSize(void) { int size = 0; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index a7fb23b4427..252781d9c3f 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.57 2002/06/20 20:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.58 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,12 +40,8 @@ typedef struct f_smgr char *buffer); int (*smgr_write) (Relation reln, BlockNumber blocknum, char *buffer); - int (*smgr_flush) (Relation reln, BlockNumber blocknum, - char *buffer); int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno, - char *buffer, bool dofsync); - int (*smgr_markdirty) (Relation reln, BlockNumber blkno); - int (*smgr_blindmarkdirty) (RelFileNode, BlockNumber blkno); + char *buffer); BlockNumber (*smgr_nblocks) (Relation reln); BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks); int (*smgr_commit) (void); /* may be NULL */ @@ -62,15 +58,15 @@ static f_smgr smgrsw[] = { /* magnetic disk */ {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, - mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty, + mdread, mdwrite, mdblindwrt, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync }, #ifdef STABLE_MEMORY_STORAGE /* main memory */ {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, - mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty, - mmnblocks, NULL, mmcommit, mmabort}, + mmread, mmwrite, mmblindwrt, + mmnblocks, NULL, mmcommit, mmabort, NULL}, #endif }; @@ -110,6 +106,7 @@ typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ int16 which; /* which storage manager? */ + bool isTemp; /* is it a temporary relation? */ bool atCommit; /* T=delete at commit; F=delete at abort */ struct PendingRelDelete *next; /* linked-list link */ } PendingRelDelete; @@ -123,7 +120,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ * */ int -smgrinit() +smgrinit(void) { int i; @@ -181,6 +178,7 @@ smgrcreate(int16 which, Relation reln) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; + pending->isTemp = reln->rd_istemp; pending->atCommit = false; /* delete if abort */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -208,6 +206,7 @@ smgrunlink(int16 which, Relation reln) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; + pending->isTemp = reln->rd_istemp; pending->atCommit = true; /* delete if commit */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -312,8 +311,10 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) /* * smgrwrite() -- Write the supplied buffer out. * - * This is not a synchronous write -- the interface for that is - * smgrflush(). The buffer is written out via the appropriate + * This is not a synchronous write -- the block is not necessarily + * on disk at return, only dumped out to the kernel. + * + * The buffer is written out via the appropriate * storage manager. This routine returns SM_SUCCESS or aborts * the current transaction. */ @@ -331,23 +332,6 @@ smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer) return status; } -/* - * smgrflush() -- A synchronous smgrwrite(). - */ -int -smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - - status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - elog(ERROR, "cannot flush block %d of %s to stable store: %m", - blocknum, RelationGetRelationName(reln)); - - return status; -} - /* * smgrblindwrt() -- Write a page out blind. * @@ -357,20 +341,18 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer) * that has not yet committed, which created a new relation. In * this case, the buffer manager will call smgrblindwrt() with * the name and OID of the database and the relation to which the - * buffer belongs. Every storage manager must be able to force - * this page down to stable storage in this circumstance. The - * write should be synchronous if dofsync is true. + * buffer belongs. Every storage manager must be able to write + * this page out to stable storage in this circumstance. */ int smgrblindwrt(int16 which, RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { int status; - status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer, dofsync); + status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer); if (status == SM_FAIL) elog(ERROR, "cannot write block %d of %u/%u blind: %m", @@ -379,53 +361,6 @@ smgrblindwrt(int16 which, return status; } -/* - * smgrmarkdirty() -- Mark a page dirty (needs fsync). - * - * Mark the specified page as needing to be fsync'd before commit. - * Ordinarily, the storage manager will do this implicitly during - * smgrwrite(). However, the buffer manager may discover that some - * other backend has written a buffer that we dirtied in the current - * transaction. In that case, we still need to fsync the file to be - * sure the page is down to disk before we commit. - */ -int -smgrmarkdirty(int16 which, - Relation reln, - BlockNumber blkno) -{ - int status; - - status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno); - - if (status == SM_FAIL) - elog(ERROR, "cannot mark block %d of %s: %m", - blkno, RelationGetRelationName(reln)); - - return status; -} - -/* - * smgrblindmarkdirty() -- Mark a page dirty, "blind". - * - * Just like smgrmarkdirty, except we don't have a reldesc. - */ -int -smgrblindmarkdirty(int16 which, - RelFileNode rnode, - BlockNumber blkno) -{ - int status; - - status = (*(smgrsw[which].smgr_blindmarkdirty)) (rnode, blkno); - - if (status == SM_FAIL) - elog(ERROR, "cannot mark block %d of %u/%u blind: %m", - blkno, rnode.tblNode, rnode.relNode); - - return status; -} - /* * smgrnblocks() -- Calculate the number of POSTGRES blocks in the * supplied relation. @@ -504,7 +439,7 @@ smgrDoPendingDeletes(bool isCommit) * any in the commit case, but there can be in the abort * case). */ - DropRelFileNodeBuffers(pending->relnode); + DropRelFileNodeBuffers(pending->relnode, pending->isTemp); /* * Tell the free space map to forget this relation. It won't @@ -531,11 +466,13 @@ smgrDoPendingDeletes(bool isCommit) } /* - * smgrcommit(), smgrabort() -- Commit or abort changes made during the - * current transaction. + * smgrcommit() -- Prepare to commit changes made during the current + * transaction. + * + * This is called before we actually commit. */ int -smgrcommit() +smgrcommit(void) { int i; @@ -553,8 +490,11 @@ smgrcommit() return SM_SUCCESS; } +/* + * smgrabort() -- Abort changes made during the current transaction. + */ int -smgrabort() +smgrabort(void) { int i; @@ -572,8 +512,11 @@ smgrabort() return SM_SUCCESS; } +/* + * Sync files to disk at checkpoint time. + */ int -smgrsync() +smgrsync(void) { int i; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f1ed253d711..f6c11206bd8 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.170 2002/08/04 18:12:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.171 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,6 +39,7 @@ #include "catalog/catalog.h" #include "catalog/catname.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/pg_amop.h" #include "catalog/pg_amproc.h" #include "catalog/pg_attrdef.h" @@ -94,13 +95,6 @@ static HTAB *RelationSysNameCache; */ static HTAB *RelationNodeCache; -/* - * newlyCreatedRelns - - * relations created during this transaction. We need to keep track of - * these. - */ -static List *newlyCreatedRelns = NIL; - /* * This flag is false until we have prepared the critical relcache entries * that are needed to do indexscans on the tables read by relcache building. @@ -865,9 +859,12 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo, RelationSetReferenceCount(relation, 1); /* - * normal relations are not nailed into the cache + * normal relations are not nailed into the cache; nor can a pre-existing + * relation be new or temp. */ relation->rd_isnailed = false; + relation->rd_isnew = false; + relation->rd_istemp = false; /* * initialize the tuple descriptor (relation->rd_att). @@ -957,9 +954,6 @@ RelationInitIndexAccessInfo(Relation relation) ReleaseSysCache(tuple); relation->rd_index = iform; - /* this field is now kinda redundant... */ - relation->rd_uniqueindex = iform->indisunique; - /* * Make a copy of the pg_am entry for the index's access method */ @@ -1359,9 +1353,12 @@ formrdesc(const char *relationName, RelationSetReferenceCount(relation, 1); /* - * all entries built with this routine are nailed-in-cache + * all entries built with this routine are nailed-in-cache; none are + * for new or temp relations. */ relation->rd_isnailed = true; + relation->rd_isnew = false; + relation->rd_istemp = false; /* * initialize relation tuple form @@ -1603,7 +1600,9 @@ RelationClose(Relation relation) RelationDecrementReferenceCount(relation); #ifdef RELCACHE_FORCE_RELEASE - if (RelationHasReferenceCountZero(relation) && !relation->rd_myxactonly) + if (RelationHasReferenceCountZero(relation) && + !relation->rd_isnew && + !relation->rd_istemp) RelationClearRelation(relation, false); #endif } @@ -1734,13 +1733,14 @@ RelationClearRelation(Relation relation, bool rebuild) { /* * When rebuilding an open relcache entry, must preserve ref count - * and myxactonly flag. Also attempt to preserve the tupledesc, + * and new/temp flags. Also attempt to preserve the tupledesc, * rewrite rules, and trigger substructures in place. Furthermore - * we save/restore rd_nblocks (in case it is a local relation) + * we save/restore rd_nblocks (in case it is a new/temp relation) * *and* call RelationGetNumberOfBlocks (in case it isn't). */ int old_refcnt = relation->rd_refcnt; - bool old_myxactonly = relation->rd_myxactonly; + bool old_isnew = relation->rd_isnew; + bool old_istemp = relation->rd_istemp; TupleDesc old_att = relation->rd_att; RuleLock *old_rules = relation->rd_rules; MemoryContext old_rulescxt = relation->rd_rulescxt; @@ -1763,7 +1763,8 @@ RelationClearRelation(Relation relation, bool rebuild) buildinfo.i.info_id); } RelationSetReferenceCount(relation, old_refcnt); - relation->rd_myxactonly = old_myxactonly; + relation->rd_isnew = old_isnew; + relation->rd_istemp = old_istemp; if (equalTupleDescs(old_att, relation->rd_att)) { FreeTupleDesc(relation->rd_att); @@ -1810,11 +1811,11 @@ RelationFlushRelation(Relation relation) { bool rebuild; - if (relation->rd_myxactonly) + if (relation->rd_isnew || relation->rd_istemp) { /* - * Local rels should always be rebuilt, not flushed; the relcache - * entry must live until RelationPurgeLocalRelation(). + * New and temp relcache entries must always be rebuilt, not + * flushed; else we'd forget those two important status bits. */ rebuild = true; } @@ -1830,11 +1831,10 @@ RelationFlushRelation(Relation relation) } /* - * RelationForgetRelation - + * RelationForgetRelation - unconditionally remove a relcache entry * - * RelationClearRelation + if the relation is myxactonly then - * remove the relation descriptor from the newly created - * relation list. + * External interface for destroying a relcache entry when we + * drop the relation. */ void RelationForgetRelation(Oid rid) @@ -1849,31 +1849,6 @@ RelationForgetRelation(Oid rid) if (!RelationHasReferenceCountZero(relation)) elog(ERROR, "RelationForgetRelation: relation %u is still open", rid); - /* If local, remove from list */ - if (relation->rd_myxactonly) - { - List *curr; - List *prev = NIL; - - foreach(curr, newlyCreatedRelns) - { - Relation reln = lfirst(curr); - - Assert(reln != NULL && reln->rd_myxactonly); - if (RelationGetRelid(reln) == rid) - break; - prev = curr; - } - if (curr == NIL) - elog(ERROR, "Local relation %s not found in list", - RelationGetRelationName(relation)); - if (prev == NIL) - newlyCreatedRelns = lnext(newlyCreatedRelns); - else - lnext(prev) = lnext(curr); - pfree(curr); - } - /* Unconditionally destroy the relcache entry */ RelationClearRelation(relation, false); } @@ -1909,7 +1884,7 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId) * and rebuild those with positive reference counts. * * This is currently used only to recover from SI message buffer overflow, - * so we do not touch transaction-local relations; they cannot be targets + * so we do not touch new-in-transaction relations; they cannot be targets * of cross-backend SI updates (and our own updates now go through a * separate linked list that isn't limited by the SI message buffer size). * @@ -1940,13 +1915,13 @@ RelationCacheInvalidate(void) { relation = idhentry->reldesc; - /* Ignore xact-local relations, since they are never SI targets */ - if (relation->rd_myxactonly) + /* Ignore new relations, since they are never SI targets */ + if (relation->rd_isnew) continue; relcacheInvalsReceived++; - if (RelationHasReferenceCountZero(relation)) + if (RelationHasReferenceCountZero(relation) && !relation->rd_istemp) { /* Delete this entry immediately */ RelationClearRelation(relation, false); @@ -1968,37 +1943,16 @@ RelationCacheInvalidate(void) } /* - * AtEOXactRelationCache + * AtEOXact_RelationCache * * Clean up the relcache at transaction commit or abort. - * - * During transaction abort, we must reset relcache entry ref counts - * to their normal not-in-a-transaction state. A ref count may be - * too high because some routine was exited by elog() between - * incrementing and decrementing the count. - * - * During commit, we should not have to do this, but it's useful to - * check that the counts are correct to catch missed relcache closes. - * Since that's basically a debugging thing, only pay the cost when - * assert checking is enabled. - * - * In bootstrap mode, forget the debugging checks --- the bootstrap code - * expects relations to stay open across start/commit transaction calls. */ void -AtEOXactRelationCache(bool commit) +AtEOXact_RelationCache(bool commit) { HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; -#ifdef USE_ASSERT_CHECKING - if (commit && IsBootstrapProcessingMode()) - return; -#else - if (commit) - return; -#endif - hash_seq_init(&status, RelationIdCache); while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) @@ -2006,11 +1960,45 @@ AtEOXactRelationCache(bool commit) Relation relation = idhentry->reldesc; int expected_refcnt; + /* + * Is it a relation created in the current transaction? + * + * During commit, reset the flag to false, since we are now out of the + * creating transaction. During abort, simply delete the relcache + * entry --- it isn't interesting any longer. + */ + if (relation->rd_isnew) + { + if (commit) + relation->rd_isnew = false; + else + { + RelationClearRelation(relation, false); + continue; + } + } + + /* + * During transaction abort, we must also reset relcache entry ref + * counts to their normal not-in-a-transaction state. A ref count may + * be too high because some routine was exited by elog() between + * incrementing and decrementing the count. + * + * During commit, we should not have to do this, but it's still useful + * to check that the counts are correct to catch missed relcache + * closes. + * + * In bootstrap mode, do NOT reset the refcnt nor complain that it's + * nonzero --- the bootstrap code expects relations to stay open + * across start/commit transaction calls. (That seems bogus, but it's + * not worth fixing.) + */ expected_refcnt = relation->rd_isnailed ? 1 : 0; if (commit) { - if (relation->rd_refcnt != expected_refcnt) + if (relation->rd_refcnt != expected_refcnt && + !IsBootstrapProcessingMode()) { elog(WARNING, "Relcache reference leak: relation \"%s\" has refcnt %d instead of %d", RelationGetRelationName(relation), @@ -2055,10 +2043,11 @@ RelationBuildLocalRelation(const char *relname, oldcxt = MemoryContextSwitchTo(CacheMemoryContext); /* - * allocate a new relation descriptor. + * allocate a new relation descriptor and fill in basic state fields. */ rel = (Relation) palloc(sizeof(RelationData)); MemSet((char *) rel, 0, sizeof(RelationData)); + rel->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ @@ -2066,6 +2055,12 @@ RelationBuildLocalRelation(const char *relname, RelationSetReferenceCount(rel, 1); + /* it's being created in this transaction */ + rel->rd_isnew = true; + + /* is it a temporary relation? */ + rel->rd_istemp = isTempNamespace(relnamespace); + /* * nail the reldesc if this is a bootstrap create reln and we may need * it in the cache later on in the bootstrap process so we don't ever @@ -2121,17 +2116,6 @@ RelationBuildLocalRelation(const char *relname, */ RelationCacheInsert(rel); - /* - * we've just created the relation. It is invisible to anyone else - * before the transaction is committed. Setting rd_myxactonly allows - * us to use the local buffer manager for select/insert/etc before the - * end of transaction. (We also need to keep track of relations - * created during a transaction and do the necessary clean up at the - * end of the transaction.) - ay 3/95 - */ - rel->rd_myxactonly = true; - newlyCreatedRelns = lcons(rel, newlyCreatedRelns); - /* * done building relcache entry. */ @@ -2140,38 +2124,6 @@ RelationBuildLocalRelation(const char *relname, return rel; } -/* - * RelationPurgeLocalRelation - - * find all the Relation descriptors marked rd_myxactonly and reset them. - * This should be called at the end of a transaction (commit/abort) when - * the "local" relations will become visible to others and the multi-user - * buffer pool should be used. - */ -void -RelationPurgeLocalRelation(bool xactCommitted) -{ - while (newlyCreatedRelns) - { - List *l = newlyCreatedRelns; - Relation reln = lfirst(l); - - newlyCreatedRelns = lnext(newlyCreatedRelns); - pfree(l); - - Assert(reln != NULL && reln->rd_myxactonly); - - reln->rd_myxactonly = false; /* mark it not on list anymore */ - - /* - * XXX while we clearly must throw out new Relation entries at - * xact abort, it's not clear why we need to do it at commit. - * Could this be improved? - */ - if (!IsBootstrapProcessingMode()) - RelationClearRelation(reln, false); - } -} - /* * RelationCacheInitialize * diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 4b661f53d32..132fef26c88 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: xlog.h,v 1.33 2002/08/05 01:24:16 thomas Exp $ + * $Id: xlog.h,v 1.34 2002/08/06 02:36:35 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -182,6 +182,7 @@ extern StartUpID ThisStartUpID; /* current SUI */ extern bool InRecovery; extern XLogRecPtr MyLastRecPtr; extern bool MyXactMadeXLogEntry; +extern bool MyXactMadeTempRelUpdate; extern XLogRecPtr ProcLastRecEnd; /* these variables are GUC parameters related to XLOG */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index bd0907ed952..6f8c4ad841d 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: buf_internals.h,v 1.57 2002/06/20 20:29:52 momjian Exp $ + * $Id: buf_internals.h,v 1.58 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -149,6 +149,15 @@ typedef struct _bmtrace #endif /* BMTRACE */ +/* counters in buf_init.c */ +extern long int ReadBufferCount; +extern long int ReadLocalBufferCount; +extern long int BufferHitCount; +extern long int LocalBufferHitCount; +extern long int BufferFlushCount; +extern long int LocalBufferFlushCount; + + /* * Bufmgr Interface: */ @@ -177,8 +186,6 @@ extern BufferDesc *LocalBufferDescriptors; extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); extern void WriteLocalBuffer(Buffer buffer, bool release); -extern int FlushLocalBuffer(Buffer buffer, bool sync, bool release); -extern void LocalBufferSync(void); -extern void ResetLocalBufferPool(void); +extern void AtEOXact_LocalBuffers(bool isCommit); #endif /* BUFMGR_INTERNALS_H */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index a6952fa1f69..7aebaa73da6 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.61 2002/07/02 05:47:37 momjian Exp $ + * $Id: bufmgr.h,v 1.62 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -152,20 +152,18 @@ extern void WriteBuffer(Buffer buffer); extern void WriteNoReleaseBuffer(Buffer buffer); extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum); -extern int FlushBuffer(Buffer buffer, bool sync, bool release); extern void InitBufferPool(void); extern void InitBufferPoolAccess(void); extern char *ShowBufferUsage(void); extern void ResetBufferUsage(void); -extern void ResetBufferPool(bool isCommit); -extern bool BufferPoolCheckLeak(void); +extern void AtEOXact_Buffers(bool isCommit); extern void FlushBufferPool(void); extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); extern int FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock); extern void DropRelationBuffers(Relation rel); -extern void DropRelFileNodeBuffers(RelFileNode rnode); +extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp); extern void DropBuffers(Oid dbid); #ifdef NOT_USED extern void PrintPinnedBufs(void); diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 13f5100c661..a13cec41ea6 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: fd.h,v 1.35 2002/06/20 20:29:52 momjian Exp $ + * $Id: fd.h,v 1.36 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,8 +62,6 @@ extern int FileRead(File file, char *buffer, int amount); extern int FileWrite(File file, char *buffer, int amount); extern long FileSeek(File file, long offset, int whence); extern int FileTruncate(File file, long offset); -extern int FileSync(File file); -extern void FileMarkDirty(File file); /* Operations that allow use of regular stdio --- USE WITH CAUTION */ extern FILE *AllocateFile(char *name, char *mode); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 474bfbc9326..d5a96ea0c99 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: smgr.h,v 1.35 2002/06/20 20:29:52 momjian Exp $ + * $Id: smgr.h,v 1.36 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,13 +36,8 @@ extern int smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer); extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer); -extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); extern int smgrblindwrt(int16 which, RelFileNode rnode, - BlockNumber blkno, char *buffer, bool dofsync); -extern int smgrblindmarkdirty(int16 which, RelFileNode rnode, - BlockNumber blkno); -extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno); + BlockNumber blkno, char *buffer); extern BlockNumber smgrnblocks(int16 which, Relation reln); extern BlockNumber smgrtruncate(int16 which, Relation reln, BlockNumber nblocks); @@ -67,11 +62,7 @@ extern int mdopen(Relation reln); extern int mdclose(Relation reln); extern int mdread(Relation reln, BlockNumber blocknum, char *buffer); extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdmarkdirty(Relation reln, BlockNumber blkno); -extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, bool dofsync); -extern int mdblindmarkdirty(RelFileNode rnode, BlockNumber blkno); +extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); extern BlockNumber mdnblocks(Relation reln); extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks); extern int mdcommit(void); @@ -87,13 +78,7 @@ extern int mmopen(Relation reln); extern int mmclose(Relation reln); extern int mmread(Relation reln, BlockNumber blocknum, char *buffer); extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid, - BlockNumber blkno, char *buffer, - bool dofsync); -extern int mmmarkdirty(Relation reln, BlockNumber blkno); -extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid, - BlockNumber blkno); +extern int mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); extern BlockNumber mmnblocks(Relation reln); extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks); extern int mmcommit(void); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 3dce1757ed4..d913f28aba3 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: rel.h,v 1.60 2002/06/20 20:29:53 momjian Exp $ + * $Id: rel.h,v 1.61 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -112,10 +112,10 @@ typedef struct RelationData BlockNumber rd_targblock; /* current insertion target block, or * InvalidBlockNumber */ int rd_refcnt; /* reference count */ - bool rd_myxactonly; /* rel uses the local buffer mgr */ + bool rd_isnew; /* rel was created in current xact */ + bool rd_istemp; /* rel uses the local buffer mgr */ bool rd_isnailed; /* rel is nailed in cache */ bool rd_indexfound; /* true if rd_indexlist is valid */ - bool rd_uniqueindex; /* true if rel is a UNIQUE index */ Form_pg_class rd_rel; /* RELATION tuple */ TupleDesc rd_att; /* tuple descriptor */ Oid rd_id; /* relation's object id */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index fd22a65296d..c0495284912 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relcache.h,v 1.33 2002/08/02 22:36:05 tgl Exp $ + * $Id: relcache.h,v 1.34 2002/08/06 02:36:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,9 +61,7 @@ extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId); extern void RelationCacheInvalidate(void); -extern void RelationPurgeLocalRelation(bool xactComitted); - -extern void AtEOXactRelationCache(bool commit); +extern void AtEOXact_RelationCache(bool commit); /* * Routines to help manage rebuilding of relcache init file -- GitLab