diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 4c013146d4e522edf8623680452b80e89694e2dc..43ec69cab327d3968f4918e8c7f3b73dd3668962 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.75 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -158,7 +158,7 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno) if (blkno == P_NEW) elog(ERROR, "hash AM does not use P_NEW"); - buf = ReadOrZeroBuffer(rel, blkno); + buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno); LockBuffer(buf, HASH_WRITE); @@ -203,7 +203,7 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno) BufferGetBlockNumber(buf), blkno); } else - buf = ReadOrZeroBuffer(rel, blkno); + buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno); LockBuffer(buf, HASH_WRITE); @@ -737,7 +737,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) MemSet(zerobuf, 0, sizeof(zerobuf)); RelationOpenSmgr(rel); - smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp); + smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, rel->rd_istemp); return true; } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index cb2f428afe69652e06c6c2f0a25f50346ecd9601..6d3528323bf016339031ed1ea0a0f2795963880e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.261 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.262 2008/08/11 11:05:10 heikki Exp $ * * * INTERFACE ROUTINES @@ -3906,7 +3906,8 @@ log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, * not do anything that assumes we 
are touching a heap. */ XLogRecPtr -log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page) +log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, + Page page) { xl_heap_newpage xlrec; XLogRecPtr recptr; @@ -3916,6 +3917,7 @@ log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page) START_CRIT_SECTION(); xlrec.node = *rnode; + xlrec.forknum = forkNum; xlrec.blkno = blkno; rdata[0].data = (char *) &xlrec; @@ -4714,7 +4716,7 @@ heap_sync(Relation rel) /* main heap */ FlushRelationBuffers(rel); /* FlushRelationBuffers will have opened rd_smgr */ - smgrimmedsync(rel->rd_smgr); + smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM); /* toast heap, if any */ if (OidIsValid(rel->rd_rel->reltoastrelid)) @@ -4723,7 +4725,7 @@ heap_sync(Relation rel) toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock); FlushRelationBuffers(toastrel); - smgrimmedsync(toastrel->rd_smgr); + smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM); heap_close(toastrel, AccessShareLock); } } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 57a7430244a3e6d2f619bc21be47dae3d46549f5..cd7302bd5d718b84ba7efa6e77bf9d9881747b10 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -96,7 +96,7 @@ * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.14 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -270,10 +270,11 @@ end_heap_rewrite(RewriteState state) { if (state->rs_use_wal) log_newpage(&state->rs_new_rel->rd_node, + MAIN_FORKNUM, state->rs_blockno, state->rs_buffer); RelationOpenSmgr(state->rs_new_rel); - smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno, + smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, 
state->rs_blockno, (char *) state->rs_buffer, true); } @@ -606,6 +607,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup) /* XLOG stuff */ if (state->rs_use_wal) log_newpage(&state->rs_new_rel->rd_node, + MAIN_FORKNUM, state->rs_blockno, page); @@ -616,8 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) * end_heap_rewrite. */ RelationOpenSmgr(state->rs_new_rel); - smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno, - (char *) page, true); + smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, + state->rs_blockno, (char *) page, true); state->rs_blockno++; state->rs_buffer_valid = false; diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 5fdccd2fb14a9d34d87ca1513ef83396a2cd7876..7dcfa10eeecbc13125cb743a5324ae65ccb1798f 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -57,7 +57,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.116 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.117 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -267,7 +267,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) if (wstate->btws_use_wal) { /* We use the heap NEWPAGE record type for this */ - log_newpage(&wstate->index->rd_node, blkno, page); + log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page); } else { @@ -286,7 +286,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) { if (!wstate->btws_zeropage) wstate->btws_zeropage = (Page) palloc0(BLCKSZ); - smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++, + smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, + wstate->btws_pages_written++, (char *) wstate->btws_zeropage, true); } @@ -299,13 +300,15 @@ _bt_blwritepage(BTWriteState *wstate, Page page, 
BlockNumber blkno) if (blkno == wstate->btws_pages_written) { /* extending the file... */ - smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true); + smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno, + (char *) page, true); wstate->btws_pages_written++; } else { /* overwriting a block we zero-filled before */ - smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true); + smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno, + (char *) page, true); } pfree(page); @@ -809,6 +812,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) if (!wstate->index->rd_istemp) { RelationOpenSmgr(wstate->index); - smgrimmedsync(wstate->index->rd_smgr); + smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM); } } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 57eb7eeea495760fb5888b8df6bd3480ddcfeb3a..b86bdc36677acc73177d062bfe57b00963468fa7 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.44 2008/08/01 13:16:08 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.45 2008/08/11 11:05:10 heikki Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -141,12 +141,12 @@ static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, - RelFileNode *rels); + RelFileFork *rels); static void RecordTransactionAbortPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, - RelFileNode *rels); + RelFileFork *rels); static void ProcessRecords(char *bufptr, TransactionId xid, const TwoPhaseCallback callbacks[]); @@ -694,8 +694,8 @@ TwoPhaseGetDummyProc(TransactionId xid) * * 1. TwoPhaseFileHeader * 2. TransactionId[] (subtransactions) - * 3. 
RelFileNode[] (files to be deleted at commit) - * 4. RelFileNode[] (files to be deleted at abort) + * 3. RelFileFork[] (files to be deleted at commit) + * 4. RelFileFork[] (files to be deleted at abort) * 5. TwoPhaseRecordOnDisk * 6. ... * 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID) @@ -793,8 +793,8 @@ StartPrepare(GlobalTransaction gxact) TransactionId xid = gxact->proc.xid; TwoPhaseFileHeader hdr; TransactionId *children; - RelFileNode *commitrels; - RelFileNode *abortrels; + RelFileFork *commitrels; + RelFileFork *abortrels; /* Initialize linked list */ records.head = palloc0(sizeof(XLogRecData)); @@ -832,12 +832,12 @@ StartPrepare(GlobalTransaction gxact) } if (hdr.ncommitrels > 0) { - save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode)); + save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileFork)); pfree(commitrels); } if (hdr.nabortrels > 0) { - save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode)); + save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileFork)); pfree(abortrels); } } @@ -1140,8 +1140,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) TwoPhaseFileHeader *hdr; TransactionId latestXid; TransactionId *children; - RelFileNode *commitrels; - RelFileNode *abortrels; + RelFileFork *commitrels; + RelFileFork *abortrels; int i; /* @@ -1169,10 +1169,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit) bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); children = (TransactionId *) bufptr; bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId)); - commitrels = (RelFileNode *) bufptr; - bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode)); - abortrels = (RelFileNode *) bufptr; - bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); + commitrels = (RelFileFork *) bufptr; + bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork)); + abortrels = (RelFileFork *) bufptr; + bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork)); /* compute latestXid among all 
children */ latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children); @@ -1215,12 +1215,20 @@ FinishPreparedTransaction(const char *gid, bool isCommit) if (isCommit) { for (i = 0; i < hdr->ncommitrels; i++) - smgrdounlink(smgropen(commitrels[i]), false, false); + { + SMgrRelation srel = smgropen(commitrels[i].rnode); + smgrdounlink(srel, commitrels[i].forknum, false, false); + smgrclose(srel); + } } else { for (i = 0; i < hdr->nabortrels; i++) - smgrdounlink(smgropen(abortrels[i]), false, false); + { + SMgrRelation srel = smgropen(abortrels[i].rnode); + smgrdounlink(srel, abortrels[i].forknum, false, false); + smgrclose(srel); + } } /* And now do the callbacks */ @@ -1631,8 +1639,8 @@ RecoverPreparedTransactions(void) bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); subxids = (TransactionId *) bufptr; bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId)); - bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode)); - bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); + bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork)); + bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork)); /* * Reconstruct subtrans state for the transaction --- needed @@ -1685,7 +1693,7 @@ RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, - RelFileNode *rels) + RelFileFork *rels) { XLogRecData rdata[3]; int lastrdata = 0; @@ -1710,7 +1718,7 @@ RecordTransactionCommitPrepared(TransactionId xid, { rdata[0].next = &(rdata[1]); rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].len = nrels * sizeof(RelFileFork); rdata[1].buffer = InvalidBuffer; lastrdata = 1; } @@ -1760,7 +1768,7 @@ RecordTransactionAbortPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, - RelFileNode *rels) + RelFileFork *rels) { XLogRecData rdata[3]; int lastrdata = 0; @@ -1790,7 +1798,7 @@ RecordTransactionAbortPrepared(TransactionId xid, { rdata[0].next = &(rdata[1]); 
rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].len = nrels * sizeof(RelFileFork); rdata[1].buffer = InvalidBuffer; lastrdata = 1; } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 10645b2b7fb77d284dd045ec7ffd28778039dbac..5f6c9df677a2696a61276007a1d1e2de1465e878 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.264 2008/05/12 20:01:58 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.265 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -819,7 +819,7 @@ RecordTransactionCommit(void) bool markXidCommitted = TransactionIdIsValid(xid); TransactionId latestXid = InvalidTransactionId; int nrels; - RelFileNode *rels; + RelFileFork *rels; bool haveNonTemp; int nchildren; TransactionId *children; @@ -900,7 +900,7 @@ RecordTransactionCommit(void) { rdata[0].next = &(rdata[1]); rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].len = nrels * sizeof(RelFileFork); rdata[1].buffer = InvalidBuffer; lastrdata = 1; } @@ -1203,7 +1203,7 @@ RecordTransactionAbort(bool isSubXact) TransactionId xid = GetCurrentTransactionIdIfAny(); TransactionId latestXid; int nrels; - RelFileNode *rels; + RelFileFork *rels; int nchildren; TransactionId *children; XLogRecData rdata[3]; @@ -1264,7 +1264,7 @@ RecordTransactionAbort(bool isSubXact) { rdata[0].next = &(rdata[1]); rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].len = nrels * sizeof(RelFileFork); rdata[1].buffer = InvalidBuffer; lastrdata = 1; } @@ -4282,8 +4282,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - XLogDropRelation(xlrec->xnodes[i]); - 
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); + SMgrRelation srel; + + XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum); + + srel = smgropen(xlrec->xnodes[i].rnode); + smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true); + smgrclose(srel); } } @@ -4317,8 +4322,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - XLogDropRelation(xlrec->xnodes[i]); - smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); + SMgrRelation srel; + + XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum); + + srel = smgropen(xlrec->xnodes[i].rnode); + smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true); + smgrclose(srel); } } @@ -4374,10 +4384,12 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - RelFileNode rnode = xlrec->xnodes[i]; + RelFileNode rnode = xlrec->xnodes[i].rnode; + ForkNumber forknum = xlrec->xnodes[i].forknum; - appendStringInfo(buf, " %u/%u/%u", - rnode.spcNode, rnode.dbNode, rnode.relNode); + appendStringInfo(buf, " %u/%u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode, + forknum); } } if (xlrec->nsubxacts > 0) @@ -4402,10 +4414,12 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - RelFileNode rnode = xlrec->xnodes[i]; + RelFileNode rnode = xlrec->xnodes[i].rnode; + ForkNumber forknum = xlrec->xnodes[i].forknum; - appendStringInfo(buf, " %u/%u/%u", - rnode.spcNode, rnode.dbNode, rnode.relNode); + appendStringInfo(buf, " %u/%u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode, + forknum); } } if (xlrec->nsubxacts > 0) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 72e531d3dc3a232f69de455939908daedd809eae..709836f2be75a27e4b3c4210a4ea62faf8904c3d 100644 --- a/src/backend/access/transam/xlog.c +++ 
b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.316 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.317 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -1034,8 +1034,7 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites, /* * The page needs to be backed up, so set up *bkpb */ - bkpb->node = BufferGetFileNode(rdata->buffer); - bkpb->block = BufferGetBlockNumber(rdata->buffer); + BufferGetTag(rdata->buffer, &bkpb->node, &bkpb->fork, &bkpb->block); if (rdata->buffer_std) { @@ -2855,7 +2854,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) memcpy(&bkpb, blk, sizeof(BkpBlock)); blk += sizeof(BkpBlock); - buffer = XLogReadBuffer(bkpb.node, bkpb.block, true); + buffer = XLogReadBufferWithFork(bkpb.node, bkpb.fork, bkpb.block, + true); Assert(BufferIsValid(buffer)); page = (Page) BufferGetPage(buffer); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 3e2d5046da66d0dafd0a114bdcb95ba2a36607a4..59124e349e4ad4e88f39fd00aa002300ab7d5bd7 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.57 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ typedef struct xl_invalid_page_key { RelFileNode node; /* the relation */ + ForkNumber forkno; /* the fork number */ 
BlockNumber blkno; /* the page */ } xl_invalid_page_key; @@ -51,7 +52,8 @@ static HTAB *invalid_page_tab = NULL; /* Log a reference to an invalid page */ static void -log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) +log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, + bool present) { xl_invalid_page_key key; xl_invalid_page *hentry; @@ -63,11 +65,11 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) * something about the XLOG record that generated the reference). */ if (present) - elog(DEBUG1, "page %u of relation %u/%u/%u is uninitialized", - blkno, node.spcNode, node.dbNode, node.relNode); + elog(DEBUG1, "page %u of relation %u/%u/%u/%u is uninitialized", + blkno, node.spcNode, node.dbNode, node.relNode, forkno); else - elog(DEBUG1, "page %u of relation %u/%u/%u does not exist", - blkno, node.spcNode, node.dbNode, node.relNode); + elog(DEBUG1, "page %u of relation %u/%u/%u/%u does not exist", + blkno, node.spcNode, node.dbNode, node.relNode, forkno); if (invalid_page_tab == NULL) { @@ -87,6 +89,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) /* we currently assume xl_invalid_page_key contains no padding */ key.node = node; + key.forkno = forkno; key.blkno = blkno; hentry = (xl_invalid_page *) hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found); @@ -104,7 +107,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) /* Forget any invalid pages >= minblkno, because they've been dropped */ static void -forget_invalid_pages(RelFileNode node, BlockNumber minblkno) +forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno) { HASH_SEQ_STATUS status; xl_invalid_page *hentry; @@ -117,11 +120,12 @@ forget_invalid_pages(RelFileNode node, BlockNumber minblkno) while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL) { if (RelFileNodeEquals(hentry->key.node, node) && + hentry->key.forkno == forkno && hentry->key.blkno >= 
minblkno) { - elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped", + elog(DEBUG2, "page %u of relation %u/%u/%u/%u has been dropped", hentry->key.blkno, hentry->key.node.spcNode, - hentry->key.node.dbNode, hentry->key.node.relNode); + hentry->key.node.dbNode, hentry->key.node.relNode, forkno); if (hash_search(invalid_page_tab, (void *) &hentry->key, @@ -223,6 +227,18 @@ XLogCheckInvalidPages(void) */ Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) +{ + return XLogReadBufferWithFork(rnode, MAIN_FORKNUM, blkno, init); +} + +/* + * XLogReadBufferWithFork + * Like XLogReadBuffer, but for reading other relation forks than + * the main one. + */ +Buffer +XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum, + BlockNumber blkno, bool init) { BlockNumber lastblock; Buffer buffer; @@ -241,21 +257,21 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) * filesystem loses an inode during a crash. Better to write the data * until we are actually told to delete the file.) 
*/ - smgrcreate(smgr, false, true); + smgrcreate(smgr, forknum, false, true); - lastblock = smgrnblocks(smgr); + lastblock = smgrnblocks(smgr, forknum); if (blkno < lastblock) { /* page exists in file */ - buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init); + buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, init); } else { /* hm, page doesn't exist in file */ if (!init) { - log_invalid_page(rnode, blkno, false); + log_invalid_page(rnode, forknum, blkno, false); return InvalidBuffer; } /* OK to extend the file */ @@ -266,7 +282,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) { if (buffer != InvalidBuffer) ReleaseBuffer(buffer); - buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false); + buffer = ReadBufferWithoutRelcache(rnode, false, forknum, + P_NEW, false); lastblock++; } Assert(BufferGetBlockNumber(buffer) == blkno); @@ -282,7 +299,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) if (PageIsNew(page)) { UnlockReleaseBuffer(buffer); - log_invalid_page(rnode, blkno, true); + log_invalid_page(rnode, forknum, blkno, true); return InvalidBuffer; } } @@ -363,12 +380,9 @@ FreeFakeRelcacheEntry(Relation fakerel) * any open "invalid-page" records for the relation. */ void -XLogDropRelation(RelFileNode rnode) +XLogDropRelation(RelFileNode rnode, ForkNumber forknum) { - /* Tell smgr to forget about this relation as well */ - smgrclosenode(rnode); - - forget_invalid_pages(rnode, 0); + forget_invalid_pages(rnode, forknum, 0); } /* @@ -397,7 +411,8 @@ XLogDropDatabase(Oid dbid) * We need to clean up any open "invalid-page" records for the dropped pages. 
*/ void -XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks) +XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, + BlockNumber nblocks) { - forget_invalid_pages(rnode, nblocks); + forget_invalid_pages(rnode, forkNum, nblocks); } diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 30571267b46b805733b144f1f6cf9167310f6d6a..eb7b39c86bfe6216473ff9d463bdb65d9d1ea9c5 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.77 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.78 2008/08/11 11:05:10 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -42,7 +42,8 @@ #include "utils/tqual.h" -#define OIDCHARS 10 /* max chars printed by %u */ +#define OIDCHARS 10 /* max chars printed by %u */ +#define FORKNUMCHARS 1 /* max chars for a fork number */ /* @@ -51,7 +52,7 @@ * Result is a palloc'd string. 
*/ char * -relpath(RelFileNode rnode) +relpath(RelFileNode rnode, ForkNumber forknum) { int pathlen; char *path; @@ -60,26 +61,38 @@ relpath(RelFileNode rnode) { /* Shared system relations live in {datadir}/global */ Assert(rnode.dbNode == 0); - pathlen = 7 + OIDCHARS + 1; + pathlen = 7 + OIDCHARS + 1 + FORKNUMCHARS + 1; path = (char *) palloc(pathlen); - snprintf(path, pathlen, "global/%u", - rnode.relNode); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "global/%u_%u", + rnode.relNode, forknum); + else + snprintf(path, pathlen, "global/%u", rnode.relNode); } else if (rnode.spcNode == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1; + pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNUMCHARS + 1; path = (char *) palloc(pathlen); - snprintf(path, pathlen, "base/%u/%u", - rnode.dbNode, rnode.relNode); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "base/%u/%u_%u", + rnode.dbNode, rnode.relNode, forknum); + else + snprintf(path, pathlen, "base/%u/%u", + rnode.dbNode, rnode.relNode); } else { /* All other tablespaces are accessed via symlinks */ - pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1; + pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1 + + FORKNUMCHARS + 1; path = (char *) palloc(pathlen); - snprintf(path, pathlen, "pg_tblspc/%u/%u/%u", - rnode.spcNode, rnode.dbNode, rnode.relNode); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "pg_tblspc/%u/%u/%u_%u", + rnode.spcNode, rnode.dbNode, rnode.relNode, forknum); + else + snprintf(path, pathlen, "pg_tblspc/%u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode); } return path; } @@ -431,7 +444,7 @@ GetNewRelFileNode(Oid reltablespace, bool relisshared, Relation pg_class) rnode.relNode = GetNewObjectId(); /* Check for existing file of same name */ - rpath = relpath(rnode); + rpath = relpath(rnode, MAIN_FORKNUM); fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0); if (fd >= 0) diff --git 
a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index b870d36eb68b847486d4592d13f33e3ce0756dd2..c5cea2f67fd065eaf2ba294a87eac7076991c459 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.336 2008/07/30 19:35:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.337 2008/08/11 11:05:10 heikki Exp $ * * * INTERFACE ROUTINES @@ -292,13 +292,16 @@ heap_create(const char *relname, shared_relation); /* - * have the storage manager create the relation's disk file, if needed. + * Have the storage manager create the relation's disk file, if needed. + * + * We only create storage for the main fork here. The caller is + * responsible for creating any additional forks if needed. */ if (create_storage) { Assert(rel->rd_smgr == NULL); RelationOpenSmgr(rel); - smgrcreate(rel->rd_smgr, rel->rd_istemp, false); + smgrcreate(rel->rd_smgr, MAIN_FORKNUM, rel->rd_istemp, false); } return rel; @@ -1385,13 +1388,18 @@ heap_drop_with_catalog(Oid relid) rel = relation_open(relid, AccessExclusiveLock); /* - * Schedule unlinking of the relation's physical file at commit. + * Schedule unlinking of the relation's physical files at commit. 
*/ if (rel->rd_rel->relkind != RELKIND_VIEW && rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) { + ForkNumber forknum; + RelationOpenSmgr(rel); - smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + if (smgrexists(rel->rd_smgr, forknum)) + smgrscheduleunlink(rel->rd_smgr, forknum, rel->rd_istemp); + RelationCloseSmgr(rel); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index f06307a77223f063b4839227e5aeab4a2118c13e..abe8d29ac1658a30851f553382996c6ef6eaf1dc 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.301 2008/08/10 19:02:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.302 2008/08/11 11:05:10 heikki Exp $ * * * INTERFACE ROUTINES @@ -874,6 +874,7 @@ index_drop(Oid indexId) Relation indexRelation; HeapTuple tuple; bool hasexprs; + ForkNumber forknum; /* * To drop an index safely, we must grab exclusive lock on its parent @@ -892,11 +893,14 @@ index_drop(Oid indexId) userIndexRelation = index_open(indexId, AccessExclusiveLock); /* - * Schedule physical removal of the file + * Schedule physical removal of the files */ RelationOpenSmgr(userIndexRelation); - smgrscheduleunlink(userIndexRelation->rd_smgr, - userIndexRelation->rd_istemp); + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + if (smgrexists(userIndexRelation->rd_smgr, forknum)) + smgrscheduleunlink(userIndexRelation->rd_smgr, forknum, + userIndexRelation->rd_istemp); + RelationCloseSmgr(userIndexRelation); /* * Close and flush the index's relcache entry, to ensure relcache doesn't @@ -1260,6 +1264,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid) Relation pg_class; HeapTuple tuple; Form_pg_class rd_rel; + ForkNumber i; /* Can't change relfilenode for nailed tables (indexes ok though) */ Assert(!relation->rd_isnailed || @@ -1290,18 +1295,29 @@ 
setNewRelfilenode(Relation relation, TransactionId freezeXid) RelationGetRelid(relation)); rd_rel = (Form_pg_class) GETSTRUCT(tuple); - /* create another storage file. Is it a little ugly ? */ - /* NOTE: any conflict in relfilenode value will be caught here */ + RelationOpenSmgr(relation); + + /* + * ... and create storage for corresponding forks in the new relfilenode. + * + * NOTE: any conflict in relfilenode value will be caught here + */ newrnode = relation->rd_node; newrnode.relNode = newrelfilenode; - srel = smgropen(newrnode); - smgrcreate(srel, relation->rd_istemp, false); - smgrclose(srel); - /* schedule unlinking old relfilenode */ - RelationOpenSmgr(relation); - smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp); + /* Create the main fork, like heap_create() does */ + smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false); + + /* schedule unlinking old files */ + for (i = 0; i <= MAX_FORKNUM; i++) + { + if (smgrexists(relation->rd_smgr, i)) + smgrscheduleunlink(relation->rd_smgr, i, relation->rd_istemp); + } + + smgrclose(srel); + RelationCloseSmgr(relation); /* update the pg_class row */ rd_rel->relfilenode = newrelfilenode; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 58454dcba977d4844dc866d8e8950f949347e511..5167a40927aa86c9b953101992995c514da2c875 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.261 2008/07/16 19:33:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.262 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -318,7 +318,8 @@ static void ATExecEnableDisableRule(Relation rel, char *rulename, char fires_when); static void ATExecAddInherit(Relation rel, RangeVar *parent); static void ATExecDropInherit(Relation rel, RangeVar *parent); -static void copy_relation_data(Relation 
rel, SMgrRelation dst); +static void copy_relation_data(SMgrRelation rel, SMgrRelation dst, + ForkNumber forkNum, bool istemp); /* ---------------------------------------------------------------- @@ -6483,6 +6484,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) Relation pg_class; HeapTuple tuple; Form_pg_class rd_rel; + ForkNumber forkNum; /* * Need lock here in case we are recursing to toast table or index @@ -6538,26 +6540,42 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) elog(ERROR, "cache lookup failed for relation %u", tableOid); rd_rel = (Form_pg_class) GETSTRUCT(tuple); - /* create another storage file. Is it a little ugly ? */ - /* NOTE: any conflict in relfilenode value will be caught here */ + /* + * Since we copy the file directly without looking at the shared buffers, + * we'd better first flush out any pages of the source relation that are + * in shared buffers. We assume no new changes will be made while we are + * holding exclusive lock on the rel. + */ + FlushRelationBuffers(rel); + + /* Open old and new relation */ newrnode = rel->rd_node; newrnode.spcNode = newTableSpace; - dstrel = smgropen(newrnode); - smgrcreate(dstrel, rel->rd_istemp, false); - - /* copy relation data to the new physical file */ - copy_relation_data(rel, dstrel); - /* schedule unlinking old physical file */ RelationOpenSmgr(rel); - smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); /* - * Now drop smgr references. The source was already dropped by - * smgrscheduleunlink. + * Create and copy all forks of the relation, and schedule unlinking + * of old physical files. + * + * NOTE: any conflict in relfilenode value will be caught in + * smgrcreate() below. 
*/ + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + { + if (smgrexists(rel->rd_smgr, forkNum)) + { + smgrcreate(dstrel, forkNum, rel->rd_istemp, false); + copy_relation_data(rel->rd_smgr, dstrel, forkNum, rel->rd_istemp); + + smgrscheduleunlink(rel->rd_smgr, forkNum, rel->rd_istemp); + } + } + + /* Close old and new relation */ smgrclose(dstrel); + RelationCloseSmgr(rel); /* update the pg_class row */ rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace; @@ -6584,47 +6602,37 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) * Copy data, block by block */ static void -copy_relation_data(Relation rel, SMgrRelation dst) +copy_relation_data(SMgrRelation src, SMgrRelation dst, + ForkNumber forkNum, bool istemp) { - SMgrRelation src; bool use_wal; BlockNumber nblocks; BlockNumber blkno; char buf[BLCKSZ]; Page page = (Page) buf; - /* - * Since we copy the file directly without looking at the shared buffers, - * we'd better first flush out any pages of the source relation that are - * in shared buffers. We assume no new changes will be made while we are - * holding exclusive lock on the rel. - */ - FlushRelationBuffers(rel); - /* * We need to log the copied data in WAL iff WAL archiving is enabled AND * it's not a temp rel. */ - use_wal = XLogArchivingActive() && !rel->rd_istemp; + use_wal = XLogArchivingActive() && !istemp; - nblocks = RelationGetNumberOfBlocks(rel); - /* RelationGetNumberOfBlocks will certainly have opened rd_smgr */ - src = rel->rd_smgr; + nblocks = smgrnblocks(src, forkNum); for (blkno = 0; blkno < nblocks; blkno++) { - smgrread(src, blkno, buf); + smgrread(src, forkNum, blkno, buf); /* XLOG stuff */ if (use_wal) - log_newpage(&dst->smgr_rnode, blkno, page); + log_newpage(&dst->smgr_rnode, forkNum, blkno, page); /* * Now write the page. We say isTemp = true even if it's not a temp * rel, because there's no need for smgr to schedule an fsync for this * write; we'll do it ourselves below. 
*/ - smgrextend(dst, blkno, buf, true); + smgrextend(dst, forkNum, blkno, buf, true); } /* @@ -6641,8 +6649,8 @@ copy_relation_data(Relation rel, SMgrRelation dst) * wouldn't replay our earlier WAL entries. If we do not fsync those pages * here, they might still not be on disk when the crash occurs. */ - if (!rel->rd_istemp) - smgrimmedsync(dst); + if (!istemp) + smgrimmedsync(dst, forkNum); } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 21ec395f29dc830eb5c5c4373454dae97838b885..823c5243797f90782d9628a81cba11b7f32bb126 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.50 2008/05/12 00:00:50 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.51 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -113,6 +113,7 @@ typedef struct { RelFileNode rnode; + ForkNumber forknum; BlockNumber segno; /* see md.c for special values */ /* might add a real request-type field later; not needed yet */ } BgWriterRequest; @@ -990,7 +991,7 @@ RequestCheckpoint(int flags) * than we have to here. 
*/ bool -ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno) +ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) { BgWriterRequest *request; @@ -1067,7 +1068,7 @@ AbsorbFsyncRequests(void) LWLockRelease(BgWriterCommLock); for (request = requests; n > 0; request++, n--) - RememberFsyncRequest(request->rnode, request->segno); + RememberFsyncRequest(request->rnode, request->forknum, request->segno); if (requests) pfree(requests); diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 3c32ddbb7ed6fd50610f58edd13c04fc725c2614..1491aee1635f0d8543941abe243d5df0457a6f49 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.127 2008/06/19 00:46:05 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.128 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -482,8 +482,14 @@ DefineQueryRewrite(char *rulename, */ if (RelisBecomingView) { + ForkNumber forknum; + RelationOpenSmgr(event_relation); - smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + if (smgrexists(event_relation->rd_smgr, forknum)) + smgrscheduleunlink(event_relation->rd_smgr, forknum, + event_relation->rd_istemp); + RelationCloseSmgr(event_relation); } /* Close rel, but keep lock till commit... 
*/ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 456dc4233604e7f49e13b0c532f02c10c27ea8c4..281d23136dd793cf7466d22c41a1606b5ddc33ec 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.236 2008/08/05 15:09:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.237 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -78,9 +78,10 @@ static bool IsForInput; static volatile BufferDesc *PinCountWaitBuf = NULL; -static Buffer ReadBuffer_relcache(Relation reln, BlockNumber blockNum, - bool zeroPage, BufferAccessStrategy strategy); -static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, BlockNumber blockNum, +static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum, + BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy); +static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, + ForkNumber forkNum, BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy, bool *hit); static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy); static void PinBuffer_Locked(volatile BufferDesc *buf); @@ -92,7 +93,8 @@ static bool StartBufferIO(volatile BufferDesc *buf, bool forInput); static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, int set_flag_bits); static void buffer_write_error_callback(void *arg); -static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, +static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, + BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); @@ -117,7 +119,17 @@ static void AtProcExit_Buffers(int code, Datum arg); Buffer ReadBuffer(Relation reln, BlockNumber 
blockNum) { - return ReadBuffer_relcache(reln, blockNum, false, NULL); + return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, NULL); +} + +/* + * ReadBufferWithFork -- same as ReadBuffer, but for accessing relation + * forks other than MAIN_FORKNUM. + */ +Buffer +ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum) +{ + return ReadBuffer_relcache(reln, forkNum, blockNum, false, NULL); } /* @@ -128,7 +140,7 @@ Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, BufferAccessStrategy strategy) { - return ReadBuffer_relcache(reln, blockNum, false, strategy); + return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, strategy); } /* @@ -143,32 +155,32 @@ ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, * the page is modified and written out. P_NEW is OK, though. */ Buffer -ReadOrZeroBuffer(Relation reln, BlockNumber blockNum) +ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) { - return ReadBuffer_relcache(reln, blockNum, true, NULL); + return ReadBuffer_relcache(reln, forkNum, blockNum, true, NULL); } /* - * ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a + * ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a * relcache entry for the relation. If zeroPage is true, this behaves * like ReadOrZeroBuffer rather than ReadBuffer. */ Buffer -ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - BlockNumber blockNum, bool zeroPage) +ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, + ForkNumber forkNum, BlockNumber blockNum, bool zeroPage) { bool hit; SMgrRelation smgr = smgropen(rnode); - return ReadBuffer_common(smgr, isTemp, blockNum, zeroPage, NULL, &hit); + return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, zeroPage, NULL, &hit); } /* - * ReadBuffer_relcache -- common logic for ReadBuffer-variants that + * ReadBuffer_relcache -- common logic for ReadBuffer-variants that * operate on a Relation. 
*/ static Buffer -ReadBuffer_relcache(Relation reln, BlockNumber blockNum, +ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy) { bool hit; @@ -182,7 +194,7 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum, * hit or miss. */ pgstat_count_buffer_read(reln); - buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, blockNum, + buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, forkNum, blockNum, zeroPage, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); @@ -195,8 +207,9 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum, * *hit is set to true if the request was satisfied from shared buffer cache. */ static Buffer -ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, - bool zeroPage, BufferAccessStrategy strategy, bool *hit) +ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, + BlockNumber blockNum, bool zeroPage, + BufferAccessStrategy strategy, bool *hit) { volatile BufferDesc *bufHdr; Block bufBlock; @@ -212,7 +225,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, /* Substitute proper block number if caller asked for P_NEW */ if (isExtend) - blockNum = smgrnblocks(smgr); + blockNum = smgrnblocks(smgr, forkNum); TRACE_POSTGRESQL_BUFFER_READ_START(blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, isLocalBuf); @@ -220,7 +233,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, if (isLocalBuf) { ReadLocalBufferCount++; - bufHdr = LocalBufferAlloc(smgr, blockNum, &found); + bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found); if (found) { LocalBufferHitCount++; @@ -239,7 +252,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. 
*/ - bufHdr = BufferAlloc(smgr, blockNum, strategy, &found); + bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found); if (found) { BufferHitCount++; @@ -341,7 +354,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, { /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); - smgrextend(smgr, blockNum, (char *) bufBlock, isLocalBuf); + smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf); } else { @@ -353,7 +366,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, MemSet((char *) bufBlock, 0, BLCKSZ); else { - smgrread(smgr, blockNum, (char *) bufBlock); + smgrread(smgr, forkNum, blockNum, (char *) bufBlock); /* check for garbage data */ if (!PageHeaderIsValid((PageHeader) bufBlock)) @@ -363,7 +376,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page", - blockNum, + blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode))); @@ -421,7 +434,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, * No locks are held either at entry or exit. 
*/ static volatile BufferDesc * -BufferAlloc(SMgrRelation smgr, +BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr) @@ -438,7 +451,7 @@ BufferAlloc(SMgrRelation smgr, bool valid; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -903,6 +916,7 @@ ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum) { + ForkNumber forkNum = MAIN_FORKNUM; volatile BufferDesc *bufHdr; if (BufferIsValid(buffer)) @@ -912,7 +926,8 @@ ReleaseAndReadBuffer(Buffer buffer, Assert(LocalRefCount[-buffer - 1] > 0); bufHdr = &LocalBufferDescriptors[-buffer - 1]; if (bufHdr->tag.blockNum == blockNum && - RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) + RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) && + bufHdr->tag.forkNum == forkNum) return buffer; ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer); LocalRefCount[-buffer - 1]--; @@ -923,7 +938,8 @@ ReleaseAndReadBuffer(Buffer buffer, bufHdr = &BufferDescriptors[buffer - 1]; /* we have pin, so it's ok to examine tag without spinlock */ if (bufHdr->tag.blockNum == blockNum && - RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) + RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) && + bufHdr->tag.forkNum == forkNum) return buffer; UnpinBuffer(bufHdr, true); } @@ -1734,23 +1750,28 @@ BufferGetBlockNumber(Buffer buffer) } /* - * BufferGetFileNode - * Returns the relation ID (RelFileNode) associated with a buffer. - * - * This should make the same checks as BufferGetBlockNumber, but since the - * two are generally called together, we don't bother. + * BufferGetTag + * Returns the relfilenode, fork number and block number associated with + * a buffer. 
*/ -RelFileNode -BufferGetFileNode(Buffer buffer) +void +BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum) { volatile BufferDesc *bufHdr; + /* Do the same checks as BufferGetBlockNumber. */ + Assert(BufferIsPinned(buffer)); + if (BufferIsLocal(buffer)) bufHdr = &(LocalBufferDescriptors[-buffer - 1]); else bufHdr = &BufferDescriptors[buffer - 1]; - return bufHdr->tag.rnode; + /* pinned, so OK to read tag without spinlock */ + *rnode = bufHdr->tag.rnode; + *forknum = bufHdr->tag.forkNum; + *blknum = bufHdr->tag.blockNum; } /* @@ -1820,6 +1841,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) UnlockBufHdr(buf); smgrwrite(reln, + buf->tag.forkNum, buf->tag.blockNum, (char *) BufHdrGetBlock(buf), false); @@ -1849,7 +1871,7 @@ RelationGetNumberOfBlocks(Relation relation) /* Open it at the smgr level if not already done */ RelationOpenSmgr(relation); - return smgrnblocks(relation->rd_smgr); + return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); } /* @@ -1869,7 +1891,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) rel->rd_targblock = InvalidBlockNumber; /* Do the real work */ - smgrtruncate(rel->rd_smgr, nblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp); } /* --------------------------------------------------------------------- @@ -1899,14 +1921,14 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * -------------------------------------------------------------------- */ void -DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, +DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, BlockNumber firstDelBlock) { int i; if (istemp) { - DropRelFileNodeLocalBuffers(rnode, firstDelBlock); + DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock); return; } @@ -1916,6 +1938,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, LockBufHdr(bufHdr); if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + bufHdr->tag.forkNum == forkNum && 
bufHdr->tag.blockNum >= firstDelBlock) InvalidateBuffer(bufHdr); /* releases spinlock */ else @@ -2055,6 +2078,7 @@ FlushRelationBuffers(Relation rel) error_context_stack = &errcontext; smgrwrite(rel->rd_smgr, + bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), true); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index ae5f3049a5f1059c6a143d2d751aeeba9026e373..386026f14ddc8f559decc58810af84f46ae9ade4 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.80 2008/06/12 09:12:31 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.81 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -61,7 +61,8 @@ static Block GetLocalBufferStorage(void); * (hence, usage_count is always advanced). */ BufferDesc * -LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) +LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, + bool *foundPtr) { BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; @@ -70,7 +71,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) int trycounter; bool found; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -162,6 +163,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) /* And write... */ smgrwrite(oreln, + bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), true); @@ -250,7 +252,8 @@ MarkLocalBufferDirty(Buffer buffer) * See DropRelFileNodeBuffers in bufmgr.c for more notes. 
*/ void -DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock) +DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, + BlockNumber firstDelBlock) { int i; @@ -261,6 +264,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock) if ((bufHdr->flags & BM_TAG_VALID) && RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) { if (LocalRefCount[i] != 0) diff --git a/src/backend/storage/smgr/README b/src/backend/storage/smgr/README index 5d79ef54161a349db42b7f4375b28e30d268e077..cc798533f2e16a6da291506a6136241d8de564e2 100644 --- a/src/backend/storage/smgr/README +++ b/src/backend/storage/smgr/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.5 2008/03/21 13:23:28 momjian Exp $ +$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.6 2008/08/11 11:05:11 heikki Exp $ Storage Manager =============== @@ -32,3 +32,20 @@ The files in this directory, and their contents, are md.c The magnetic disk storage manager. Note that md.c in turn relies on src/backend/storage/file/fd.c. + +Relation Forks +============== + +Since 8.4, a single smgr relation can be comprised of multiple physical +files, called relation forks. This allows storing additional metadata like +Free Space information in additional forks, which can be grown and truncated +independently of the main data file, while still treating it all as a single +physical relation in system catalogs. + +It is assumed that the main fork, fork number 0 or MAIN_FORKNUM, always +exists. Fork numbers are assigned in src/include/storage/relfilenode.h. +Functions in smgr.c and md.c take an extra fork number argument, in addition +to relfilenode and block number, to identify which relation fork you want to +access. Since most code wants to access the main fork, a shortcut version of +ReadBuffer that accesses MAIN_FORKNUM is provided in the buffer manager for +convenience. 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index acd669f1f74cd7c661189c04e29d6a1c1d5c2772..a76fea454dcfc17da1d03a82ff97fd2c05c1935d 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.139 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,7 @@ #include "postmaster/bgwriter.h" #include "storage/fd.h" #include "storage/bufmgr.h" +#include "storage/relfilenode.h" #include "storage/smgr.h" #include "utils/hsearch.h" #include "utils/memutils.h" @@ -118,6 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ typedef struct { RelFileNode rnode; /* the targeted relation */ + ForkNumber forknum; BlockNumber segno; /* which segment */ } PendingOperationTag; @@ -151,15 +153,18 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */ } ExtensionBehavior; /* local routines */ -static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior); -static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg); +static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, + ExtensionBehavior behavior); +static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, + MdfdVec *seg); static void register_unlink(RelFileNode rnode); static MdfdVec *_fdvec_alloc(void); -static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, - int oflags); -static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, - bool isTemp, ExtensionBehavior behavior); -static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg); +static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, + BlockNumber segno, int oflags); +static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, + BlockNumber blkno, bool isTemp, 
ExtensionBehavior behavior); +static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, + MdfdVec *seg); /* @@ -197,23 +202,40 @@ mdinit(void) } } +/* + * mdexists() -- Does the physical file exist? + * + * Note: this will return true for lingering files, with pending deletions + */ +bool +mdexists(SMgrRelation reln, ForkNumber forkNum) +{ + /* + * Close it first, to ensure that we notice if the fork has been + * unlinked since we opened it. + */ + mdclose(reln, forkNum); + + return (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) != NULL); +} + /* * mdcreate() -- Create a new relation on magnetic disk. * * If isRedo is true, it's okay for the relation to exist already. */ void -mdcreate(SMgrRelation reln, bool isRedo) +mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) { char *path; File fd; - if (isRedo && reln->md_fd != NULL) + if (isRedo && reln->md_fd[forkNum] != NULL) return; /* created and opened already... */ - Assert(reln->md_fd == NULL); + Assert(reln->md_fd[forkNum] == NULL); - path = relpath(reln->smgr_rnode); + path = relpath(reln->smgr_rnode, forkNum); fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); @@ -236,20 +258,21 @@ mdcreate(SMgrRelation reln, bool isRedo) errno = save_errno; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create relation %u/%u/%u: %m", + errmsg("could not create relation %u/%u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forkNum))); } } pfree(path); - reln->md_fd = _fdvec_alloc(); + reln->md_fd[forkNum] = _fdvec_alloc(); - reln->md_fd->mdfd_vfd = fd; - reln->md_fd->mdfd_segno = 0; - reln->md_fd->mdfd_chain = NULL; + reln->md_fd[forkNum]->mdfd_vfd = fd; + reln->md_fd[forkNum]->mdfd_segno = 0; + reln->md_fd[forkNum]->mdfd_chain = NULL; } /* @@ -285,7 +308,7 @@ mdcreate(SMgrRelation reln, bool isRedo) * we are usually not in a transaction anymore when this is called. 
*/ void -mdunlink(RelFileNode rnode, bool isRedo) +mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) { char *path; int ret; @@ -294,14 +317,14 @@ mdunlink(RelFileNode rnode, bool isRedo) * We have to clean out any pending fsync requests for the doomed * relation, else the next mdsync() will fail. */ - ForgetRelationFsyncRequests(rnode); + ForgetRelationFsyncRequests(rnode, forkNum); - path = relpath(rnode); + path = relpath(rnode, forkNum); /* * Delete or truncate the first segment. */ - if (isRedo) + if (isRedo || forkNum != MAIN_FORKNUM) ret = unlink(path); else { @@ -326,10 +349,11 @@ mdunlink(RelFileNode rnode, bool isRedo) if (!isRedo || errno != ENOENT) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove relation %u/%u/%u: %m", + errmsg("could not remove relation %u/%u/%u/%u: %m", rnode.spcNode, rnode.dbNode, - rnode.relNode))); + rnode.relNode, + forkNum))); } /* @@ -353,11 +377,12 @@ mdunlink(RelFileNode rnode, bool isRedo) if (errno != ENOENT) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove segment %u of relation %u/%u/%u: %m", + errmsg("could not remove segment %u of relation %u/%u/%u/%u: %m", segno, rnode.spcNode, rnode.dbNode, - rnode.relNode))); + rnode.relNode, + forkNum))); break; } } @@ -367,7 +392,7 @@ mdunlink(RelFileNode rnode, bool isRedo) pfree(path); /* Register request to unlink first segment later */ - if (!isRedo) + if (!isRedo && forkNum == MAIN_FORKNUM) register_unlink(rnode); } @@ -381,7 +406,8 @@ mdunlink(RelFileNode rnode, bool isRedo) * causes intervening file space to become filled with zeroes. */ void -mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) +mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool isTemp) { off_t seekpos; int nbytes; @@ -389,7 +415,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) /* This assert is too expensive to have on normally ... 
*/ #ifdef CHECK_WRITE_VS_EXTEND - Assert(blocknum >= mdnblocks(reln)); + Assert(blocknum >= mdnblocks(reln, forknum)); #endif /* @@ -400,13 +426,14 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) if (blocknum == InvalidBlockNumber) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("cannot extend relation %u/%u/%u beyond %u blocks", + errmsg("cannot extend relation %u/%u/%u/%u beyond %u blocks", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, InvalidBlockNumber))); - v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE); + v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); @@ -423,37 +450,40 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek to block %u of relation %u/%u/%u: %m", + errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not extend relation %u/%u/%u: %m", + errmsg("could not extend relation %u/%u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode), + reln->smgr_rnode.relNode, + forknum), errhint("Check free disk space."))); /* short write: complain appropriately */ ereport(ERROR, (errcode(ERRCODE_DISK_FULL), - errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u", + errmsg("could not extend relation %u/%u/%u/%u: wrote only %d of %d bytes at block %u", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + 
forknum, nbytes, BLCKSZ, blocknum), errhint("Check free disk space."))); } if (!isTemp) - register_dirty_segment(reln, v); + register_dirty_segment(reln, forknum, v); - Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); } /* @@ -467,17 +497,17 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) * invent one out of whole cloth. */ static MdfdVec * -mdopen(SMgrRelation reln, ExtensionBehavior behavior) +mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior) { MdfdVec *mdfd; char *path; File fd; /* No work if already open */ - if (reln->md_fd) - return reln->md_fd; + if (reln->md_fd[forknum]) + return reln->md_fd[forknum]; - path = relpath(reln->smgr_rnode); + path = relpath(reln->smgr_rnode, forknum); fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600); @@ -499,21 +529,22 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) return NULL; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open relation %u/%u/%u: %m", + errmsg("could not open relation %u/%u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); } } pfree(path); - reln->md_fd = mdfd = _fdvec_alloc(); + reln->md_fd[forknum] = mdfd = _fdvec_alloc(); mdfd->mdfd_vfd = fd; mdfd->mdfd_segno = 0; mdfd->mdfd_chain = NULL; - Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE)); return mdfd; } @@ -522,15 +553,15 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) * mdclose() -- Close the specified relation, if it isn't closed already. 
*/ void -mdclose(SMgrRelation reln) +mdclose(SMgrRelation reln, ForkNumber forknum) { - MdfdVec *v = reln->md_fd; + MdfdVec *v = reln->md_fd[forknum]; /* No work if already closed */ if (v == NULL) return; - reln->md_fd = NULL; /* prevent dangling pointer after error */ + reln->md_fd[forknum] = NULL; /* prevent dangling pointer after error */ while (v != NULL) { @@ -549,13 +580,14 @@ mdclose(SMgrRelation reln) * mdread() -- Read the specified block from a relation. */ void -mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) +mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer) { off_t seekpos; int nbytes; MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); @@ -563,22 +595,24 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek to block %u of relation %u/%u/%u: %m", + errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read block %u of relation %u/%u/%u: %m", + errmsg("could not read block %u of relation %u/%u/%u/%u: %m", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); /* * Short read: we are at or past EOF, or we read a partial block at @@ -593,11 +627,12 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("could not read block %u of relation 
%u/%u/%u: read only %d of %d bytes", + errmsg("could not read block %u of relation %u/%u/%u/%u: read only %d of %d bytes", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, nbytes, BLCKSZ))); } } @@ -610,7 +645,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) * use mdextend(). */ void -mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) +mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool isTemp) { off_t seekpos; int nbytes; @@ -618,10 +654,10 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) /* This assert is too expensive to have on normally ... */ #ifdef CHECK_WRITE_VS_EXTEND - Assert(blocknum < mdnblocks(reln)); + Assert(blocknum < mdnblocks(reln, forknum)); #endif - v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); @@ -629,36 +665,39 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek to block %u of relation %u/%u/%u: %m", + errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write block %u of relation %u/%u/%u: %m", + errmsg("could not write block %u of relation %u/%u/%u/%u: %m", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); /* short write: complain appropriately */ ereport(ERROR, 
(errcode(ERRCODE_DISK_FULL), - errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes", + errmsg("could not write block %u of relation %u/%u/%u/%u: wrote only %d of %d bytes", blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, nbytes, BLCKSZ), errhint("Check free disk space."))); } if (!isTemp) - register_dirty_segment(reln, v); + register_dirty_segment(reln, forknum, v); } /* @@ -670,9 +709,9 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) * are present in the chain. */ BlockNumber -mdnblocks(SMgrRelation reln) +mdnblocks(SMgrRelation reln, ForkNumber forknum) { - MdfdVec *v = mdopen(reln, EXTENSION_FAIL); + MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL); BlockNumber nblocks; BlockNumber segno = 0; @@ -696,7 +735,7 @@ mdnblocks(SMgrRelation reln) for (;;) { - nblocks = _mdnblocks(reln, v); + nblocks = _mdnblocks(reln, forknum, v); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) @@ -715,15 +754,16 @@ mdnblocks(SMgrRelation reln) * RELSEG_SIZE. While perhaps not strictly necessary, this keeps * the logic simple. */ - v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); + v->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT); if (v->mdfd_chain == NULL) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open segment %u of relation %u/%u/%u: %m", + errmsg("could not open segment %u of relation %u/%u/%u/%u: %m", segno, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); } v = v->mdfd_chain; @@ -734,7 +774,8 @@ mdnblocks(SMgrRelation reln) * mdtruncate() -- Truncate relation to specified number of blocks. 
*/ void -mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) +mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, + bool isTemp) { MdfdVec *v; BlockNumber curnblk; @@ -744,23 +785,24 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * NOTE: mdnblocks makes sure we have opened all active segments, so that * truncation loop will get them all! */ - curnblk = mdnblocks(reln); + curnblk = mdnblocks(reln, forknum); if (nblocks > curnblk) { /* Bogus request ... but no complaint if InRecovery */ if (InRecovery) return; ereport(ERROR, - (errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now", + (errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: it's only %u blocks now", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, nblocks, curnblk))); } if (nblocks == curnblk) return; /* no work */ - v = mdopen(reln, EXTENSION_FAIL); + v = mdopen(reln, forknum, EXTENSION_FAIL); priorblocks = 0; while (v != NULL) @@ -777,15 +819,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) if (FileTruncate(v->mdfd_vfd, 0) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", + errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, nblocks))); if (!isTemp) - register_dirty_segment(reln, v); + register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; - Assert(ov != reln->md_fd); /* we never drop the 1st segment */ + Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st segment */ pfree(ov); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) @@ -803,13 +846,14 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not truncate relation 
%u/%u/%u to %u blocks: %m", + errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, nblocks))); if (!isTemp) - register_dirty_segment(reln, v); + register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; } @@ -832,7 +876,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * nothing of dirty buffers that may exist inside the buffer manager. */ void -mdimmedsync(SMgrRelation reln) +mdimmedsync(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v; BlockNumber curnblk; @@ -841,20 +885,21 @@ mdimmedsync(SMgrRelation reln) * NOTE: mdnblocks makes sure we have opened all active segments, so that * fsync loop will get them all! */ - curnblk = mdnblocks(reln); + curnblk = mdnblocks(reln, forknum); - v = mdopen(reln, EXTENSION_FAIL); + v = mdopen(reln, forknum, EXTENSION_FAIL); while (v != NULL) { if (FileSync(v->mdfd_vfd) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m", v->mdfd_segno, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); v = v->mdfd_chain; } } @@ -1008,7 +1053,7 @@ mdsync(void) * FileSync, since fd.c might have closed the file behind our * back. 
*/ - seg = _mdfd_getseg(reln, + seg = _mdfd_getseg(reln, entry->tag.forknum, entry->tag.segno * ((BlockNumber) RELSEG_SIZE), false, EXTENSION_RETURN_NULL); if (seg != NULL && @@ -1024,19 +1069,21 @@ mdsync(void) failures > 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m", entry->tag.segno, entry->tag.rnode.spcNode, entry->tag.rnode.dbNode, - entry->tag.rnode.relNode))); + entry->tag.rnode.relNode, + entry->tag.forknum))); else ereport(DEBUG1, (errcode_for_file_access(), - errmsg("could not fsync segment %u of relation %u/%u/%u, but retrying: %m", + errmsg("could not fsync segment %u of relation %u/%u/%u/%u but retrying: %m", entry->tag.segno, entry->tag.rnode.spcNode, entry->tag.rnode.dbNode, - entry->tag.rnode.relNode))); + entry->tag.rnode.relNode, + entry->tag.forknum))); /* * Absorb incoming requests and check to see if canceled. @@ -1126,7 +1173,7 @@ mdpostckpt(void) Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr); /* Unlink the file */ - path = relpath(entry->rnode); + path = relpath(entry->rnode, MAIN_FORKNUM); if (unlink(path) < 0) { /* @@ -1139,10 +1186,11 @@ mdpostckpt(void) if (errno != ENOENT) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove relation %u/%u/%u: %m", + errmsg("could not remove relation %u/%u/%u/%u: %m", entry->rnode.spcNode, entry->rnode.dbNode, - entry->rnode.relNode))); + entry->rnode.relNode, + MAIN_FORKNUM))); } pfree(path); @@ -1161,26 +1209,27 @@ mdpostckpt(void) * to be a performance problem). 
*/ static void -register_dirty_segment(SMgrRelation reln, MdfdVec *seg) +register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { if (pendingOpsTable) { /* push it into local pending-ops table */ - RememberFsyncRequest(reln->smgr_rnode, seg->mdfd_segno); + RememberFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno); } else { - if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno)) + if (ForwardFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno)) return; /* passed it off successfully */ if (FileSync(seg->mdfd_vfd) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m", seg->mdfd_segno, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); } } @@ -1196,7 +1245,7 @@ register_unlink(RelFileNode rnode) if (pendingOpsTable) { /* push it into local pending-ops table */ - RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST); + RememberFsyncRequest(rnode, MAIN_FORKNUM, UNLINK_RELATION_REQUEST); } else { @@ -1208,7 +1257,8 @@ register_unlink(RelFileNode rnode) * XXX should we just leave the file orphaned instead? */ Assert(IsUnderPostmaster); - while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST)) + while (!ForwardFsyncRequest(rnode, MAIN_FORKNUM, + UNLINK_RELATION_REQUEST)) pg_usleep(10000L); /* 10 msec seems a good number */ } } @@ -1233,7 +1283,7 @@ register_unlink(RelFileNode rnode) * structure for them.) 
*/ void -RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) +RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) { Assert(pendingOpsTable); @@ -1246,7 +1296,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (RelFileNodeEquals(entry->tag.rnode, rnode)) + if (RelFileNodeEquals(entry->tag.rnode, rnode) && + entry->tag.forknum == forknum) { /* Okay, cancel this entry */ entry->canceled = true; @@ -1313,6 +1364,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) /* ensure any pad bytes in the hash key are zeroed */ MemSet(&key, 0, sizeof(key)); key.rnode = rnode; + key.forknum = forknum; key.segno = segno; entry = (PendingOperationEntry *) hash_search(pendingOpsTable, @@ -1346,12 +1398,12 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void -ForgetRelationFsyncRequests(RelFileNode rnode) +ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) { if (pendingOpsTable) { /* standalone backend or startup process: fsync state is local */ - RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC); + RememberFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC); } else if (IsUnderPostmaster) { @@ -1365,7 +1417,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode) * which would be bad, so I'm inclined to assume that the bgwriter * will always empty the queue soon. 
*/ - while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC)) + while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC)) pg_usleep(10000L); /* 10 msec seems a good number */ /* @@ -1390,12 +1442,13 @@ ForgetDatabaseFsyncRequests(Oid dbid) if (pendingOpsTable) { /* standalone backend or startup process: fsync state is local */ - RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC); + RememberFsyncRequest(rnode, InvalidForkNumber, FORGET_DATABASE_FSYNC); } else if (IsUnderPostmaster) { /* see notes in ForgetRelationFsyncRequests */ - while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC)) + while (!ForwardFsyncRequest(rnode, InvalidForkNumber, + FORGET_DATABASE_FSYNC)) pg_usleep(10000L); /* 10 msec seems a good number */ } } @@ -1415,14 +1468,15 @@ _fdvec_alloc(void) * and make a MdfdVec object for it. Returns NULL on failure. */ static MdfdVec * -_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) +_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, + int oflags) { MdfdVec *v; int fd; char *path, *fullpath; - path = relpath(reln->smgr_rnode); + path = relpath(reln->smgr_rnode, forknum); if (segno > 0) { @@ -1449,7 +1503,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) v->mdfd_vfd = fd; v->mdfd_segno = segno; v->mdfd_chain = NULL; - Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); /* all done */ return v; @@ -1464,10 +1518,10 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) * in the EXTENSION_CREATE case. 
*/ static MdfdVec * -_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, - ExtensionBehavior behavior) +_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, + bool isTemp, ExtensionBehavior behavior) { - MdfdVec *v = mdopen(reln, behavior); + MdfdVec *v = mdopen(reln, forknum, behavior); BlockNumber targetseg; BlockNumber nextsegno; @@ -1497,20 +1551,21 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, */ if (behavior == EXTENSION_CREATE || InRecovery) { - if (_mdnblocks(reln, v) < RELSEG_SIZE) + if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE) { char *zerobuf = palloc0(BLCKSZ); - mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, + mdextend(reln, forknum, + nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, zerobuf, isTemp); pfree(zerobuf); } - v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT); + v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); } else { /* We won't create segment if not existent */ - v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0); + v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0); } if (v->mdfd_chain == NULL) { @@ -1519,11 +1574,12 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, return NULL; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m", + errmsg("could not open segment %u of relation %u/%u/%u/%u (target block %u): %m", nextsegno, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, + forknum, blkno))); } } @@ -1536,7 +1592,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, * Get number of blocks present in a single disk file */ static BlockNumber -_mdnblocks(SMgrRelation reln, MdfdVec *seg) +_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { off_t len; @@ -1544,11 +1600,12 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg) if (len < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek to end of 
segment %u of relation %u/%u/%u: %m", + errmsg("could not seek to end of segment %u of relation %u/%u/%u/%u: %m", seg->mdfd_segno, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + reln->smgr_rnode.relNode, + forknum))); /* note that this calculation will ignore any partial block at EOF */ return (BlockNumber) (len / BLCKSZ); } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index d0282f8a6d7f504fcbf8cbc674e77ad96c9e8184..da4a9766ca95c406356073324bfb2ea0c3dbc17d 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.110 2008/06/12 09:12:31 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.111 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -42,19 +42,22 @@ typedef struct f_smgr { void (*smgr_init) (void); /* may be NULL */ void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_close) (SMgrRelation reln); - void (*smgr_create) (SMgrRelation reln, bool isRedo); - void (*smgr_unlink) (RelFileNode rnode, bool isRedo); - void (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, - char *buffer, bool isTemp); - void (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, - char *buffer); - void (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, - char *buffer, bool isTemp); - BlockNumber (*smgr_nblocks) (SMgrRelation reln); - void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks, - bool isTemp); - void (*smgr_immedsync) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber 
forknum, + BlockNumber blocknum, char *buffer, bool isTemp); + void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer); + void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks, bool isTemp); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); void (*smgr_commit) (void); /* may be NULL */ void (*smgr_abort) (void); /* may be NULL */ void (*smgr_pre_ckpt) (void); /* may be NULL */ @@ -65,7 +68,7 @@ typedef struct f_smgr static const f_smgr smgrsw[] = { /* magnetic disk */ - {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend, + {mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync, NULL, NULL, mdpreckpt, mdsync, mdpostckpt } @@ -102,6 +105,7 @@ static HTAB *SMgrRelationHash = NULL; typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ + ForkNumber forknum; /* fork number that may need to be deleted */ int which; /* which storage manager? */ bool isTemp; /* is it a temporary relation? 
*/ bool atCommit; /* T=delete at commit; F=delete at abort */ @@ -126,19 +130,21 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ typedef struct xl_smgr_create { RelFileNode rnode; + ForkNumber forknum; } xl_smgr_create; typedef struct xl_smgr_truncate { BlockNumber blkno; RelFileNode rnode; + ForkNumber forknum; } xl_smgr_truncate; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); -static void smgr_internal_unlink(RelFileNode rnode, int which, - bool isTemp, bool isRedo); +static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, + int which, bool isTemp, bool isRedo); /* @@ -211,10 +217,15 @@ smgropen(RelFileNode rnode) /* Initialize it if not present before */ if (!found) { + int forknum; + /* hash_search already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_which = 0; /* we only have md.c at present */ - reln->md_fd = NULL; /* mark it not open */ + + /* mark it not open */ + for(forknum = 0; forknum <= MAX_FORKNUM; forknum++) + reln->md_fd[forknum] = NULL; } return reln; @@ -243,6 +254,15 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln) *owner = reln; } +/* + * smgrexists() -- Does the underlying file for a fork exist? + */ +bool +smgrexists(SMgrRelation reln, ForkNumber forknum) +{ + return (*(smgrsw[reln->smgr_which].smgr_exists)) (reln, forknum); +} + /* * smgrclose() -- Close and delete an SMgrRelation object. */ @@ -250,8 +270,10 @@ void smgrclose(SMgrRelation reln) { SMgrRelation *owner; + ForkNumber forknum; - (*(smgrsw[reln->smgr_which].smgr_close)) (reln); + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + (*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum); owner = reln->smgr_owner; @@ -315,7 +337,8 @@ smgrclosenode(RelFileNode rnode) * smgrcreate() -- Create a new relation. * * Given an already-created (but presumably unused) SMgrRelation, - * cause the underlying disk file or other storage to be created. 
+ * cause the underlying disk file or other storage for the fork + * to be created. * * If isRedo is true, it is okay for the underlying file to exist * already because we are in a WAL replay sequence. In this case @@ -323,7 +346,7 @@ smgrclosenode(RelFileNode rnode) * tell whether to drop the file. */ void -smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) +smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo) { XLogRecPtr lsn; XLogRecData rdata; @@ -334,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) * Exit quickly in WAL replay mode if we've already opened the file. * If it's open, it surely must exist. */ - if (isRedo && reln->md_fd != NULL) + if (isRedo && reln->md_fd[forknum] != NULL) return; /* @@ -350,7 +373,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) reln->smgr_rnode.dbNode, isRedo); - (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo); + (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo); if (isRedo) return; @@ -360,6 +383,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) * will be dropped at abort time. */ xlrec.rnode = reln->smgr_rnode; + xlrec.forknum = forknum; rdata.data = (char *) &xlrec; rdata.len = sizeof(xlrec); @@ -372,6 +396,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; + pending->forknum = forknum; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = false; /* delete if abort */ @@ -383,13 +408,11 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) /* * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit. * - * The relation is marked to be removed from the store if we - * successfully commit the current transaction. - * - * This also implies smgrclose() on the SMgrRelation object. 
+ * The fork is marked to be removed from the store if we successfully + * commit the current transaction. */ void -smgrscheduleunlink(SMgrRelation reln, bool isTemp) +smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum, bool isTemp) { PendingRelDelete *pending; @@ -397,6 +420,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; + pending->forknum = forknum; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = true; /* delete if commit */ @@ -413,51 +437,49 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp) * the existing list entry and delete the physical file immediately, but * for now I'll keep the logic simple. */ - - /* Now close the file and throw away the hashtable entry */ - smgrclose(reln); } /* * smgrdounlink() -- Immediately unlink a relation. * - * The relation is removed from the store. This should not be used - * during transactional operations, since it can't be undone. + * The specified fork of the relation is removed from the store. This + * should not be used during transactional operations, since it can't be + * undone. * * If isRedo is true, it is okay for the underlying file to be gone * already. - * - * This also implies smgrclose() on the SMgrRelation object. */ void -smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo) +smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo) { RelFileNode rnode = reln->smgr_rnode; int which = reln->smgr_which; - /* Close the file and throw away the hashtable entry */ - smgrclose(reln); + /* Close the fork */ + (*(smgrsw[which].smgr_close)) (reln, forknum); - smgr_internal_unlink(rnode, which, isTemp, isRedo); + smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo); } /* * Shared subroutine that actually does the unlink ... 
*/ static void -smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) +smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, + int which, bool isTemp, bool isRedo) { /* * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(rnode, isTemp, 0); + DropRelFileNodeBuffers(rnode, forknum, isTemp, 0); /* * Tell the free space map to forget this relation. It won't be accessed * any more anyway, but we may as well recycle the map space quickly. */ - FreeSpaceMapForgetRel(&rnode); + if (forknum == MAIN_FORKNUM) + FreeSpaceMapForgetRel(&rnode); /* * It'd be nice to tell the stats collector to forget it immediately, too. @@ -473,7 +495,7 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) * ERROR, because we've already decided to commit or abort the current * xact. */ - (*(smgrsw[which].smgr_unlink)) (rnode, isRedo); + (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo); } /* @@ -486,9 +508,11 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) * causes intervening file space to become filled with zeroes. */ void -smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) +smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool isTemp) { - (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp); + (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum, + buffer, isTemp); } /* @@ -500,9 +524,10 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) * return pages in the format that POSTGRES expects. 
*/ void -smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) +smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer) { - (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer); + (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer); } /* @@ -521,9 +546,11 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) * made to fsync the write before checkpointing. */ void -smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) +smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool isTemp) { - (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp); + (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum, + buffer, isTemp); } /* @@ -531,9 +558,9 @@ smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) * supplied relation. */ BlockNumber -smgrnblocks(SMgrRelation reln) +smgrnblocks(SMgrRelation reln, ForkNumber forknum) { - return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); + return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum); } /* @@ -541,13 +568,14 @@ smgrnblocks(SMgrRelation reln) * of blocks */ void -smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) +smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, + bool isTemp) { /* * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will * just drop them without bothering to write the contents. 
*/ - DropRelFileNodeBuffers(reln->smgr_rnode, isTemp, nblocks); + DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); /* * Tell the free space map to forget anything it may have stored for the @@ -557,7 +585,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks); /* Do the truncation */ - (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, + isTemp); if (!isTemp) { @@ -570,6 +599,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) xlrec.blkno = nblocks; xlrec.rnode = reln->smgr_rnode; + xlrec.forknum = forknum; rdata.data = (char *) &xlrec; rdata.len = sizeof(xlrec); @@ -604,9 +634,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * otherwise the sync is not very meaningful. */ void -smgrimmedsync(SMgrRelation reln) +smgrimmedsync(SMgrRelation reln, ForkNumber forknum) { - (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln); + (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum); } @@ -666,6 +696,7 @@ smgrDoPendingDeletes(bool isCommit) /* do deletion if called for */ if (pending->atCommit == isCommit) smgr_internal_unlink(pending->relnode, + pending->forknum, pending->which, pending->isTemp, false); @@ -680,7 +711,7 @@ smgrDoPendingDeletes(bool isCommit) * smgrGetPendingDeletes() -- Get a list of relations to be deleted. * * The return value is the number of relations scheduled for termination. - * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. + * *ptr is set to point to a freshly-palloc'd array of RelFileForks. * If there are no relations to be deleted, *ptr is set to NULL. * * If haveNonTemp isn't NULL, the bool it points to gets set to true if @@ -690,11 +721,11 @@ smgrDoPendingDeletes(bool isCommit) * by upper-level transactions. 
*/ int -smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) +smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr, bool *haveNonTemp) { int nestLevel = GetCurrentTransactionNestLevel(); int nrels; - RelFileNode *rptr; + RelFileFork *rptr; PendingRelDelete *pending; nrels = 0; @@ -710,12 +741,16 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) *ptr = NULL; return 0; } - rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode)); + rptr = (RelFileFork *) palloc(nrels * sizeof(RelFileFork)); *ptr = rptr; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) - *rptr++ = pending->relnode; + { + rptr->rnode = pending->relnode; + rptr->forknum = pending->forknum; + rptr++; + } if (haveNonTemp && !pending->isTemp) *haveNonTemp = true; } @@ -843,7 +878,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) SMgrRelation reln; reln = smgropen(xlrec->rnode); - smgrcreate(reln, false, true); + smgrcreate(reln, xlrec->forknum, false, true); } else if (info == XLOG_SMGR_TRUNCATE) { @@ -858,7 +893,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) * XLogOpenRelation, we prefer to recreate the rel and replay the log * as best we can until the drop is seen. */ - smgrcreate(reln, false, true); + smgrcreate(reln, xlrec->forknum, false, true); /* Can't use smgrtruncate because it would try to xlog */ @@ -867,7 +902,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) * truncated blocks. We must do this, else subsequent XLogReadBuffer * operations will not re-extend the file properly. 
*/ - DropRelFileNodeBuffers(xlrec->rnode, false, xlrec->blkno); + DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false, + xlrec->blkno); /* * Tell the free space map to forget anything it may have stored for @@ -878,11 +914,12 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) /* Do the truncation */ (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, + xlrec->forknum, xlrec->blkno, false); /* Also tell xlogutils.c about it */ - XLogTruncateRelation(xlrec->rnode, xlrec->blkno); + XLogTruncateRelation(xlrec->rnode, xlrec->forknum, xlrec->blkno); } else elog(PANIC, "smgr_redo: unknown op code %u", info); @@ -897,17 +934,18 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) { xl_smgr_create *xlrec = (xl_smgr_create *) rec; - appendStringInfo(buf, "file create: %u/%u/%u", + appendStringInfo(buf, "file create: %u/%u/%u/%u", xlrec->rnode.spcNode, xlrec->rnode.dbNode, - xlrec->rnode.relNode); + xlrec->rnode.relNode, xlrec->forknum); } else if (info == XLOG_SMGR_TRUNCATE) { xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; - appendStringInfo(buf, "file truncate: %u/%u/%u to %u blocks", + appendStringInfo(buf, "file truncate: %u/%u/%u/%u to %u blocks", xlrec->rnode.spcNode, xlrec->rnode.dbNode, - xlrec->rnode.relNode, xlrec->blkno); + xlrec->rnode.relNode, xlrec->forknum, + xlrec->blkno); } else appendStringInfo(buf, "UNKNOWN"); diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index c358cdea7b4211ecbb308a4ccc65f7b9e84d60c7..e85d11c3834079c90858b27e22df23b687478f0b 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -5,7 +5,7 @@ * Copyright (c) 2002-2008, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.19 2008/06/19 00:46:05 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.20 2008/08/11 11:05:11 heikki Exp $ * */ @@ -255,7 +255,8 @@ calculate_relation_size(RelFileNode *rfn) char pathname[MAXPGPATH]; unsigned int 
segcount = 0; - relationpath = relpath(*rfn); + /* XXX: This ignores the other forks. */ + relationpath = relpath(*rfn, MAIN_FORKNUM); for (segcount = 0;; segcount++) { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index c6b7d5dc1141337e5632eb4128f07427a49f2ffd..d6f9473c5eee5030d768494a26f1dbf66d0a20b5 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.137 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.138 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -127,7 +127,8 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, OffsetNumber *offsets, int offcnt); -extern XLogRecPtr log_newpage(RelFileNode *rnode, BlockNumber blk, Page page); +extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, + BlockNumber blk, Page page); /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer, diff --git a/src/include/access/htup.h b/src/include/access/htup.h index 8bfdf26697ee7228cdec852e557903a86f22fe65..85271c26c3ac80bf941cefc0d338826fc78fa19c 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.100 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.101 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -670,6 +670,7 @@ typedef struct 
xl_heap_clean typedef struct xl_heap_newpage { RelFileNode node; + ForkNumber forknum; BlockNumber blkno; /* location of new page */ /* entire page contents follow at end of record */ } xl_heap_newpage; diff --git a/src/include/access/xact.h b/src/include/access/xact.h index dff05c73db58a579010424d7c6d2796a75d0fb39..c887716e591cde239b6acfa9c012d44c921fbb45 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.94 2008/03/04 19:54:06 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.95 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -88,10 +88,10 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid, typedef struct xl_xact_commit { TimestampTz xact_time; /* time of commit */ - int nrels; /* number of RelFileNodes */ + int nrels; /* number of RelFileForks */ int nsubxacts; /* number of subtransaction XIDs */ - /* Array of RelFileNode(s) to drop at commit */ - RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ + /* Array of RelFileFork(s) to drop at commit */ + RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */ /* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */ } xl_xact_commit; @@ -100,10 +100,10 @@ typedef struct xl_xact_commit typedef struct xl_xact_abort { TimestampTz xact_time; /* time of abort */ - int nrels; /* number of RelFileNodes */ + int nrels; /* number of RelFileForks */ int nsubxacts; /* number of subtransaction XIDs */ - /* Array of RelFileNode(s) to drop at abort */ - RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ + /* Array of RelFileFork(s) to drop at abort */ + RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */ /* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */ } xl_xact_abort; diff --git 
a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 88fe0b6a95ff781c4b80502fd0c60c19b3dc4f1e..c5f1a0f502fac9ef78eb8711b089400eecb28780 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.23 2008/02/17 02:09:30 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.24 2008/08/11 11:05:11 heikki Exp $ */ #ifndef XLOG_INTERNAL_H #define XLOG_INTERNAL_H @@ -40,6 +40,7 @@ typedef struct BkpBlock { RelFileNode node; /* relation containing block */ + ForkNumber fork; /* fork within the relation */ BlockNumber block; /* block number */ uint16 hole_offset; /* number of bytes before "hole" */ uint16 hole_length; /* number of bytes in "hole" */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index f1585c032a59e7de35caaeff960d6eacc1871af9..0c81d42e44dcd136dd18075bb6227ea2d4b44699 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.25 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.26 2008/08/11 11:05:11 heikki Exp $ */ #ifndef XLOG_UTILS_H #define XLOG_UTILS_H @@ -19,11 +19,14 @@ extern void XLogCheckInvalidPages(void); -extern void XLogDropRelation(RelFileNode rnode); +extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum); extern void XLogDropDatabase(Oid dbid); -extern void XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks); +extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, + BlockNumber nblocks); extern Buffer 
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init); +extern Buffer XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum, + BlockNumber blkno, bool init); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern void FreeFakeRelcacheEntry(Relation fakerel); diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 845ef035bd54dd99afd879c4a22ae351dc45b6c1..52a69e1341973e0684becdc74d258d353f448d03 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.40 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.41 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -19,7 +19,7 @@ #include "utils/relcache.h" -extern char *relpath(RelFileNode rnode); +extern char *relpath(RelFileNode rnode, ForkNumber forknum); extern char *GetDatabasePath(Oid dbNode, Oid spcNode); extern bool IsSystemRelation(Relation relation); diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index b1c6fd59fcb6dc72ead658aab1a346be8f84d8ec..3dc02bc400d54e49749483c2e0005de7f07189ec 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.11 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.12 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -27,7 +27,8 @@ extern void BackgroundWriterMain(void); extern void RequestCheckpoint(int flags); extern void CheckpointWriteDelay(int flags, double 
progress); -extern bool ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno); +extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, + BlockNumber segno); extern void AbsorbFsyncRequests(void); extern Size BgWriterShmemSize(void); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 589baa0a7e5263cbe028d15394c41c008c091e08..a8861d29a8ef7d7b629821035bc038bc69fc18d6 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.97 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.98 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -65,6 +65,7 @@ typedef bits16 BufFlags; typedef struct buftag { RelFileNode rnode; /* physical relation identifier */ + ForkNumber forkNum; BlockNumber blockNum; /* blknum relative to begin of reln */ } BufferTag; @@ -73,19 +74,22 @@ typedef struct buftag (a).rnode.spcNode = InvalidOid, \ (a).rnode.dbNode = InvalidOid, \ (a).rnode.relNode = InvalidOid, \ + (a).forkNum = InvalidForkNumber, \ (a).blockNum = InvalidBlockNumber \ ) -#define INIT_BUFFERTAG(a,xx_rnode,xx_blockNum) \ +#define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \ ( \ (a).rnode = (xx_rnode), \ + (a).forkNum = (xx_forkNum), \ (a).blockNum = (xx_blockNum) \ ) #define BUFFERTAGS_EQUAL(a,b) \ ( \ RelFileNodeEquals((a).rnode, (b).rnode) && \ - (a).blockNum == (b).blockNum \ + (a).blockNum == (b).blockNum && \ + (a).forkNum == (b).forkNum \ ) /* @@ -202,10 +206,10 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id); extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode); /* localbuf.c */ -extern BufferDesc 
*LocalBufferAlloc(SMgrRelation reln, BlockNumber blockNum, - bool *foundPtr); +extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, ForkNumber forkNum, + BlockNumber blockNum, bool *foundPtr); extern void MarkLocalBufferDirty(Buffer buffer); -extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, +extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, BlockNumber firstDelBlock); extern void AtEOXact_LocalBuffers(bool isCommit); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index f940ae466d5002c13cdce04b5a0470465cbee408..72d4aec39797497175f95c4d8b5a4cee5c0ec6d7 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.114 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.115 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -144,11 +144,13 @@ extern PGDLLIMPORT int32 *LocalRefCount; * prototypes for functions in bufmgr.c */ extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); +extern Buffer ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum); extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, BufferAccessStrategy strategy); -extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum); +extern Buffer ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, + BlockNumber blockNum); extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - BlockNumber blockNum, bool zeroPage); + ForkNumber forkNum, BlockNumber blockNum, bool zeroPage); extern void ReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer); extern void MarkBufferDirty(Buffer buffer); @@ -169,15 +171,16 @@ extern 
BlockNumber RelationGetNumberOfBlocks(Relation relation); extern void RelationTruncate(Relation rel, BlockNumber nblocks); extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); -extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, - BlockNumber firstDelBlock); +extern void DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, + bool istemp, BlockNumber firstDelBlock); extern void DropDatabaseBuffers(Oid dbid); #ifdef NOT_USED extern void PrintPinnedBufs(void); #endif extern Size BufferShmemSize(void); -extern RelFileNode BufferGetFileNode(Buffer buffer); +extern void BufferGetTag(Buffer buffer, RelFileNode *rnode, + ForkNumber *forknum, BlockNumber *blknum); extern void SetBufferCommitInfoNeedsSave(Buffer buffer); diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 9638294b4a48c2b068c760f29008029b9fb34bd6..8ac8147ed939759224bd5e2c2ce1c0bede1b6e00 100644 --- a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -7,16 +7,33 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.15 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ #ifndef RELFILENODE_H #define RELFILENODE_H +/* + * The physical storage of a relation consists of one or more forks. The + * main fork is always created, but in addition to that there can be + * additional forks for storing various metadata. ForkNumber is used when + * we need to refer to a specific fork in a relation. 
+ */ +typedef enum ForkNumber +{ + InvalidForkNumber = -1, + MAIN_FORKNUM = 0 + /* NOTE: change MAX_FORKNUM below when you add new forks */ +} ForkNumber; + +#define MAX_FORKNUM MAIN_FORKNUM + /* * RelFileNode must provide all that we need to know to physically access - * a relation. + * a relation. Note, however, that a "physical" relation is comprised of + * multiple files on the filesystem, as each fork is stored as a separate + * file, and each fork can be divided into multiple segments. See md.c. * * spcNode identifies the tablespace of the relation. It corresponds to * pg_tablespace.oid. @@ -57,4 +74,13 @@ typedef struct RelFileNode (node1).dbNode == (node2).dbNode && \ (node1).spcNode == (node2).spcNode) +/* + * RelFileFork identifies a particular fork of a relation. + */ +typedef struct RelFileFork +{ + RelFileNode rnode; + ForkNumber forknum; +} RelFileFork; + #endif /* RELFILENODE_H */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 9e9c6c023b90c0d2f2d0a68475b06faf723e9d7b..d4999c1049a4b00f358c5b655fe23f7fe61849dc 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.62 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.63 2008/08/11 11:05:11 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -51,7 +51,8 @@ typedef struct SMgrRelationData */ int smgr_which; /* storage manager selector */ - struct _MdfdVec *md_fd; /* for md.c; NULL if not open */ + /* for md.c; NULL for forks that are not open */ + struct _MdfdVec *md_fd[MAX_FORKNUM + 1]; } SMgrRelationData; typedef SMgrRelationData *SMgrRelation; @@ -59,24 +60,29 @@ typedef SMgrRelationData *SMgrRelation; extern void smgrinit(void); extern SMgrRelation
smgropen(RelFileNode rnode); +extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrclose(SMgrRelation reln); extern void smgrcloseall(void); extern void smgrclosenode(RelFileNode rnode); -extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo); -extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp); -extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo); -extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, - bool isTemp); -extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer); -extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, - bool isTemp); -extern BlockNumber smgrnblocks(SMgrRelation reln); -extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks, - bool isTemp); -extern void smgrimmedsync(SMgrRelation reln); +extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, + bool isTemp, bool isRedo); +extern void smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum, + bool isTemp); +extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum, + bool isTemp, bool isRedo); +extern void smgrextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool isTemp); +extern void smgrread(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer); +extern void smgrwrite(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool isTemp); +extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum); +extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks, bool isTemp); +extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); extern void smgrDoPendingDeletes(bool isCommit); -extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, +extern int smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr, bool *haveNonTemp); extern void 
AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); @@ -95,23 +101,27 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec); /* in md.c */ extern void mdinit(void); -extern void mdclose(SMgrRelation reln); -extern void mdcreate(SMgrRelation reln, bool isRedo); -extern void mdunlink(RelFileNode rnode, bool isRedo); -extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, - bool isTemp); -extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); -extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, - bool isTemp); -extern BlockNumber mdnblocks(SMgrRelation reln); -extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp); -extern void mdimmedsync(SMgrRelation reln); +extern void mdclose(SMgrRelation reln, ForkNumber forknum); +extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern bool mdexists(SMgrRelation reln, ForkNumber forknum); +extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo); +extern void mdextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool isTemp); +extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer); +extern void mdwrite(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool isTemp); +extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); +extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks, bool isTemp); +extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); extern void mdpreckpt(void); extern void mdsync(void); extern void mdpostckpt(void); -extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno); -extern void ForgetRelationFsyncRequests(RelFileNode rnode); +extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, + BlockNumber segno); +extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum); 
extern void ForgetDatabaseFsyncRequests(Oid dbid); /* smgrtype.c */