From 85e2cedf985bfecaf43a18ca17433070f439fb0e Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Thu, 6 Nov 2008 20:51:15 +0000 Subject: [PATCH] Improve bulk-insert performance by keeping the current target buffer pinned (but not locked, as that would risk deadlocks). Also, make it work in a small ring of buffers to avoid having bulk inserts trash the whole buffer arena. Robert Haas, after an idea of Simon Riggs'. --- src/backend/access/heap/heapam.c | 68 ++++++++++++++------ src/backend/access/heap/hio.c | 89 +++++++++++++++++++++------ src/backend/access/heap/rewriteheap.c | 6 +- src/backend/access/heap/tuptoaster.c | 23 +++---- src/backend/commands/copy.c | 18 +++--- src/backend/executor/execMain.c | 27 ++++---- src/backend/storage/buffer/README | 6 +- src/backend/storage/buffer/freelist.c | 5 +- src/include/access/heapam.h | 13 +++- src/include/access/hio.h | 21 ++++++- src/include/access/tuptoaster.h | 4 +- src/include/storage/bufmgr.h | 3 +- 12 files changed, 201 insertions(+), 82 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 49bca5b3299..7139b03471e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $ * * * INTERFACE ROUTINES @@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) } +/* + * GetBulkInsertState - prepare status object for a bulk insert + */ +BulkInsertState +GetBulkInsertState(void) +{ + BulkInsertState bistate; + + bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData)); + bistate->strategy = GetAccessStrategy(BAS_BULKWRITE); + bistate->current_buf = InvalidBuffer; + return bistate; +} + +/* + * FreeBulkInsertState - clean up after finishing a bulk insert + */ +void +FreeBulkInsertState(BulkInsertState bistate) +{ + if (bistate->current_buf != InvalidBuffer) + ReleaseBuffer(bistate->current_buf); + FreeAccessStrategy(bistate->strategy); + pfree(bistate); +} + + /* * heap_insert - insert tuple into a heap * * The new tuple is stamped with current transaction ID and the specified * command ID. * - * If use_wal is false, the new tuple is not logged in WAL, even for a - * non-temp relation. Safe usage of this behavior requires that we arrange - * that all new tuples go into new pages not containing any tuples from other - * transactions, and that the relation gets fsync'd before commit. - * (See also heap_sync() comments) + * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not + * logged in WAL, even for a non-temp relation. Safe usage of this behavior + * requires that we arrange that all new tuples go into new pages not + * containing any tuples from other transactions, and that the relation gets + * fsync'd before commit. (See also heap_sync() comments) + * + * The HEAP_INSERT_SKIP_FSM option is passed directly to + * RelationGetBufferForTuple, which see for more info. * - * use_fsm is passed directly to RelationGetBufferForTuple, which see for - * more info. + * Note that these options will be applied when inserting into the heap's + * TOAST table, too, if the tuple requires any out-of-line data. * - * Note that use_wal and use_fsm will be applied when inserting into the - * heap's TOAST table, too, if the tuple requires any out-of-line data. + * The BulkInsertState object (if any; bistate can be NULL for default + * behavior) is also just passed through to RelationGetBufferForTuple. * * The return value is the OID assigned to the tuple (either here or by the * caller), or InvalidOid if no OID. The header fields of *tup are updated @@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) */ Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, - bool use_wal, bool use_fsm) + int options, BulkInsertState bistate) { TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; @@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, heaptup = tup; } else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) - heaptup = toast_insert_or_update(relation, tup, NULL, - use_wal, use_fsm); + heaptup = toast_insert_or_update(relation, tup, NULL, options); else heaptup = tup; /* Find buffer to insert this tuple into */ buffer = RelationGetBufferForTuple(relation, heaptup->t_len, - InvalidBuffer, use_fsm); + InvalidBuffer, options, bistate); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, MarkBufferDirty(buffer); /* XLOG stuff */ - if (use_wal && !relation->rd_istemp) + if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp) { xl_heap_insert xlrec; xl_heap_header xlhdr; @@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, Oid simple_heap_insert(Relation relation, HeapTuple tup) { - return heap_insert(relation, tup, GetCurrentCommandId(true), true, true); + return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL); } /* @@ -2595,8 +2624,7 @@ l2: if (need_toast) { /* Note we always use WAL and FSM during updates */ - heaptup = toast_insert_or_update(relation, newtup, &oldtup, - true, true); + heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0); newtupsize = MAXALIGN(heaptup->t_len); } else @@ -2623,7 +2651,7 @@ l2: { /* Assume there's no chance to put heaptup on same page. */ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, - buffer, true); + buffer, 0, NULL); } else { @@ -2640,7 +2668,7 @@ l2: */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, - buffer, true); + buffer, 0, NULL); } else { diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 3723977fe09..5cfd150b8ef 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/heapam.h" #include "access/hio.h" #include "storage/bufmgr.h" #include "storage/freespace.h" @@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation, ((HeapTupleHeader) item)->t_ctid = tuple->t_self; } +/* + * Read in a buffer, using bulk-insert strategy if bistate isn't NULL. + */ +static Buffer +ReadBufferBI(Relation relation, BlockNumber targetBlock, + BulkInsertState bistate) +{ + Buffer buffer; + + /* If not bulk-insert, exactly like ReadBuffer */ + if (!bistate) + return ReadBuffer(relation, targetBlock); + + /* If we have the desired block already pinned, re-pin and return it */ + if (bistate->current_buf != InvalidBuffer) + { + if (BufferGetBlockNumber(bistate->current_buf) == targetBlock) + { + IncrBufferRefCount(bistate->current_buf); + return bistate->current_buf; + } + /* ... else drop the old buffer */ + ReleaseBuffer(bistate->current_buf); + bistate->current_buf = InvalidBuffer; + } + + /* Perform a read using the buffer strategy */ + buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock, + RBM_NORMAL, bistate->strategy); + + /* Save the selected block as target for future inserts */ + IncrBufferRefCount(buffer); + bistate->current_buf = buffer; + + return buffer; +} + /* * RelationGetBufferForTuple * @@ -80,13 +118,13 @@ RelationPutHeapTuple(Relation relation, * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * - * If use_fsm is true (the normal case), we use FSM to help us find free - * space. If use_fsm is false, we always append a new empty page to the - * end of the relation if the tuple won't fit on the current target page. + * We normally use FSM to help us find free space. However, + * if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to + * the end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. * - * The use_fsm = false case is also useful for non-WAL-logged additions to a + * HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a * relation, if the caller holds exclusive lock and is careful to invalidate * relation->rd_targblock before the first insertion --- that ensures that * all insertions will occur into newly added pages and not be intermixed @@ -94,6 +132,12 @@ RelationPutHeapTuple(Relation relation, * any committed data of other transactions. (See heap_insert's comments * for additional constraints needed for safe usage of this behavior.) * + * The caller can also provide a BulkInsertState object to optimize many + * insertions into the same relation. This keeps a pin on the current + * insertion target page (to save pin/unpin cycles) and also passes a + * BULKWRITE buffer selection strategy object to the buffer manager. + * Passing NULL for bistate selects the default behavior. + * * We always try to avoid filling existing pages further than the fillfactor. * This is OK since this routine is not consulted when updating a tuple and * keeping it on the same page, which is the scenario fillfactor is meant @@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation, */ Buffer RelationGetBufferForTuple(Relation relation, Size len, - Buffer otherBuffer, bool use_fsm) + Buffer otherBuffer, int options, + struct BulkInsertStateData *bistate) { + bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; Size pageFreeSpace, @@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len, len = MAXALIGN(len); /* be conservative */ + /* Bulk insert is not supported for updates, only inserts. */ + Assert(otherBuffer == InvalidBuffer || !bistate); + /* * If we're gonna fail for oversize tuple, do it right away */ @@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len, /* * We first try to put the tuple on the same page we last inserted a tuple - * on, as cached in the relcache entry. If that doesn't work, we ask the - * shared Free Space Map to locate a suitable page. Since the FSM's info - * might be out of date, we have to be prepared to loop around and retry - * multiple times. (To insure this isn't an infinite loop, we must update - * the FSM with the correct amount of free space on each page that proves - * not to be suitable.) If the FSM has no record of a page with enough - * free space, we give up and extend the relation. + * on, as cached in the BulkInsertState or relcache entry. If that + * doesn't work, we ask the Free Space Map to locate a suitable page. + * Since the FSM's info might be out of date, we have to be prepared to + * loop around and retry multiple times. (To insure this isn't an infinite + * loop, we must update the FSM with the correct amount of free space on + * each page that proves not to be suitable.) If the FSM has no record of + * a page with enough free space, we give up and extend the relation. * * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ - if (len + saveFreeSpace <= MaxHeapTupleSize) - targetBlock = relation->rd_targblock; - else + if (len + saveFreeSpace > MaxHeapTupleSize) { - /* can't fit, don't screw up FSM request tracking by trying */ + /* can't fit, don't bother asking FSM */ targetBlock = InvalidBlockNumber; use_fsm = false; } + else if (bistate && bistate->current_buf != InvalidBuffer) + targetBlock = BufferGetBlockNumber(bistate->current_buf); + else + targetBlock = relation->rd_targblock; if (targetBlock == InvalidBlockNumber && use_fsm) { @@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len, if (otherBuffer == InvalidBuffer) { /* easy case */ - buffer = ReadBuffer(relation, targetBlock); + buffer = ReadBufferBI(relation, targetBlock, bistate); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) @@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len, * it worth keeping an accurate file length in shared memory someplace, * rather than relying on the kernel to do it for us? */ - buffer = ReadBuffer(relation, P_NEW); + buffer = ReadBufferBI(relation, P_NEW, bistate); /* * We can be certain that locking the otherBuffer first is OK, since it diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index cd7302bd5d7..18c7a72d64f 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -96,7 +96,7 @@ * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup) } else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL, - state->rs_use_wal, false); + HEAP_INSERT_SKIP_FSM | + (state->rs_use_wal ? + 0 : HEAP_INSERT_SKIP_WAL)); else heaptup = tup; diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index e3014e288ab..f8bb77bd0a9 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.90 2008/11/02 01:45:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.91 2008/11/06 20:51:14 tgl Exp $ * * * INTERFACE ROUTINES @@ -74,8 +74,7 @@ do { \ static void toast_delete_datum(Relation rel, Datum value); -static Datum toast_save_datum(Relation rel, Datum value, - bool use_wal, bool use_fsm); +static Datum toast_save_datum(Relation rel, Datum value, int options); static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length); @@ -400,7 +399,7 @@ toast_delete(Relation rel, HeapTuple oldtup) * Inputs: * newtup: the candidate new tuple to be inserted * oldtup: the old row version for UPDATE, or NULL for INSERT - * use_wal, use_fsm: flags to be passed to heap_insert() for toast rows + * options: options to be passed to heap_insert() for toast rows * Result: * either newtup if no toasting is needed, or a palloc'd modified tuple * that is what should actually get stored @@ -411,7 +410,7 @@ toast_delete(Relation rel, HeapTuple oldtup) */ HeapTuple toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, - bool use_wal, bool use_fsm) + int options) { HeapTuple result_tuple; TupleDesc tupleDesc; @@ -677,8 +676,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -728,8 +726,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -838,8 +835,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -1120,8 +1116,7 @@ toast_compress_datum(Datum value) * ---------- */ static Datum -toast_save_datum(Relation rel, Datum value, - bool use_wal, bool use_fsm) +toast_save_datum(Relation rel, Datum value, int options) { Relation toastrel; Relation toastidx; @@ -1218,7 +1213,7 @@ toast_save_datum(Relation rel, Datum value, memcpy(VARDATA(&chunk_data), data_p, chunk_size); toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull); - heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm); + heap_insert(toastrel, toasttup, mycid, options, NULL); /* * Create the index entry. We cheat a little here by not using diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 4d6fd988d77..2c680008a7a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.300 2008/11/02 01:45:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.301 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1653,8 +1653,8 @@ CopyFrom(CopyState cstate) MemoryContext oldcontext = CurrentMemoryContext; ErrorContextCallback errcontext; CommandId mycid = GetCurrentCommandId(true); - bool use_wal = true; /* by default, use WAL logging */ - bool use_fsm = true; /* by default, use FSM for free space */ + int hi_options = 0; /* start with default heap_insert options */ + BulkInsertState bistate; Assert(cstate->rel); @@ -1707,9 +1707,9 @@ CopyFrom(CopyState cstate) if (cstate->rel->rd_createSubid != InvalidSubTransactionId || cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId) { - use_fsm = false; + hi_options |= HEAP_INSERT_SKIP_FSM; if (!XLogArchivingActive()) - use_wal = false; + hi_options |= HEAP_INSERT_SKIP_WAL; } if (pipe) @@ -1886,6 +1886,8 @@ CopyFrom(CopyState cstate) cstate->cur_attname = NULL; cstate->cur_attval = NULL; + bistate = GetBulkInsertState(); + /* Set up callback to identify error line number */ errcontext.callback = copy_in_error_callback; errcontext.arg = (void *) cstate; @@ -2108,7 +2110,7 @@ CopyFrom(CopyState cstate) ExecConstraints(resultRelInfo, slot, estate); /* OK, store the tuple and create index entries for it */ - heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm); + heap_insert(cstate->rel, tuple, mycid, hi_options, bistate); if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false); @@ -2128,6 +2130,8 @@ CopyFrom(CopyState cstate) /* Done, clean up */ error_context_stack = errcontext.previous; + FreeBulkInsertState(bistate); + MemoryContextSwitchTo(oldcontext); /* Execute AFTER STATEMENT insertion triggers */ @@ -2164,7 +2168,7 @@ CopyFrom(CopyState cstate) * If we skipped writing WAL, then we need to sync the heap (but not * indexes since those use WAL anyway) */ - if (!use_wal) + if (hi_options & HEAP_INSERT_SKIP_WAL) heap_sync(cstate->rel); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 47840d42ebc..350381ad4b5 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -26,7 +26,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.314 2008/10/31 21:07:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.315 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1623,8 +1623,7 @@ ExecInsert(TupleTableSlot *slot, * t_self field. */ newId = heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, - true, true); + estate->es_output_cid, 0, NULL); IncrAppended(); (estate->es_processed)++; @@ -2621,7 +2620,8 @@ typedef struct DestReceiver pub; /* publicly-known function pointers */ EState *estate; /* EState we are working with */ Relation rel; /* Relation to write to */ - bool use_wal; /* do we need to WAL-log our writes? */ + int hi_options; /* heap_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ } DR_intorel; /* @@ -2753,14 +2753,17 @@ OpenIntoRel(QueryDesc *queryDesc) myState = (DR_intorel *) queryDesc->dest; Assert(myState->pub.mydest == DestIntoRel); myState->estate = estate; + myState->rel = intoRelationDesc; /* - * We can skip WAL-logging the insertions, unless PITR is in use. + * We can skip WAL-logging the insertions, unless PITR is in use. We + * can skip the FSM in any case. */ - myState->use_wal = XLogArchivingActive(); - myState->rel = intoRelationDesc; + myState->hi_options = HEAP_INSERT_SKIP_FSM | + (XLogArchivingActive() ? 0 : HEAP_INSERT_SKIP_WAL); + myState->bistate = GetBulkInsertState(); - /* use_wal off requires rd_targblock be initially invalid */ + /* Not using WAL requires rd_targblock be initially invalid */ Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber); } @@ -2775,8 +2778,10 @@ CloseIntoRel(QueryDesc *queryDesc) /* OpenIntoRel might never have gotten called */ if (myState && myState->pub.mydest == DestIntoRel && myState->rel) { + FreeBulkInsertState(myState->bistate); + /* If we skipped using WAL, must heap_sync before commit */ - if (!myState->use_wal) + if (myState->hi_options & HEAP_INSERT_SKIP_WAL) heap_sync(myState->rel); /* close rel, but keep lock until commit */ @@ -2834,8 +2839,8 @@ intorel_receive(TupleTableSlot *slot, DestReceiver *self) heap_insert(myState->rel, tuple, myState->estate->es_output_cid, - myState->use_wal, - false); /* never any point in using FSM */ + myState->hi_options, + myState->bistate); /* We know this is a newly created relation, so there are no indexes */ diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README index 057f817b7e4..696e5e8c305 100644 --- a/src/backend/storage/buffer/README +++ b/src/backend/storage/buffer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.14 2008/03/21 13:23:28 momjian Exp $ +$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.15 2008/11/06 20:51:14 tgl Exp $ Notes About Shared Buffer Access Rules ====================================== @@ -235,6 +235,10 @@ buffers were sent to the freelist, which was effectively a buffer ring of 1 buffer, resulting in excessive WAL flushing. Allowing VACUUM to update 256KB between WAL flushes should be more efficient. +Bulk writes work similarly to VACUUM. Currently this applies only to +COPY IN and CREATE TABLE AS SELECT. (Might it be interesting to make +seqscan UPDATE and DELETE use the bulkwrite strategy?) + Background Writer's Processing ------------------------------ diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 4e55db9adc2..5f4c05cef6d 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.64 2008/01/01 19:45:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.65 2008/11/06 20:51:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -384,6 +384,9 @@ GetAccessStrategy(BufferAccessStrategyType btype) case BAS_BULKREAD: ring_size = 256 * 1024 / BLCKSZ; break; + case BAS_BULKWRITE: + ring_size = 256 * 1024 / BLCKSZ; + break; case BAS_VACUUM: ring_size = 256 * 1024 / BLCKSZ; break; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index b24edea6e40..1b78b40cc59 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.139 2008/10/08 01:14:44 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.140 2008/11/06 20:51:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,6 +25,12 @@ #include "utils/snapshot.h" +/* "options" flag bits for heap_insert */ +#define HEAP_INSERT_SKIP_WAL 0x0001 +#define HEAP_INSERT_SKIP_FSM 0x0002 + +typedef struct BulkInsertStateData *BulkInsertState; + typedef enum { LockTupleShared, @@ -86,8 +92,11 @@ extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid); extern void setLastTid(const ItemPointer tid); +extern BulkInsertState GetBulkInsertState(void); +extern void FreeBulkInsertState(BulkInsertState); + extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, - bool use_wal, bool use_fsm); + int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, Snapshot crosscheck, bool wait); diff --git a/src/include/access/hio.h b/src/include/access/hio.h index a089bddbf3f..813347dccbe 100644 --- a/src/include/access/hio.h +++ b/src/include/access/hio.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.36 2008/06/19 00:46:06 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.37 2008/11/06 20:51:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,9 +18,26 @@ #include "utils/relcache.h" #include "storage/buf.h" + +/* + * state for bulk inserts --- private to heapam.c and hio.c + * + * If current_buf isn't InvalidBuffer, then we are holding an extra pin + * on that buffer. + * + * "typedef struct BulkInsertStateData *BulkInsertState" is in heapam.h + */ +typedef struct BulkInsertStateData +{ + BufferAccessStrategy strategy; /* our BULKWRITE strategy object */ + Buffer current_buf; /* current insertion target page */ +} BulkInsertStateData; + + extern void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, - Buffer otherBuffer, bool use_fsm); + Buffer otherBuffer, int options, + struct BulkInsertStateData *bistate); #endif /* HIO_H */ diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h index a87aee62dfc..3cf7aad2b87 100644 --- a/src/include/access/tuptoaster.h +++ b/src/include/access/tuptoaster.h @@ -6,7 +6,7 @@ * * Copyright (c) 2000-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.41 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.42 2008/11/06 20:51:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -93,7 +93,7 @@ */ extern HeapTuple toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, - bool use_wal, bool use_fsm); + int options); /* ---------- * toast_delete - diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 56f584a78ba..f2252c8f460 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.116 2008/10/31 15:05:00 heikki Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.117 2008/11/06 20:51:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,6 +28,7 @@ typedef enum BufferAccessStrategyType BAS_NORMAL, /* Normal random access */ BAS_BULKREAD, /* Large read-only scan (hint bit updates are * ok) */ + BAS_BULKWRITE, /* Large multi-block write (e.g. COPY IN) */ BAS_VACUUM /* VACUUM */ } BufferAccessStrategyType; -- GitLab