diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 46c6c9666e5e846daac7074baa0b0501c8ae3e22..9a779454a0cd8e2b78a8b24271bd77c054f7ea1e 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -37,6 +37,7 @@
 static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock,
                     uint32 nblocks);
 static void _hash_splitbucket(Relation rel, Buffer metabuf,
+                  Buffer nbuf,
                   Bucket obucket, Bucket nbucket,
                   BlockNumber start_oblkno,
                   BlockNumber start_nblkno,
@@ -176,7 +177,9 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
  * EOF but before updating the metapage to reflect the added page.)
  *
  * It is caller's responsibility to ensure that only one process can
- * extend the index at a time.
+ * extend the index at a time.  In practice, this function is called
+ * only while holding write lock on the metapage, because adding a page
+ * is always associated with an update of metapage data.
  */
 Buffer
 _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
@@ -503,6 +506,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
     uint32      spare_ndx;
     BlockNumber start_oblkno;
     BlockNumber start_nblkno;
+    Buffer      buf_nblkno;
     uint32      maxbucket;
     uint32      highmask;
     uint32      lowmask;
@@ -603,6 +607,13 @@ _hash_expandtable(Relation rel, Buffer metabuf)
         }
     }
 
+    /*
+     * Physically allocate the new bucket's primary page.  We want to do this
+     * before changing the metapage's mapping info, in case we can't get the
+     * disk space.
+     */
+    buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
+
     /*
      * Okay to proceed with split.  Update the metapage bucket mapping info.
      *
@@ -653,7 +664,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
     _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
 
     /* Relocate records to the new bucket */
-    _hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
+    _hash_splitbucket(rel, metabuf, buf_nblkno,
+                      old_bucket, new_bucket,
                       start_oblkno, start_nblkno,
                       maxbucket, highmask, lowmask);
 
@@ -733,10 +745,16 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
  * The caller must hold a pin, but no lock, on the metapage buffer.
  * The buffer is returned in the same state.  (The metapage is only
  * touched if it becomes necessary to add or remove overflow pages.)
+ *
+ * In addition, the caller must have created the new bucket's base page,
+ * which is passed in buffer nbuf, pinned and write-locked.  The lock
+ * and pin are released here.  (The API is set up this way because we must
+ * do _hash_getnewbuf() before releasing the metapage write lock.)
  */
 static void
 _hash_splitbucket(Relation rel,
                   Buffer metabuf,
+                  Buffer nbuf,
                   Bucket obucket,
                   Bucket nbucket,
                   BlockNumber start_oblkno,
@@ -748,7 +766,6 @@ _hash_splitbucket(Relation rel,
     BlockNumber oblkno;
     BlockNumber nblkno;
     Buffer      obuf;
-    Buffer      nbuf;
     Page        opage;
     Page        npage;
     HashPageOpaque oopaque;
@@ -765,7 +782,7 @@ _hash_splitbucket(Relation rel,
     oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
 
     nblkno = start_nblkno;
-    nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
+    Assert(nblkno == BufferGetBlockNumber(nbuf));
     npage = BufferGetPage(nbuf);
 
     /* initialize the new bucket's primary page */
@@ -814,6 +831,11 @@ _hash_splitbucket(Relation rel,
              * insert the tuple into the new bucket.  if it doesn't fit on
              * the current page in the new bucket, we must allocate a new
              * overflow page and place the tuple on that page instead.
+             *
+             * XXX we have a problem here if we fail to get space for a
+             * new overflow page: we'll error out leaving the bucket split
+             * only partially complete, meaning the index is corrupt,
+             * since searches may fail to find entries they should find.
              */
             itemsz = IndexTupleDSize(*itup);
             itemsz = MAXALIGN(itemsz);
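
The new comment in _hash_expandtable states the ordering rule this patch enforces:
physically allocate the new bucket's primary page before updating the metapage's
bucket mapping info, so that an out-of-disk-space failure leaves the index
untouched.  The standalone C sketch below models that rule outside PostgreSQL;
the names (meta_t, alloc_page, split_bucket) are invented for illustration and
are not part of the PostgreSQL API.

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct
    {
        unsigned    nbuckets;   /* stands in for the metapage's bucket count */
    } meta_t;

    /* Simulated physical allocation that can fail, like running out of disk. */
    static int
    alloc_page(bool disk_full)
    {
        static int  next_page = 1;

        return disk_full ? -1 : next_page++;
    }

    /* Split in the fixed order: allocate first, then update shared metadata. */
    static bool
    split_bucket(meta_t *meta, bool disk_full)
    {
        int         newpage = alloc_page(disk_full);

        /*
         * Allocate before touching *meta.  On failure we return with the
         * metadata unchanged, so no reader is ever directed to a bucket
         * that has no physical page behind it.
         */
        if (newpage < 0)
            return false;

        meta->nbuckets++;       /* only now does the new bucket become visible */
        return true;
    }

    int
    main(void)
    {
        meta_t      meta = {4};

        if (!split_bucket(&meta, true))     /* simulate out-of-space */
            printf("split failed, nbuckets still %u (structure consistent)\n",
                   meta.nbuckets);
        if (split_bucket(&meta, false))     /* normal case */
            printf("split ok, nbuckets now %u\n", meta.nbuckets);
        return 0;
    }

As the added XXX comment concedes, this only covers the bucket's primary page:
an overflow-page allocation failure partway through _hash_splitbucket can still
leave the split half-done and the index corrupt.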