diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 8b349cfaf87d2655b78e3f32f98a5c2f634a065d..72d2029b32ec5e03a4bd65f18b4cac794075a07a 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -37,6 +37,7 @@
 static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock,
 					uint32 nblocks);
 static void _hash_splitbucket(Relation rel, Buffer metabuf,
+				  Buffer nbuf,
 				  Bucket obucket, Bucket nbucket,
 				  BlockNumber start_oblkno,
 				  BlockNumber start_nblkno,
@@ -176,7 +177,9 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
  *	EOF but before updating the metapage to reflect the added page.)
  *
  *	It is caller's responsibility to ensure that only one process can
- *	extend the index at a time.
+ *	extend the index at a time.  In practice, this function is called
+ *	only while holding write lock on the metapage, because adding a page
+ *	is always associated with an update of metapage data.
  */
 Buffer
 _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
@@ -503,6 +506,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 	uint32		spare_ndx;
 	BlockNumber start_oblkno;
 	BlockNumber start_nblkno;
+	Buffer		buf_nblkno;
 	uint32		maxbucket;
 	uint32		highmask;
 	uint32		lowmask;
@@ -615,6 +619,13 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 		}
 	}
 
+	/*
+	 * Physically allocate the new bucket's primary page.  We want to do this
+	 * before changing the metapage's mapping info, in case we can't get the
+	 * disk space.
+	 */
+	buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
+
 	/*
 	 * Okay to proceed with split.  Update the metapage bucket mapping info.
 	 *
@@ -668,7 +679,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 	_hash_droplock(rel, 0, HASH_EXCLUSIVE);
 
 	/* Relocate records to the new bucket */
-	_hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
+	_hash_splitbucket(rel, metabuf, buf_nblkno,
+					  old_bucket, new_bucket,
 					  start_oblkno, start_nblkno,
 					  maxbucket, highmask, lowmask);
 
@@ -751,10 +763,16 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
  * The caller must hold a pin, but no lock, on the metapage buffer.
  * The buffer is returned in the same state.  (The metapage is only
  * touched if it becomes necessary to add or remove overflow pages.)
+ *
+ * In addition, the caller must have created the new bucket's base page,
+ * which is passed in buffer nbuf, pinned and write-locked.  The lock
+ * and pin are released here.  (The API is set up this way because we must
+ * do _hash_getnewbuf() before releasing the metapage write lock.)
  */
 static void
 _hash_splitbucket(Relation rel,
 				  Buffer metabuf,
+				  Buffer nbuf,
 				  Bucket obucket,
 				  Bucket nbucket,
 				  BlockNumber start_oblkno,
@@ -766,7 +784,6 @@ _hash_splitbucket(Relation rel,
 	BlockNumber oblkno;
 	BlockNumber nblkno;
 	Buffer		obuf;
-	Buffer		nbuf;
 	Page		opage;
 	Page		npage;
 	HashPageOpaque oopaque;
@@ -783,7 +800,7 @@ _hash_splitbucket(Relation rel,
 	oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
 
 	nblkno = start_nblkno;
-	nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
+	Assert(nblkno == BufferGetBlockNumber(nbuf));
 	npage = BufferGetPage(nbuf);
 
 	/* initialize the new bucket's primary page */
@@ -832,6 +849,11 @@ _hash_splitbucket(Relation rel,
 			 * insert the tuple into the new bucket.  if it doesn't fit on
 			 * the current page in the new bucket, we must allocate a new
 			 * overflow page and place the tuple on that page instead.
+			 *
+			 * XXX we have a problem here if we fail to get space for a
+			 * new overflow page: we'll error out leaving the bucket split
+			 * only partially complete, meaning the index is corrupt,
+			 * since searches may fail to find entries they should find.
 			 */
 			itemsz = IndexTupleDSize(*itup);
 			itemsz = MAXALIGN(itemsz);
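
---

For context (this note sits after the last hunk, so git apply ignores it): a
minimal standalone C sketch of the ordering rule the patch enforces in
_hash_expandtable(). The idea is to acquire the resource that can fail (the
new bucket's primary page) before publishing the metadata change (the
metapage bucket mapping), so an out-of-disk-space failure leaves the index
consistent. This is not PostgreSQL code; meta_t, alloc_page(), and
expand_table() are hypothetical stand-ins.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the hash index metapage. */
typedef struct
{
	int			nbuckets;
} meta_t;

/* Stand-in page allocator; may fail, like running out of disk space. */
static void *
alloc_page(void)
{
	return malloc(8192);
}

/*
 * Mirrors the patched ordering: allocate the new bucket's page first, and
 * only then update the bucket mapping, so a failed allocation leaves the
 * metadata untouched.
 */
static int
expand_table(meta_t *meta)
{
	void	   *new_page = alloc_page();

	if (new_page == NULL)
		return -1;				/* fail cleanly; meta is unchanged */

	meta->nbuckets++;			/* safe: the page already exists */

	/* ... hand new_page to the bucket split, as the patch does ... */
	free(new_page);
	return 0;
}

int
main(void)
{
	meta_t		meta = {4};

	if (expand_table(&meta) == 0)
		printf("expanded to %d buckets\n", meta.nbuckets);
	return 0;
}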