From 329fb11262b79a45749a8005e7a31ff178eb6d10 Mon Sep 17 00:00:00 2001 From: "Vadim B. Mikheev" <vadim4o@yahoo.com> Date: Wed, 16 Apr 1997 01:48:29 +0000 Subject: [PATCH] 1. BTREE_VERSION_1: using bti_itup->t_tid as unique identifier for a given index tuple (logical position within A LEVEL). bti_oid & bti_dummy taken off from BTItemData. 2. Fix for multi-column indices (nbtsearch.c): _bt_binsrch() - for searches on internal pages having keysize < number of attrs we point at the last item < the scankey, not at the first item = the scankey; _bt_moveright() - if keysize < number of attrs we compare scankey with _last_ item on current page to decide should we move right or not. --- src/backend/access/nbtree/nbtinsert.c | 63 ++++------- src/backend/access/nbtree/nbtpage.c | 41 +++++++- src/backend/access/nbtree/nbtsearch.c | 146 +++++++++++++++++--------- src/backend/access/nbtree/nbtsort.c | 12 ++- src/backend/access/nbtree/nbtutils.c | 4 +- 5 files changed, 167 insertions(+), 99 deletions(-) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 06c54a456dc..743583bccab 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.11 1997/03/24 08:48:09 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.12 1997/04/16 01:48:11 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -33,7 +33,7 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem); static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem); -static void _bt_updateitem(Relation rel, Size 
keysz, Buffer buf, Oid bti_oid, BTItem newItem); +static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, BTItem oldItem, BTItem newItem); static bool _bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); /* @@ -357,7 +357,7 @@ _bt_insertonpg(Relation rel, DOUBLEALIGN (IndexTupleDSize (stack->bts_btitem->bti_itup)) ) { _bt_updateitem(rel, keysz, pbuf, - stack->bts_btitem->bti_oid, lowLeftItem); + stack->bts_btitem, lowLeftItem); _bt_relbuf(rel, buf, BT_WRITE); _bt_relbuf(rel, rbuf, BT_WRITE); } @@ -644,23 +644,14 @@ _bt_findsplitloc(Relation rel, OffsetNumber saferight; ItemId nxtitemid, safeitemid; BTItem safeitem, nxtitem; - IndexTuple safetup, nxttup; Size nbytes; - TupleDesc itupdesc; int natts; - int attno; - Datum attsafe; - Datum attnext; - bool null; - itupdesc = RelationGetTupleDescriptor(rel); natts = rel->rd_rel->relnatts; - saferight = start; safeitemid = PageGetItemId(page, saferight); nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData); safeitem = (BTItem) PageGetItem(page, safeitemid); - safetup = &(safeitem->bti_itup); i = OffsetNumberNext(start); @@ -670,26 +661,17 @@ _bt_findsplitloc(Relation rel, nxtitemid = PageGetItemId(page, i); nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData)); nxtitem = (BTItem) PageGetItem(page, nxtitemid); - nxttup = &(nxtitem->bti_itup); - - /* test against last known safe item */ - for (attno = 1; attno <= natts; attno++) { - attsafe = index_getattr(safetup, attno, itupdesc, &null); - attnext = index_getattr(nxttup, attno, itupdesc, &null); - /* - * If the tuple we're looking at isn't equal to the last safe one - * we saw, then it's our new safe tuple. 
- */ - - if (!_bt_invokestrat(rel, attno, BTEqualStrategyNumber, - attsafe, attnext)) { - safetup = nxttup; - saferight = i; - - /* break is for the attno for loop */ - break; - } + /* + * Test against last known safe item: + * if the tuple we're looking at isn't equal to the last safe + * one we saw, then it's our new safe tuple. + */ + if ( !_bt_itemcmp (rel, natts, + safeitem, nxtitem, BTEqualStrategyNumber) ) + { + safeitem = nxtitem; + saferight = i; } i = OffsetNumberNext(i); } @@ -753,7 +735,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) rbkno = BufferGetBlockNumber(rbuf); lpage = BufferGetPage(lbuf); rpage = BufferGetPage(rbuf); - + /* * step over the high key on the left page while building the * left page pointer. @@ -793,7 +775,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) _bt_wrtbuf(rel, rootbuf); /* update metadata page with new root block number */ - _bt_metaproot(rel, rootbknum); + _bt_metaproot(rel, rootbknum, 0); } /* @@ -820,7 +802,6 @@ _bt_pgaddtup(Relation rel, Page page; BTPageOpaque opaque; BTItem chkitem; - Oid afteroid; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -829,14 +810,13 @@ _bt_pgaddtup(Relation rel, if (afteritem == (BTItem) NULL) { itup_off = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION); } else { - afteroid = afteritem->bti_oid; itup_off = first; do { chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, itup_off)); itup_off = OffsetNumberNext(itup_off); - } while (chkitem->bti_oid != afteroid); + } while ( ! BTItemSame (chkitem, afteritem) ); } (void) PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED); @@ -870,7 +850,6 @@ _bt_goesonpg(Relation rel, BTPageOpaque opaque; BTItem chkitem; OffsetNumber offnum, maxoff; - Oid afteroid; bool found; page = BufferGetPage(buf); @@ -908,7 +887,6 @@ _bt_goesonpg(Relation rel, return (false); /* damn, have to work for it. i hate that. 
*/ - afteroid = afteritem->bti_oid; maxoff = PageGetMaxOffsetNumber(page); /* @@ -924,7 +902,8 @@ _bt_goesonpg(Relation rel, offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - if (chkitem->bti_oid == afteroid) { + + if ( BTItemSame (chkitem, afteritem) ) { found = true; break; } @@ -1029,7 +1008,7 @@ static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, - Oid bti_oid, + BTItem oldItem, BTItem newItem) { Page page; @@ -1050,10 +1029,10 @@ _bt_updateitem(Relation rel, do { item = (BTItem) PageGetItem(page, PageGetItemId(page, i)); i = OffsetNumberNext(i); - } while (i <= maxoff && item->bti_oid != bti_oid); + } while (i <= maxoff && ! BTItemSame (item, oldItem)); /* this should never happen (in theory) */ - if (item->bti_oid != bti_oid) { + if ( ! BTItemSame (item, oldItem) ) { elog(FATAL, "_bt_getstackbuf was lying!!"); } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 280f67def1a..440a118fbd8 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.6 1996/11/05 10:35:30 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.7 1997/04/16 01:48:15 vadim Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. 
The opaque @@ -38,12 +38,20 @@ #define BTREE_METAPAGE 0 #define BTREE_MAGIC 0x053162 + +#ifdef BTREE_VERSION_1 +#define BTREE_VERSION 1 +#else #define BTREE_VERSION 0 +#endif typedef struct BTMetaPageData { uint32 btm_magic; uint32 btm_version; BlockNumber btm_root; +#ifdef BTREE_VERSION_1 + int32 btm_level; +#endif } BTMetaPageData; #define BTPageGetMeta(p) \ @@ -95,6 +103,9 @@ _bt_metapinit(Relation rel) metad.btm_magic = BTREE_MAGIC; metad.btm_version = BTREE_VERSION; metad.btm_root = P_NONE; +#ifdef BTREE_VERSION_1 + metad.btm_level = 0; +#endif memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad)); op = (BTPageOpaque) PageGetSpecialPointer(pg); @@ -179,6 +190,17 @@ _bt_getroot(Relation rel, int access) metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); Assert(metaopaque->btpo_flags & BTP_META); metad = BTPageGetMeta(metapg); + + if (metad->btm_magic != BTREE_MAGIC) { + elog(WARN, "Index %s is not a btree", + RelationGetRelationName(rel)); + } + + if (metad->btm_version != BTREE_VERSION) { + elog(WARN, "Version mismatch on %s: version %d file, version %d code", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION); + } /* if no root page initialized yet, do it */ if (metad->btm_root == P_NONE) { @@ -209,6 +231,9 @@ _bt_getroot(Relation rel, int access) rootblkno = BufferGetBlockNumber(rootbuf); rootpg = BufferGetPage(rootbuf); metad->btm_root = rootblkno; +#ifdef BTREE_VERSION_1 + metad->btm_level = 1; +#endif _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); @@ -387,7 +412,7 @@ _bt_pageinit(Page page, Size size) * a reference to or lock on the metapage. 
*/ void -_bt_metaproot(Relation rel, BlockNumber rootbknum) +_bt_metaproot(Relation rel, BlockNumber rootbknum, int level) { Buffer metabuf; Page metap; @@ -400,6 +425,12 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum) Assert(metaopaque->btpo_flags & BTP_META); metad = BTPageGetMeta(metap); metad->btm_root = rootbknum; +#ifdef BTREE_VERSION_1 + if ( level == 0 ) /* called from _do_insert */ + metad->btm_level += 1; + else + metad->btm_level = level; /* called from btsort */ +#endif _bt_wrtbuf(rel, metabuf); } @@ -434,7 +465,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) item = (BTItem) PageGetItem(page, itemid); /* if the item is where we left it, we're done */ - if (item->bti_oid == stack->bts_btitem->bti_oid) + if ( BTItemSame (item, stack->bts_btitem) ) return (buf); /* if the item has just moved right on this page, we're done */ @@ -445,7 +476,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) item = (BTItem) PageGetItem(page, itemid); /* if the item is where we left it, we're done */ - if (item->bti_oid == stack->bts_btitem->bti_oid) + if ( BTItemSame (item, stack->bts_btitem) ) return (buf); } } @@ -471,7 +502,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) offnum = OffsetNumberNext(offnum)) { itemid = PageGetItemId(page, offnum); item = (BTItem) PageGetItem(page, itemid); - if (item->bti_oid == stack->bts_btitem->bti_oid) + if ( BTItemSame (item, stack->bts_btitem) ) return (buf); } } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 99fb38f18ce..b72ccedf96e 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.16 1997/03/24 08:48:12 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.17 1997/04/16 01:48:17 vadim Exp $ * 
*------------------------------------------------------------------------- */ @@ -160,7 +160,8 @@ _bt_moveright(Relation rel, ItemId hikey; ItemId itemid; BlockNumber rblkno; - + int natts = rel->rd_rel->relnatts; + page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -178,22 +179,43 @@ _bt_moveright(Relation rel, */ if (_bt_skeycmp(rel, keysz, scankey, page, hikey, - BTGreaterEqualStrategyNumber)) { - + BTGreaterEqualStrategyNumber)) + { /* move right as long as we need to */ - do { + do + { + OffsetNumber offmax; /* * If this page consists of all duplicate keys (hikey and first * key on the page have the same value), then we don't need to * step right. + * + * NOTE for multi-column indices: a scan may use keys + * for only some of the attrs, but duplicates are handled + * using all attrs in the _bt_insert/_bt_spool code. + * So we have to compare the scankey with the _last_ item + * on this page so as not to lose "good" tuples if number + * of attrs > keysize. Example: (2,0) - last item on + * this page, (2,1) - first item on next page (hikey), + * our scankey is x = 2. Scankey >= (2,1) because + * we compare first attrs only, but we shouldn't move + * right from here.
- vadim 04/15/97 */ - if (PageGetMaxOffsetNumber(page) > P_HIKEY) { + if ( (offmax = PageGetMaxOffsetNumber(page)) > P_HIKEY) + { itemid = PageGetItemId(page, P_FIRSTKEY); if (_bt_skeycmp(rel, keysz, scankey, page, itemid, BTEqualStrategyNumber)) { /* break is for the "move right" while loop */ break; } + else if ( natts > keysz ) + { + itemid = PageGetItemId(page, offmax); + if (_bt_skeycmp(rel, keysz, scankey, page, itemid, + BTLessEqualStrategyNumber)) + break; + } } /* step right one page */ @@ -346,6 +368,7 @@ _bt_binsrch(Relation rel, Page page; BTPageOpaque opaque; OffsetNumber low, mid, high; + int natts = rel->rd_rel->relnatts; int result; page = BufferGetPage(buf); @@ -379,55 +402,84 @@ _bt_binsrch(Relation rel, else if (result < 0) high = mid - 1; else - return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, mid)); + { + mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid); + /* + * NOTE for multi-column indices: we may do scan using + * keys not for all attrs. But we handle duplicates using + * all attrs in _bt_insert/_bt_spool code. And so while + * searching on internal pages having number of attrs > keysize + * we want to point at the last item < the scankey, not at the + * first item = the scankey (!!!), and let _bt_moveright + * decide later whether to move right or not (see comments and + * example there). Note also that INSERTions are not affected + * by this code (natts == keysz). - vadim 04/15/97 + */ + if ( natts == keysz || opaque->btpo_flags & BTP_LEAF ) + return (mid); + low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + if ( mid == low ) + return (mid); + return (OffsetNumberPrev(mid)); + } } - /* - * We terminated because the endpoints got too close together. There - * are two cases to take care of. - * - * For non-insertion searches on internal pages, we want to point at - * the last key <, or first key =, the scankey on the page. This - * guarantees that we'll descend the tree correctly. 
- * - * For all other cases, we want to point at the first key >= - * the scankey on the page. This guarantees that scans and - * insertions will happen correctly. + /* + * We terminated because the endpoints got too close together. There + * are two cases to take care of. + * + * For non-insertion searches on internal pages, we want to point at + * the last key <, or first key =, the scankey on the page. This + * guarantees that we'll descend the tree correctly. + * (NOTE comments above for multi-column indices). + * + * For all other cases, we want to point at the first key >= + * the scankey on the page. This guarantees that scans and + * insertions will happen correctly. + */ + + if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) + { /* + * We want the last key <, or first key ==, the scan key. */ + result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) { - + if (result == 0) + { + mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high); /* - * We want the last key <, or first key ==, the scan key. + * If natts > keysz we want last item < the scan key. + * See comments above for multi-column indices. */ - + if ( natts == keysz ) + return (mid); + low = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; + if ( mid == low ) + return (mid); + return (OffsetNumberPrev(mid)); + } + else if (result > 0) + return (high); + else + return (low); + } + else /* we want the first key >= the scan key */ + { + result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); + if (result <= 0) + return (low); + else + { + if (low == high) + return (OffsetNumberNext(low)); + result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); - - if (result == 0) { - return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, high)); - } else if (result > 0) { + if (result <= 0) return (high); - } else { - return (low); - } - } else { - - /* we want the first key >= the scan key */ - result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); - if (result <= 0) { - return (low); - } else { - if (low == high) - return (OffsetNumberNext(low)); - - result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); - if (result <= 0) - return (high); - else - return (OffsetNumberNext(high)); - } + else + return (OffsetNumberNext(high)); } + } } static OffsetNumber @@ -1107,7 +1159,7 @@ _bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) while (offnum <= maxoff) { itemid = PageGetItemId(page, offnum); btitem = (BTItem) PageGetItem(page, itemid); - if (btitem->bti_oid == svitem->bti_oid) { + if ( BTItemSame (btitem, svitem) ) { pfree(svitem); ItemPointerSet(current, blkno, offnum); return (false); diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 7ec926f9e24..6e382982a9b 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -5,7 +5,7 @@ * * * IDENTIFICATION - * $Id: nbtsort.c,v 1.13 1997/03/24 08:48:15 vadim Exp $ + * $Id: nbtsort.c,v 1.14 1997/04/16 01:48:27 vadim Exp $ * * NOTES * @@ -1021,9 +1021,13 @@ _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags) } #endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ #endif - if (last_bti == (BTItem) NULL) { + if (last_bti 
== (BTItem) NULL) + { first_off = P_FIRSTKEY; - } else if (!_bt_itemcmp(index, _bt_nattr, bti, last_bti, BTEqualStrategyNumber)) { + } + else if ( !_bt_itemcmp(index, _bt_nattr, + bti, last_bti, BTEqualStrategyNumber) ) + { first_off = off; } last_off = off; @@ -1061,7 +1065,7 @@ _bt_uppershutdown(Relation index, BTPageState *state) if (s->btps_doupper) { if (s->btps_next == (BTPageState *) NULL) { opaque->btpo_flags |= BTP_ROOT; - _bt_metaproot(index, blkno); + _bt_metaproot(index, blkno, s->btps_level + 1); } else { bti = _bt_minitem(s->btps_page, blkno, 0); (void) _bt_buildadd(index, s->btps_next, bti, 0); diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index fa2ff890fe9..f74a476bab8 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.9 1997/03/24 08:48:16 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.10 1997/04/16 01:48:29 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -297,7 +297,9 @@ _bt_formitem(IndexTuple itup) btitem = (BTItem) palloc(nbytes_btitem); memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen); +#ifndef BTREE_VERSION_1 btitem->bti_oid = newoid(); +#endif return (btitem); } -- GitLab