diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 2ecce3086392673b23eca030cc0a19a0018d1147..0624bd06a87acc4014302827b2f2b2f70284299f 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.12 1997/01/10 09:46:33 vadim Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.13 1997/02/12 05:04:17 scrappy Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -33,8 +33,8 @@
 # include <string.h>
 #endif
 
-bool	BuildingBtree = false;
-bool	FastBuild = false; /* turn this on to make bulk builds work*/
+bool	BuildingBtree = false;	/* see comment in btbuild() */
+bool	FastBuild = true;	/* use sort/build instead of insertion build */
 
 /*
  *  btbuild() -- build a new btree index.
@@ -67,21 +67,34 @@ btbuild(Relation heap,
     int i;
     BTItem btitem;
 #ifndef OMIT_PARTIAL_INDEX
-    ExprContext *econtext;
-    TupleTable tupleTable;
-    TupleTableSlot *slot;
+    ExprContext *econtext = (ExprContext *) NULL;
+    TupleTable tupleTable = (TupleTable) NULL;
+    TupleTableSlot *slot = (TupleTableSlot *) NULL;
 #endif
     Oid hrelid, irelid;
     Node *pred, *oldPred;
-    void *spool;
+    void *spool = (void *) NULL;
     bool isunique;
-    
+    bool usefast;
+
+#if 0
+    ResetBufferUsage();
+#endif
+
     /* note that this is a new btree */
     BuildingBtree = true;
     
     pred = predInfo->pred;
     oldPred = predInfo->oldPred;
 
+    /*
+     * bootstrap processing does something strange, so don't use
+     * sort/build for initial catalog indices.  at some point i need
+     * to look harder at this.  (there is some kind of incremental
+     * processing going on there.) -- pma 08/29/95
+     */
+    usefast = (FastBuild && IsNormalProcessingMode());
+
     /* see if index is unique */
     isunique = IndexIsUniqueNoCache(RelationGetRelationId(index));
 
@@ -110,13 +123,16 @@ btbuild(Relation heap,
 	slot = ExecAllocTableSlot(tupleTable);
 	econtext = makeNode(ExprContext);
 	FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
+
+	/*
+	 * we never want to use sort/build if we are extending an
+	 * existing partial index -- it works by inserting the
+	 * newly-qualifying tuples into the existing index.
+	 * (sort/build would overwrite the existing index with one
+	 * consisting of the newly-qualifying tuples.)
+	 */
+	usefast = false;
     }
-	else
-	{
-		econtext = NULL;
-		tupleTable = NULL;
-		slot = NULL;
-	}
 #endif /* OMIT_PARTIAL_INDEX */
     
     /* start a heap scan */
@@ -126,12 +142,10 @@ btbuild(Relation heap,
     /* build the index */
     nhtups = nitups = 0;
     
-    if (FastBuild) {
+    if (usefast) {
 	spool = _bt_spoolinit(index, 7);
 	res = (InsertIndexResult) NULL;
     }
-	else
-		spool = NULL;
 
     for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
 	
@@ -219,7 +233,7 @@ btbuild(Relation heap,
 	 * into a spool page for subsequent processing.  otherwise, we
 	 * insert into the btree.
 	 */
-	if (FastBuild) {
+	if (usefast) {
 	    _bt_spool(index, btitem, spool);
 	} else {
 	    res = _bt_doinsert(index, btitem, isunique, heap);
@@ -248,12 +262,24 @@ btbuild(Relation heap,
      * merging the runs, (2) inserting the sorted tuples into btree
      * pages and (3) building the upper levels.
      */
-    if (FastBuild) {
-	_bt_spool(index, (BTItem) NULL, spool);	/* flush spool */
+    if (usefast) {
+	_bt_spool(index, (BTItem) NULL, spool);	/* flush the spool */
 	_bt_leafbuild(index, spool);
 	_bt_spooldestroy(spool);
     }
 
+#if 0
+    {
+	extern int ReadBufferCount, BufferHitCount, BufferFlushCount;
+	extern long NDirectFileRead, NDirectFileWrite;
+
+	printf("buffer(%d): r=%d w=%d\n", heap->rd_rel->relblocksz,
+	       ReadBufferCount - BufferHitCount, BufferFlushCount);
+	printf("direct(%d): r=%d w=%d\n", LocalBlockSize,
+	       NDirectFileRead, NDirectFileWrite);
+    }
+#endif
+
     /*
      *  Since we just counted the tuples in the heap, we update its
      *  stats in pg_class to guarantee that the planner takes advantage
@@ -312,7 +338,10 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation
 
     pfree(btitem);
     pfree(itup);
-    
+
+    /* adjust any active scans that will be affected by this insertion */
+    _bt_adjscans(rel, &(res->pointerData), BT_INSERT);
+
     return (res);
 }
 
@@ -533,7 +562,7 @@ void
 btdelete(Relation rel, ItemPointer tid)
 {
     /* adjust any active scans that will be affected by this deletion */
-    _bt_adjscans(rel, tid);
+    _bt_adjscans(rel, tid, BT_DELETE);
     
     /* delete the data from the page */
     _bt_pagedel(rel, tid);
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 6bf86d6f6e092ca92625843dfddebdf604bdcc18..00bf3bb85273c2dbaa1aa70286f152ec1079cde4 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -5,7 +5,7 @@
  *
  *
  * IDENTIFICATION
- *    $Id: nbtsort.c,v 1.8 1996/11/05 10:35:35 scrappy Exp $
+ *    $Id: nbtsort.c,v 1.9 1997/02/12 05:04:20 scrappy Exp $
  *
  * NOTES
  *
@@ -20,15 +20,15 @@
  *     - when each input run has been exhausted, switch to another output
  *       tape and start processing another run.
  * - when we have fewer runs than tapes, we know we are ready to start
- *   merging into the btree leaf pages. 
- * - every time we complete a level of the btree, we can construct the
- *   next level up.  when we have only one page on a level, it can be
- *   attached to the btree metapage and we are done.
+ *   merging into the btree leaf pages.  (i.e., we do not have to wait
+ *   until we have exactly one tape.)
+ * - as we extract tuples from the final runs, we build the pages for
+ *   each level.  when we have only one page on a level, it must be the
+ *   root -- it can be attached to the btree metapage and we are done.
  *
  * conventions:
  * - external interface routines take in and return "void *" for their
- *   opaque handles.  this is for modularity reasons (i prefer not to
- *   export these structures without good reason).
+ *   opaque handles.  this is for modularity reasons.
  *
  * this code is moderately slow (~10% slower) compared to the regular
  * btree (insertion) build code on sorted or well-clustered data.  on
@@ -63,12 +63,23 @@
 # include <string.h>
 #endif
 
-#ifdef FASTBUILD
+/*
+ * turn on debugging output.
+ *
+ * XXX this code just does a numeric printf of the index key, so it's
+ * only really useful for integer keys.
+ */
+/*#define FASTBUILD_DEBUG*/
+#define FASTBUILD_SPOOL
+#define FASTBUILD_MERGE
 
 #define	MAXTAPES	(7)
-#define	TAPEBLCKSZ	(BLCKSZ << 2)
+#define	TAPEBLCKSZ	(MAXBLCKSZ << 2)
 #define	TAPETEMP	"pg_btsortXXXXXX"
 
+extern int NDirectFileRead;
+extern int NDirectFileWrite;
+extern char *mktemp(char *template);
 
 /*-------------------------------------------------------------------------
  * sorting comparison routine - returns {-1,0,1} depending on whether
@@ -88,6 +99,11 @@
  * what the heck.
  * *-------------------------------------------------------------------------
  */
+typedef struct {
+    Datum	btsk_datum;
+    BTItem	btsk_item;
+} BTSortKey;
+
 static Relation _bt_sortrel;
 
 static void
@@ -97,28 +113,41 @@ _bt_isortcmpinit(Relation index)
 }
 
 static int
-_bt_isortcmp(const void *bti1p,const void *bti2p)
+_bt_isortcmp(BTSortKey *k1, BTSortKey *k2)
 {
-    BTItem bti1 = *(BTItem *)bti1p;
-    BTItem bti2 = *(BTItem *)bti2p;
-
-    if (bti1 == (BTItem) NULL) {
-	if (bti2 == (BTItem) NULL) {
+    if (k1->btsk_item == (BTItem) NULL) {
+	if (k2->btsk_item == (BTItem) NULL) {
 	    return(0);	/* 1 = 2 */
 	}
 	return(1);	/* 1 > 2 */
-    } else if (bti2 == (BTItem) NULL) {
+    } else if (k2->btsk_item == (BTItem) NULL) {
 	return(-1);	/* 1 < 2 */
-    } else if (_bt_itemcmp(_bt_sortrel, 1, bti1, bti2,
-			   BTGreaterStrategyNumber)) {
+    } else if (_bt_invokestrat(_bt_sortrel, 1, BTGreaterStrategyNumber,
+			       k1->btsk_datum, k2->btsk_datum)) {
 	return(1);	/* 1 > 2 */
-    } else if (_bt_itemcmp(_bt_sortrel, 1, bti2, bti1,
-			   BTGreaterStrategyNumber)) {
+    } else if (_bt_invokestrat(_bt_sortrel, 1, BTGreaterStrategyNumber,
+			       k2->btsk_datum, k1->btsk_datum)) {
 	return(-1);	/* 1 < 2 */
     }
     return(0);		/* 1 = 2 */
 }
 
+static void
+_bt_setsortkey(Relation index, BTItem bti, BTSortKey *sk)
+{
+    sk->btsk_item = (BTItem) NULL;
+    sk->btsk_datum = (Datum) NULL;
+    if (bti != (BTItem) NULL) {
+	bool isnull;
+	Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull);
+	
+	if (!isnull) {
+	    sk->btsk_item = bti;
+	    sk->btsk_datum = d;
+	}
+    }
+}
+
 /*-------------------------------------------------------------------------
  * priority queue methods
  *
@@ -133,7 +162,7 @@ _bt_isortcmp(const void *bti1p,const void *bti2p)
 
 typedef struct {
     int		btpqe_tape;	/* tape identifier */
-    BTItem	btpqe_item;	/* pointer to BTItem in tape buffer */
+    BTSortKey	btpqe_item;	/* pointer to BTItem in tape buffer */
 } BTPriQueueElem;
 
 #define	MAXELEM	MAXTAPES
@@ -336,7 +365,8 @@ static void
 _bt_tapewrite(BTTapeBlock *tape, int eor)
 {
     tape->bttb_eor = eor;
-    FileWrite(tape->bttb_fd, (char*)tape, TAPEBLCKSZ);
+    FileWrite(tape->bttb_fd, (char *) tape, TAPEBLCKSZ);
+    NDirectFileWrite += TAPEBLCKSZ;
     _bt_tapereset(tape);
 }
 
@@ -356,7 +386,7 @@ _bt_taperead(BTTapeBlock *tape)
     int nread;
 
     if (tape->bttb_eor) {
-	return(0);		/* we are at End-Of-Run */
+	return(0);		/* we are already at End-Of-Run */
     }
 
     /*
@@ -364,7 +394,7 @@ _bt_taperead(BTTapeBlock *tape)
      * VFD (the one in the block we're reading is bogus).
      */
     fd = tape->bttb_fd;
-    nread = FileRead(fd, (char*) tape, TAPEBLCKSZ);
+    nread = FileRead(fd, (char *) tape, TAPEBLCKSZ);
     tape->bttb_fd = fd;
 
     if (nread != TAPEBLCKSZ) {
@@ -372,6 +402,7 @@ _bt_taperead(BTTapeBlock *tape)
 	return(0);
     }
     Assert(tape->bttb_magic == BTTAPEMAGIC);
+    NDirectFileRead += TAPEBLCKSZ;
     return(1);
 }
 
@@ -445,8 +476,6 @@ typedef struct {
 void *
 _bt_spoolinit(Relation index, int ntapes)
 {
-    char *mktemp();
-
     BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool));
     int i;
     char *fname = (char *) palloc(sizeof(TAPETEMP) + 1);
@@ -567,6 +596,7 @@ _bt_spool(Relation index, BTItem btitem, void *spool)
     BTSpool *btspool = (BTSpool *) spool;
     BTTapeBlock *itape;
     Size itemsz;
+    int i;
 
     itape = btspool->bts_itape[btspool->bts_tape];
     itemsz = BTITEMSZ(btitem);
@@ -579,7 +609,7 @@ _bt_spool(Relation index, BTItem btitem, void *spool)
      * buffer.
      */
     if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) {
-	BTItem *parray;
+	BTSortKey *parray = (BTSortKey *) NULL;
 	BTTapeBlock *otape;
 	BTItem bti;
 	char *pos;
@@ -590,45 +620,49 @@ _bt_spool(Relation index, BTItem btitem, void *spool)
 	 * build an array of pointers to the BTItemDatas on the input
 	 * block.
 	 */
-	parray = (BTItem *) palloc(itape->bttb_ntup * sizeof(BTItem));
-	if (parray == (BTItem *) NULL) {
-	    elog(WARN, "_bt_spool: out of memory");
-	}
-	pos = itape->bttb_data;
-	for (i = 0; i < itape->bttb_ntup; ++i) {
-	    parray[i] = _bt_tapenext(itape, &pos);
+	if (itape->bttb_ntup > 0) {
+	    parray =
+		(BTSortKey *) palloc(itape->bttb_ntup * sizeof(BTSortKey));
+	    if (parray == (BTSortKey *) NULL) {
+		elog(WARN, "_bt_spool: out of memory");
+	    }
+	    pos = itape->bttb_data;
+	    for (i = 0; i < itape->bttb_ntup; ++i) {
+		_bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i]));
+	    }
+	    
+	    /*
+	     * qsort the pointer array.
+	     */
+	    _bt_isortcmpinit(index);
+	    qsort((void *) parray, itape->bttb_ntup, sizeof(BTSortKey),
+		  _bt_isortcmp);
 	}
 
-	/*
-	 * qsort the pointer array.
-	 */
-	_bt_isortcmpinit(index);
-	qsort((void *) parray, itape->bttb_ntup, sizeof(BTItem), _bt_isortcmp);
-
 	/*
 	 * write the spooled run into the output tape.  we copy the
 	 * BTItemDatas in the order dictated by the sorted array of
 	 * BTItems, not the original order.
 	 *
 	 * (since everything was DOUBLEALIGN'd and is all on a single
-	 * page, everything had *better* still fit on one page..)
+	 * tape block, everything had *better* still fit on one tape
+	 * block..)
 	 */
 	otape = btspool->bts_otape[btspool->bts_tape];
 	for (i = 0; i < itape->bttb_ntup; ++i) {
-	    bti = parray[i];
+	    bti = parray[i].btsk_item;
 	    btisz = BTITEMSZ(bti);
 	    btisz = DOUBLEALIGN(btisz);
 	    _bt_tapeadd(otape, bti, btisz);
-#ifdef FASTBUILD_DEBUG
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_SPOOL)
 	    {
 		bool isnull;
-		Datum d = index_getattr(&(bti->bti_itup), 1,
-					RelationGetTupleDescriptor(index),
-					&isnull);
+		Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att,
+					&isnull);	
 		printf("_bt_spool: inserted <%x> into output tape %d\n",
 		       d, btspool->bts_tape);
 	    }
-#endif /* FASTBUILD_DEBUG */
+#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */
 	}
 
 	/*
@@ -653,7 +687,9 @@ _bt_spool(Relation index, BTItem btitem, void *spool)
 	/*
 	 * destroy the pointer array.
 	 */
-	pfree((void *) parray);
+	if (parray != (BTSortKey *) NULL) {
+	    pfree((void *) parray);
+	}
     }
 
     /* insert this item into the current buffer */
@@ -671,6 +707,9 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
     BTPageOpaque opaque;
 
     *buf = _bt_getbuf(index, P_NEW, BT_WRITE);
+#if 0
+    printf("\tblk=%d\n", BufferGetBlockNumber(*buf));
+#endif
     *page = BufferGetPage(*buf);
     _bt_pageinit(*page, BufferGetPageSize(*buf));
     opaque = (BTPageOpaque) PageGetSpecialPointer(*page);
@@ -680,8 +719,9 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
 
 /*
  * slide an array of ItemIds back one slot (from P_FIRSTKEY to
- * P_HIKEY).  we need to do this when we discover that we have built
- * an ItemId array in what has turned out to be a P_RIGHTMOST page.
+ * P_HIKEY, overwriting P_HIKEY).  we need to do this when we discover
+ * that we have built an ItemId array in what has turned out to be a
+ * P_RIGHTMOST page.
  */
 static void
 _bt_slideleft(Relation index, Buffer buf, Page page)
@@ -691,24 +731,71 @@ _bt_slideleft(Relation index, Buffer buf, Page page)
     ItemId previi;
     ItemId thisii;
 
-    maxoff = PageGetMaxOffsetNumber(page);
-    previi = PageGetItemId(page, P_HIKEY);
-    for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) {
-	thisii = PageGetItemId(page, off);
-	*previi = *thisii;
-	previi = thisii;
+    if (!PageIsEmpty(page)) {
+	maxoff = PageGetMaxOffsetNumber(page);
+	previi = PageGetItemId(page, P_HIKEY);
+	for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) {
+	    thisii = PageGetItemId(page, off);
+	    *previi = *thisii;
+	    previi = thisii;
+	}
+	((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
     }
-    ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
 }
 
-typedef struct {
+typedef struct BTPageState {
     Buffer		btps_buf;
     Page		btps_page;
     BTItem		btps_lastbti;
     OffsetNumber	btps_lastoff;
     OffsetNumber	btps_firstoff;
+    int			btps_level;
+    bool		btps_doupper;
+    struct BTPageState	*btps_next;
 } BTPageState;
 
+/*
+ * allocate and initialize a new BTPageState.  the returned structure
+ * is suitable for immediate use by _bt_buildadd.
+ */
+void *
+_bt_pagestate(Relation index, int flags, int level, bool doupper)
+{
+    BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState));
+
+    (void) memset((char *) state, 0, sizeof(BTPageState));
+    _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags);
+    state->btps_firstoff = InvalidOffsetNumber;
+    state->btps_lastoff = P_HIKEY;
+    state->btps_lastbti = (BTItem) NULL;
+    state->btps_next = (BTPageState *) NULL;
+    state->btps_level = level;
+    state->btps_doupper = doupper;
+
+    return((void *) state);
+}
+
+/*
+ * return a copy of the minimum (P_HIKEY or P_FIRSTKEY) item on
+ * 'opage'.  the copy is modified to point to 'opage' (as opposed to
+ * the page to which the item used to point, e.g., a heap page if
+ * 'opage' is a leaf page).
+ */
+BTItem
+_bt_minitem(Page opage, BlockNumber oblkno, int atend)
+{
+    OffsetNumber off;
+    BTItem obti;
+    BTItem nbti;
+
+    off = atend ? P_HIKEY : P_FIRSTKEY;
+    obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off));
+    nbti = _bt_formitem(&(obti->bti_itup));
+    ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY);
+
+    return(nbti);
+}
+
 /*
  * add an item to a disk page from a merge tape block.
  *
@@ -748,11 +835,13 @@ typedef struct {
  *
  * if all keys are unique, 'first' will always be the same as 'last'.
  */
-static void
-_bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
+BTItem
+_bt_buildadd(Relation index, void *pstate, BTItem bti, int flags)
 {
+    BTPageState *state = (BTPageState *) pstate;
     Buffer nbuf;
     Page npage;
+    char *pos;
     BTItem last_bti;
     OffsetNumber first_off;
     OffsetNumber last_off;
@@ -804,19 +893,26 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
 	    ii = PageGetItemId(opage, o);
 	    (void) PageAddItem(npage, PageGetItem(opage, ii),
 			       ii->lp_len, n, LP_USED);
-#ifdef FASTBUILD_DEBUG
+#if 0
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
 	    {
 		bool isnull;
 		BTItem tmpbti =
 		    (BTItem) PageGetItem(npage, PageGetItemId(npage, n));
 		Datum d = index_getattr(&(tmpbti->bti_itup), 1,
-					RelationGetTupleDescriptor(index),
-					&isnull);
-		printf("_bt_buildadd: moved <%x> to offset %d\n",
-		       d, n);
+					index->rd_att, &isnull);
+		printf("_bt_buildadd: moved <%x> to offset %d at level %d\n",
+		       d, n, state->btps_level);
 	    }
-#endif /* FASTBUILD_DEBUG */
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+#endif
 	}
+	/*
+	 * this loop is backward because PageIndexTupleDelete shuffles
+	 * the tuples to fill holes in the page -- by starting at the
+	 * end and working back, we won't create holes (and thereby
+	 * avoid shuffling).
+	 */
 	for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) {
 	    PageIndexTupleDelete(opage, o);
 	}
@@ -842,6 +938,23 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
 	    nopaque->btpo_next = P_NONE;
 	}
 
+	/*
+	 * copy the old buffer's minimum key to its parent.  if we
+	 * don't have a parent, we have to create one; this adds a new
+	 * btree level.
+	 */
+	if (state->btps_doupper) {
+	    BTItem nbti;
+
+	    if (state->btps_next == (BTPageState *) NULL) {
+		state->btps_next =
+		    _bt_pagestate(index, 0, state->btps_level + 1, true);
+	    }
+	    nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0);
+	    (void) _bt_buildadd(index, state->btps_next, nbti, 0);
+	    pfree((void *) nbti);
+	}
+
 	/*
 	 * write out the old stuff.  we never want to see it again, so
 	 * we can give up our lock (if we had one; BuildingBtree is
@@ -856,16 +969,16 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
      */
     off = OffsetNumberNext(last_off);
     (void) PageAddItem(npage, (Item) bti, btisz, off, LP_USED);
-#ifdef FASTBUILD_DEBUG
+#if 0
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
     {
 	bool isnull;
-	Datum d = index_getattr(&(bti->bti_itup), 1, 
-				RelationGetTupleDescriptor(index),
-				&isnull);
-	printf("_bt_buildadd: inserted <%x> at offset %d\n",
-	       d, off);
+	Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull);
+	printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n",
+	       d, off, state->btps_level);
     }
-#endif /* FASTBUILD_DEBUG */
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+#endif
     if (last_bti == (BTItem) NULL) {
 	first_off = P_FIRSTKEY;
     } else if (!_bt_itemcmp(index, 1, bti, last_bti, BTEqualStrategyNumber)) {
@@ -879,6 +992,48 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
     state->btps_lastbti = last_bti;
     state->btps_lastoff = last_off;
     state->btps_firstoff = first_off;
+
+    return(last_bti);
+}
+
+void
+_bt_uppershutdown(Relation index, BTPageState *state)
+{
+    BTPageState *s;
+    BlockNumber blkno;
+    BTPageOpaque opaque;
+    BTItem bti;
+
+    for (s = state; s != (BTPageState *) NULL; s = s->btps_next) {
+	blkno = BufferGetBlockNumber(s->btps_buf);
+	opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
+
+	/*
+	 * if this is the root, attach it to the metapage.  otherwise,
+	 * stick the minimum key of the last page on this level (which
+	 * has not been split, or else it wouldn't be the last page)
+	 * into its parent.  this may cause the last page of upper
+	 * levels to split, but that's not a problem -- we haven't
+	 * gotten to them yet.
+	 */
+	if (s->btps_doupper) {
+	    if (s->btps_next == (BTPageState *) NULL) {
+		opaque->btpo_flags |= BTP_ROOT;
+		_bt_metaproot(index, blkno);
+	    } else {
+		bti = _bt_minitem(s->btps_page, blkno, 0);
+		(void) _bt_buildadd(index, s->btps_next, bti, 0);
+		pfree((void *) bti);
+	    }
+	}
+
+	/*
+	 * this is the rightmost page, so the ItemId array needs to be
+	 * slid back one slot.
+	 */
+	_bt_slideleft(index, s->btps_buf, s->btps_page);
+	_bt_wrtbuf(index, s->btps_buf);
+    }
 }
 
 /*
@@ -888,11 +1043,10 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
  *
  * XXX three nested loops?  gross.  cut me up into smaller routines.
  */
-static BlockNumber
+static void
 _bt_merge(Relation index, BTSpool *btspool)
 {
-    BTPageState state;
-    BlockNumber firstblk;
+    BTPageState *state;
     BTPriQueue q;
     BTPriQueueElem e;
     BTItem bti;
@@ -902,29 +1056,31 @@ _bt_merge(Relation index, BTSpool *btspool)
     int tapedone[MAXTAPES];
     int t;
     int goodtapes;
+    int npass;
     int nruns;
     Size btisz;
     bool doleaf = false;
+    BTPageState *s;
+    BTPageOpaque *opaque;
 
     /*
      * initialize state needed for the merge into the btree leaf pages.
      */
-    (void) memset((char *) &state, 0, sizeof(BTPageState));
-    _bt_blnewpage(index, &(state.btps_buf), &(state.btps_page), BTP_LEAF);
-    state.btps_lastoff = P_HIKEY;
-    state.btps_lastbti = (BTItem) NULL;
-    firstblk = BufferGetBlockNumber(state.btps_buf);
+    state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true);
 
+    npass = 0;
     do {							/* pass */
 	/*
 	 * each pass starts by flushing the previous outputs and
-	 * swapping inputs and outputs.  this process also clears the
-	 * new output tapes and rewinds the new input tapes.
+	 * swapping inputs and outputs.  flushing sets End-of-Run for
+	 * any dirty output tapes.  swapping clears the new output
+	 * tapes and rewinds the new input tapes.
 	 */
 	btspool->bts_tape = btspool->bts_ntapes - 1;
 	_bt_spoolflush(btspool);
 	_bt_spoolswap(btspool);
 
+	++npass;
 	nruns = 0;
 
 	for (;;) {						/* run */
@@ -949,22 +1105,27 @@ _bt_merge(Relation index, BTSpool *btspool)
 	    for (t = 0; t < btspool->bts_ntapes; ++t) {
 		itape = btspool->bts_itape[t];
 		tapepos[t] = itape->bttb_data;
+		tapedone[t] = 0;
 		_bt_tapereset(itape);
-		if (_bt_taperead(itape) == 0) {
-		    tapedone[t] = 1;
-		} else {
+		do {
+		    if (_bt_taperead(itape) == 0) {
+			tapedone[t] = 1;
+		    }
+		} while (!tapedone[t] && EMPTYTAPE(itape));
+		if (!tapedone[t]) {
 		    ++goodtapes;
-		    tapedone[t] = 0;
 		    e.btpqe_tape = t;
-		    e.btpqe_item = _bt_tapenext(itape, &tapepos[t]);
-		    if (e.btpqe_item != (BTItem) NULL) {
+		    _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]),
+				   &(e.btpqe_item));
+		    if (e.btpqe_item.btsk_item != (BTItem) NULL) {
 			_bt_pqadd(&q, &e);
 		    }
 		}
 	    }
 	    /*
 	     * if we don't have any tapes with any input (i.e., they
-	     * are all at EOF), we must be done with this pass.
+	     * are all at EOF), there is no work to do in this run --
+	     * we must be done with this pass.
 	     */
 	    if (goodtapes == 0) {
 		break;	/* for */
@@ -972,8 +1133,8 @@ _bt_merge(Relation index, BTSpool *btspool)
 	    ++nruns;
 	
 	    /*
-	     * output the smallest element from the queue until there are no
-	     * more.
+	     * output the smallest element from the queue until there
+	     * are no more.
 	     */
 	    while (_bt_pqnext(&q, &e) >= 0) {			/* item */
 		/*
@@ -982,63 +1143,59 @@ _bt_merge(Relation index, BTSpool *btspool)
 		 * if it hits either End-Of-Run or EOF.
 		 */
 		t = e.btpqe_tape;
-		bti = e.btpqe_item;
+		bti = e.btpqe_item.btsk_item;
 		if (bti != (BTItem) NULL) {
 		    btisz = BTITEMSZ(bti);
 		    btisz = DOUBLEALIGN(btisz);
 		    if (doleaf) {
-			_bt_buildadd(index, &state, bti, BTP_LEAF);
-#ifdef FASTBUILD_DEBUG
+			(void) _bt_buildadd(index, state, bti, BTP_LEAF);
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
 			{
 			    bool isnull;
 			    Datum d = index_getattr(&(bti->bti_itup), 1,
-				    RelationGetTupleDescriptor(index),
-						    &isnull);
-			    printf("_bt_merge: inserted <%x> into block %d\n",
-				   d, BufferGetBlockNumber(state.btps_buf));
+						    index->rd_att, &isnull);
+			    printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n",
+				   npass, nruns, d, t,
+				   BufferGetBlockNumber(state->btps_buf));
 			}
-#endif /* FASTBUILD_DEBUG */
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
 		    } else {
 			if (SPCLEFT(otape) < btisz) {
 			    /*
 			     * if it's full, write it out and add the
-			     * item to the next block.  (since we know
-			     * there will be at least one more block,
-			     * we know we do *not* want to set
-			     * End-Of-Run here!)
+			     * item to the next block.  (since we will
+			     * be adding another tuple immediately
+			     * after this, we can be sure that there
+			     * will be at least one more block in this
+			     * run and so we know we do *not* want to
+			     * set End-Of-Run here.)
 			     */
 			    _bt_tapewrite(otape, 0);
 			}
 			_bt_tapeadd(otape, bti, btisz);
-#ifdef FASTBUILD_DEBUG
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
 			{
 			    bool isnull;
 			    Datum d = index_getattr(&(bti->bti_itup), 1,
-				  RelationGetTupleDescriptor(index), &isnull);
-			    printf("_bt_merge: inserted <%x> into tape %d\n",
-				   d, btspool->bts_tape);
+						    index->rd_att, &isnull);
+			    printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n",
+				   npass, nruns, d, t,
+				   btspool->bts_tape);
 			}
-#endif /* FASTBUILD_DEBUG */
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
 		    }
 		}
-#ifdef FASTBUILD_DEBUG
-		{
-		    bool isnull;
-		    Datum d = index_getattr(&(bti->bti_itup), 1,
-					   RelationGetTupleDescriptor(index),
-					    &isnull);
-		    printf("_bt_merge: got <%x> from tape %d\n", d, t);
-		}
-#endif /* FASTBUILD_DEBUG */
-
 		itape = btspool->bts_itape[t];
 		if (!tapedone[t]) {
 		    BTItem newbti = _bt_tapenext(itape, &tapepos[t]);
 
 		    if (newbti == (BTItem) NULL) {
-			if (_bt_taperead(itape) == 0) {
-			    tapedone[t] = 1;
-			} else {
+			do {
+			    if (_bt_taperead(itape) == 0) {
+				tapedone[t] = 1;
+			    }
+			} while (!tapedone[t] && EMPTYTAPE(itape));
+			if (!tapedone[t]) {
 			    tapepos[t] = itape->bttb_data;
 			    newbti = _bt_tapenext(itape, &tapepos[t]);
 			}
@@ -1047,11 +1204,17 @@ _bt_merge(Relation index, BTSpool *btspool)
 			BTPriQueueElem nexte;
 			
 			nexte.btpqe_tape = t;
-			nexte.btpqe_item = newbti;
+			_bt_setsortkey(index, newbti, &(nexte.btpqe_item));
 			_bt_pqadd(&q, &nexte);
 		    }
 		}
 	    } 							/* item */
+
+	    /*
+	     * that's it for this run.  flush the output tape, marking
+	     * End-of-Run.
+	     */
+	    _bt_tapewrite(otape, 1);
 	}							/* run */
 	
 	/*
@@ -1068,60 +1231,50 @@ _bt_merge(Relation index, BTSpool *btspool)
 	}
     } while (nruns > 0);					/* pass */
 
-    /*
-     * this is the rightmost page, so the ItemId array needs to be
-     * slid back one slot.
-     */
-    _bt_slideleft(index, state.btps_buf, state.btps_page);
-    _bt_wrtbuf(index, state.btps_buf);
-
-    return(firstblk);
+    _bt_uppershutdown(index, state);
 }
 
 
 /*
- * given the block number 'blk' of the first page of a set of linked
- * siblings (i.e., the start of an entire level of the btree),
- * construct the corresponding next level of the btree.  we do this by
- * placing minimum keys from each page into this page.  the format of
- * the internal pages is otherwise the same as for leaf pages.
+ * given the (appropriately side-linked) leaf pages of a btree,
+ * construct the corresponding upper levels.  we do this by inserting
+ * minimum keys from each page into parent pages as needed.  the
+ * format of the internal pages is otherwise the same as for leaf
+ * pages.
+ *
+ * this routine is not called during conventional bulk-loading (in
+ * which case we can just build the upper levels as we create the
+ * sorted bottom level).  it is only used for index recycling.
  */
 void
-_bt_upperbuild(Relation index, BlockNumber blk, int level)
+_bt_upperbuild(Relation index)
 {
     Buffer rbuf;
+    BlockNumber blk;
     Page rpage;
     BTPageOpaque ropaque;
-    BTPageState state;
-    BlockNumber firstblk;
-    BTItem bti;
+    BTPageState *state;
     BTItem nbti;
-    OffsetNumber off;
-
-    rbuf = _bt_getbuf(index, blk, BT_WRITE);
-    rpage = BufferGetPage(rbuf);
-    ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
 
     /*
-     * if we only have one page on a level, we can just make it the
-     * root.
+     * find the first leaf block.  while we're at it, clear the
+     * BTP_ROOT flag that we set while building it (so we could find
+     * it later).
      */
-    if (P_RIGHTMOST(ropaque)) {
-	ropaque->btpo_flags |= BTP_ROOT;
-	_bt_wrtbuf(index, rbuf);
-	_bt_metaproot(index, blk);
-	return;
-    }
-    _bt_relbuf(index, rbuf, BT_WRITE);
+    rbuf = _bt_getroot(index, BT_WRITE);
+    blk = BufferGetBlockNumber(rbuf);
+    rpage = BufferGetPage(rbuf);
+    ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
+    ropaque->btpo_flags &= ~BTP_ROOT;
+    _bt_wrtbuf(index, rbuf);
 	
-    (void) memset((char *) &state, 0, sizeof(BTPageState));
-    _bt_blnewpage(index, &(state.btps_buf), &(state.btps_page), 0);
-    state.btps_lastoff = P_HIKEY;
-    state.btps_lastbti = (BTItem) NULL;
-    firstblk = BufferGetBlockNumber(state.btps_buf);
+    state = (BTPageState *) _bt_pagestate(index, 0, 0, true);
     
     /* for each page... */
     do {
+#if 0
+	printf("\t\tblk=%d\n", blk);
+#endif
 	rbuf = _bt_getbuf(index, blk, BT_READ);
 	rpage = BufferGetPage(rbuf);
 	ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
@@ -1133,35 +1286,24 @@ _bt_upperbuild(Relation index, BlockNumber blk, int level)
 	     * of the lower page and insert it into a page at this
 	     * level.
 	     */
-	    off = P_RIGHTMOST(ropaque) ? P_HIKEY : P_FIRSTKEY;
-	    bti = (BTItem) PageGetItem(rpage, PageGetItemId(rpage, off));
-	    nbti = _bt_formitem(&(bti->bti_itup));
-	    ItemPointerSet(&(nbti->bti_itup.t_tid), blk, P_HIKEY);
-#ifdef FASTBUILD_DEBUG
+	    nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque));
+#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE)
 	    {
 		bool isnull;
-		Datum d = index_getattr(&(nbti->bti_itup), 1, 
-					RelationGetTupleDescriptor(index),
+		Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att,
 					&isnull);
 		printf("_bt_upperbuild: inserting <%x> at %d\n",
-		       d, level);
+		       d, state->btps_level);
 	    }
-#endif /* FASTBUILD_DEBUG */
-	    _bt_buildadd(index, &state, nbti, 0);
+#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
+	    (void) _bt_buildadd(index, state, nbti, 0);
 	    pfree((void *) nbti);
 	}
 	blk = ropaque->btpo_next;
 	_bt_relbuf(index, rbuf, BT_READ);
     } while (blk != P_NONE);
 	
-    /*
-     * this is the rightmost page, so the ItemId array needs to be
-     * slid back one slot.
-     */
-    _bt_slideleft(index, state.btps_buf, state.btps_page);
-    _bt_wrtbuf(index, state.btps_buf);
-    
-    _bt_upperbuild(index, firstblk, level + 1);
+    _bt_uppershutdown(index, state);
 }
 
 /*
@@ -1171,26 +1313,5 @@ _bt_upperbuild(Relation index, BlockNumber blk, int level)
 void
 _bt_leafbuild(Relation index, void *spool)
 {
-    BTSpool *btspool = (BTSpool *) spool;
-    BlockNumber firstblk;
-
-    /*
-     * merge the runs into btree leaf pages.
-     */
-    firstblk = _bt_merge(index, btspool);
-
-    /*
-     * build the upper levels of the btree.
-     */
-    _bt_upperbuild(index, firstblk, 0);
+    _bt_merge(index, (BTSpool *) spool);
 }
-
-#else /* !FASTBUILD */
-
-void *_bt_spoolinit(Relation index, int ntapes) { return((void *) NULL); }
-void _bt_spooldestroy(void *spool) { }
-void _bt_spool(Relation index, BTItem btitem, void *spool) { }
-void _bt_upperbuild(Relation index, BlockNumber blk, int level) { }
-void _bt_leafbuild(Relation index, void *spool) { }
-
-#endif /* !FASTBUILD */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index affe0d584450da43ff3a0e501186201bfb472e1c..582f9933c09b88005f2a47f3c78bbf7441b298a1 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nbtree.h,v 1.5 1997/01/10 09:36:33 vadim Exp $
+ * $Id: nbtree.h,v 1.6 1997/02/12 05:04:28 scrappy Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -131,6 +131,13 @@ typedef BTStackData	*BTStack;
 #define BT_INSERTION	0
 #define BT_DESCENT	1
 
+/*
+ *  We must classify index modification types for the benefit of
+ *  _bt_adjscans.
+ */
+#define BT_INSERT	0
+#define	BT_DELETE	1
+
 /*
  *  In general, the btree code tries to localize its knowledge about
  *  page layout to a couple of routines.  However, we need a special
@@ -220,11 +227,7 @@ extern void btdelete(Relation rel, ItemPointer tid);
  */
 extern void _bt_regscan(IndexScanDesc scan);
 extern void _bt_dropscan(IndexScanDesc scan);
-extern void _bt_adjscans(Relation rel, ItemPointer tid);
-extern void _bt_scandel(IndexScanDesc scan, BlockNumber blkno,
-			OffsetNumber offno);
-extern bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno,
-			    OffsetNumber offno);
+extern void _bt_adjscans(Relation rel, ItemPointer tid, int op);
 
 /*
  * prototypes for functions in nbtsearch.c
@@ -267,7 +270,7 @@ extern BTItem _bt_formitem(IndexTuple itup);
 extern void *_bt_spoolinit(Relation index, int ntapes);
 extern void _bt_spooldestroy(void *spool);
 extern void _bt_spool(Relation index, BTItem btitem, void *spool);
-extern void _bt_upperbuild(Relation index, BlockNumber blk, int level);
+extern void _bt_upperbuild(Relation index);
 extern void _bt_leafbuild(Relation index, void *spool);
 
 #endif	/* NBTREE_H */
diff --git a/src/port/BSD44_derived.h b/src/port/BSD44_derived.h
new file mode 100644
index 0000000000000000000000000000000000000000..919b38cffe053b38592789a65222fd5981379704
--- /dev/null
+++ b/src/port/BSD44_derived.h
@@ -0,0 +1,7 @@
+#  define USE_POSIX_TIME
+#  define NEED_I386_TAS_ASM
+#  define HAS_TEST_AND_SET
+#  if defined(__mips__)
+/* #    undef HAS_TEST_AND_SET */
+#  endif
+   typedef unsigned char slock_t;