From bf3dbb5881e9b886ee9fe84bca2153c698eea885 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 27 Mar 2005 23:53:05 +0000
Subject: [PATCH] First steps towards index scans with heap access decoupled
 from index access: define new index access method functions 'amgetmulti' that
 can fetch multiple TIDs per call.  (The functions exist but are totally
 untested as yet.)  Since I was modifying pg_am anyway, remove the
 no-longer-needed 'rel' parameter from amcostestimate functions, and also
 remove the vestigial amowner column that was creating useless work for
 Alvaro's shared-object-dependencies project. Initdb forced due to changes in
 pg_am.

---
 doc/src/sgml/catalogs.sgml            |  28 ++---
 doc/src/sgml/indexam.sgml             |  85 +++++++++----
 src/backend/access/gist/gistget.c     |  29 ++++-
 src/backend/access/hash/hash.c        |  71 ++++++++++-
 src/backend/access/heap/heapam.c      |  29 ++++-
 src/backend/access/index/genam.c      |   3 +-
 src/backend/access/index/indexam.c    | 166 ++++++++++++++++++++------
 src/backend/access/nbtree/nbtree.c    |  75 +++++++++++-
 src/backend/access/rtree/rtget.c      |  46 ++++++-
 src/backend/optimizer/path/costsize.c |   5 +-
 src/backend/utils/adt/selfuncs.c      |  78 ++++++------
 src/include/access/genam.h            |   8 +-
 src/include/access/gist.h             |   3 +-
 src/include/access/hash.h             |   5 +-
 src/include/access/nbtree.h           |   5 +-
 src/include/access/relscan.h          |  10 +-
 src/include/access/rtree.h            |   7 +-
 src/include/catalog/catversion.h      |   4 +-
 src/include/catalog/pg_am.h           |  38 +++---
 src/include/catalog/pg_proc.h         |  18 ++-
 20 files changed, 537 insertions(+), 176 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 7cfca6f1182..2dc8b300956 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,6 +1,6 @@
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.96 2005/02/13 03:04:15 tgl Exp $
+ $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.97 2005/03/27 23:52:51 tgl Exp $
  -->
 
 <chapter id="catalogs">
@@ -316,13 +316,6 @@
       <entry>Name of the access method</entry>
      </row>
 
-     <row>
-      <entry><structfield>amowner</structfield></entry>
-      <entry><type>int4</type></entry>
-      <entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usesysid</literal></entry>
-      <entry>User ID of the owner (currently not used)</entry>
-     </row>
-
      <row>
       <entry><structfield>amstrategies</structfield></entry>
       <entry><type>int2</type></entry>
@@ -374,24 +367,31 @@
      </row>
 
      <row>
-      <entry><structfield>amgettuple</structfield></entry>
+      <entry><structfield>aminsert</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Next valid tuple</quote> function</entry>
+      <entry><quote>Insert this tuple</quote> function</entry>
      </row>
 
      <row>
-      <entry><structfield>aminsert</structfield></entry>
+      <entry><structfield>ambeginscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Insert this tuple</quote> function</entry>
+      <entry><quote>Start new scan</quote> function</entry>
      </row>
 
      <row>
-      <entry><structfield>ambeginscan</structfield></entry>
+      <entry><structfield>amgettuple</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Start new scan</quote> function</entry>
+      <entry><quote>Next valid tuple</quote> function</entry>
+     </row>
+
+     <row>
+      <entry><structfield>amgetmulti</structfield></entry>
+      <entry><type>regproc</type></entry>
+      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
+      <entry><quote>Fetch multiple tuples</quote> function</entry>
      </row>
 
      <row>
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index d6b83060485..ef1e37a8260 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.2 2005/03/21 01:23:55 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.3 2005/03/27 23:52:51 tgl Exp $
 -->
 
 <chapter id="indexam">
@@ -252,6 +252,28 @@ amgettuple (IndexScanDesc scan,
 
   <para>
 <programlisting>
+boolean
+amgetmulti (IndexScanDesc scan,
+            ItemPointer tids,
+            int32 max_tids,
+            int32 *returned_tids);
+</programlisting>
+   Fetch multiple tuples in the given scan.  Returns TRUE if the scan should
+   continue, FALSE if no matching tuples remain.  <literal>tids</> points to
+   a caller-supplied array of <literal>max_tids</>
+   <structname>ItemPointerData</> records, which the call fills with TIDs of
+   matching tuples.  <literal>*returned_tids</> is set to the number of TIDs
+   actually returned.  This can be less than <literal>max_tids</>, or even
+   zero, even when the return value is TRUE.  (This provision allows the
+   access method to choose the most efficient stopping points in its scan,
+   for example index page boundaries.)  <function>amgetmulti</> and
+   <function>amgettuple</> cannot be used in the same index scan; there
+   are other restrictions too when using <function>amgetmulti</>, as explained
+   in <xref linkend="index-scanning">.
+  </para>
+
+  <para>
+<programlisting>
 void
 amrescan (IndexScanDesc scan,
           ScanKey key);
@@ -297,7 +319,6 @@ amrestrpos (IndexScanDesc scan);
 <programlisting>
 void
 amcostestimate (Query *root,
-                RelOptInfo *rel,
                 IndexOptInfo *index,
                 List *indexQuals,
                 Cost *indexStartupCost,
@@ -407,6 +428,25 @@ amcostestimate (Query *root,
    true, insertions or deletions from other backends must be handled as well.)
   </para>
 
+  <para>
+   Instead of using <function>amgettuple</>, an index scan can be done with
+   <function>amgetmulti</> to fetch multiple tuples per call.  This can be
+   noticeably more efficient than <function>amgettuple</> because it allows
+   avoiding lock/unlock cycles within the access method.  In principle
+   <function>amgetmulti</> should have the same effects as repeated
+   <function>amgettuple</> calls, but we impose several restrictions to
+   simplify matters.  In the first place, <function>amgetmulti</> does not
+   take a <literal>direction</> argument, and therefore it supports neither
+   backward scans nor intrascan reversal of direction.  The access method
+   need not support marking or restoring scan positions during an
+   <function>amgetmulti</> scan, either.  (These restrictions cost little
+   since it would be difficult to use these features in an
+   <function>amgetmulti</> scan anyway: adjusting the caller's buffered
+   list of TIDs would be complex.)  Finally, <function>amgetmulti</> does
+   not guarantee any locking of the returned tuples, with implications
+   spelled out in <xref linkend="index-locking">.
+  </para>
+
  </sect1>
 
  <sect1 id="index-locking">
@@ -515,10 +555,15 @@ amcostestimate (Query *root,
    and only visit the heap tuples sometime later, requires much less index
    locking overhead and may allow a more efficient heap access pattern.
    Per the above analysis, we must use the synchronous approach for
-   non-MVCC-compliant snapshots, but an asynchronous scan would be safe
-   for a query using an MVCC snapshot.  This possibility is not exploited
-   as of <productname>PostgreSQL</productname> 8.0, but it is likely to be
-   investigated soon.
+   non-MVCC-compliant snapshots, but an asynchronous scan is workable
+   for a query using an MVCC snapshot.
+  </para>
+
+  <para>
+   In an <function>amgetmulti</> index scan, the access method is not
+   required to keep an index pin on any of the returned tuples.  (It would be
+   impractical to pin more than the last one anyway.)  Therefore
+   it is only safe to use such scans with MVCC-compliant snapshots.
   </para>
 
  </sect1>
@@ -611,7 +656,6 @@ amcostestimate (Query *root,
 <programlisting>
 void
 amcostestimate (Query *root,
-                RelOptInfo *rel,
                 IndexOptInfo *index,
                 List *indexQuals,
                 Cost *indexStartupCost,
@@ -632,20 +676,11 @@ amcostestimate (Query *root,
      </listitem>
     </varlistentry>
 
-    <varlistentry>
-     <term>rel</term>
-     <listitem>
-      <para>
-       The relation the index is on.
-      </para>
-     </listitem>
-    </varlistentry>
-
     <varlistentry>
      <term>index</term>
      <listitem>
       <para>
-       The index itself.
+       The index being considered.
       </para>
      </listitem>
     </varlistentry>
@@ -714,19 +749,19 @@ amcostestimate (Query *root,
 
   <para>
    The index access costs should be computed in the units used by
-   <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential disk block fetch
-   has cost 1.0, a nonsequential fetch has cost random_page_cost, and
-   the cost of processing one index row should usually be taken as
-   cpu_index_tuple_cost (which is a user-adjustable optimizer parameter).
-   In addition, an appropriate multiple of cpu_operator_cost should be charged
+   <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential
+   disk block fetch has cost 1.0, a nonsequential fetch has cost
+   <varname>random_page_cost</>, and the cost of processing one index row
+   should usually be taken as <varname>cpu_index_tuple_cost</>.  In addition,
+   an appropriate multiple of <varname>cpu_operator_cost</> should be charged
    for any comparison operators invoked during index processing (especially
    evaluation of the indexQuals themselves).
   </para>
 
   <para>
    The access costs should include all disk and CPU costs associated with
-   scanning the index itself, but NOT the costs of retrieving or processing
-   the parent-table rows that are identified by the index.
+   scanning the index itself, but <emphasis>not</> the costs of retrieving or
+   processing the parent-table rows that are identified by the index.
   </para>
 
   <para>
@@ -764,7 +799,7 @@ amcostestimate (Query *root,
 
 <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           rel-&gt;relid, JOIN_INNER);
+                                           index-&gt;rel-&gt;relid, JOIN_INNER);
 </programlisting>
     </para>
    </step>
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 03856874f33..8f7a6c7ed4f 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.44 2005/02/05 19:38:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,6 +47,33 @@ gistgettuple(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(res);
 }
 
+Datum
+gistgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);
+	int32		max_tids = PG_GETARG_INT32(2);
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
+	bool		res = true;
+	int32		ntids = 0;
+
+	/* XXX generic implementation: loop around guts of gistgettuple */
+	while (ntids < max_tids)
+	{
+		if (ItemPointerIsValid(&(s->currentItemData)))
+			res = gistnext(s, ForwardScanDirection);
+		else
+			res = gistfirst(s, ForwardScanDirection);
+		if (!res)
+			break;
+		tids[ntids] = s->xs_ctup.t_self;
+		ntids++;
+	}
+
+	*returned_tids = ntids;
+	PG_RETURN_BOOL(res);
+}
+
 static bool
 gistfirst(IndexScanDesc s, ScanDirection dir)
 {
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 1fd901e96a3..7b15937766e 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.77 2005/03/21 01:23:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.78 2005/03/27 23:52:57 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -264,6 +264,75 @@ hashgettuple(PG_FUNCTION_ARGS)
 }
 
 
+/*
+ *	hashgetmulti() -- get multiple tuples at once
+ *
+ * This is a somewhat generic implementation: it avoids lock reacquisition
+ * overhead, but makes no attempt to pick especially good stopping
+ * points such as index page boundaries.
+ */
+Datum
+hashgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);
+	int32		max_tids = PG_GETARG_INT32(2);
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
+	HashScanOpaque so = (HashScanOpaque) scan->opaque;
+	Relation	rel = scan->indexRelation;
+	bool		res = true;
+	int32		ntids = 0;
+
+	/*
+	 * We hold pin but not lock on current buffer while outside the hash
+	 * AM. Reacquire the read lock here.
+	 */
+	if (BufferIsValid(so->hashso_curbuf))
+		_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);
+
+	while (ntids < max_tids)
+	{
+		/*
+		 * Start scan, or advance to next tuple.
+		 */
+		if (ItemPointerIsValid(&(scan->currentItemData)))
+			res = _hash_next(scan, ForwardScanDirection);
+		else
+			res = _hash_first(scan, ForwardScanDirection);
+		/*
+		 * Skip killed tuples if asked to.
+		 */
+		if (scan->ignore_killed_tuples)
+		{
+			while (res)
+			{
+				Page		page;
+				OffsetNumber offnum;
+
+				offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
+				page = BufferGetPage(so->hashso_curbuf);
+				if (!ItemIdDeleted(PageGetItemId(page, offnum)))
+					break;
+				res = _hash_next(scan, ForwardScanDirection);
+			}
+		}
+
+		if (!res)
+			break;
+		/* Save tuple ID, and continue scanning */
+		tids[ntids] = scan->xs_ctup.t_self;
+		ntids++;
+	}
+
+	/* Release read lock on current buffer, but keep it pinned */
+	if (BufferIsValid(so->hashso_curbuf))
+		_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);
+
+	*returned_tids = ntids;
+	PG_RETURN_BOOL(res);
+}
+
+
 /*
  *	hashbeginscan() -- start a scan on a hash index
  */
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4b540c857b7..b5020863000 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.184 2005/03/20 23:40:23 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.185 2005/03/27 23:52:58 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -933,18 +933,35 @@ heap_release_fetch(Relation relation,
 	 * Need share lock on buffer to examine tuple commit status.
 	 */
 	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	dp = (PageHeader) BufferGetPage(buffer);
 
 	/*
-	 * get the item line pointer corresponding to the requested tid
+	 * We'd better check for out-of-range offnum in case VACUUM has run since
+	 * the TID was obtained.
 	 */
-	dp = (PageHeader) BufferGetPage(buffer);
 	offnum = ItemPointerGetOffsetNumber(tid);
+	if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
+	{
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		if (keep_buf)
+			*userbuf = buffer;
+		else
+		{
+			ReleaseBuffer(buffer);
+			*userbuf = InvalidBuffer;
+		}
+		tuple->t_datamcxt = NULL;
+		tuple->t_data = NULL;
+		return false;
+	}
+
+	/*
+	 * get the item line pointer corresponding to the requested tid
+	 */
 	lp = PageGetItemId(dp, offnum);
 
 	/*
-	 * must check for deleted tuple (see for example analyze.c, which is
-	 * careful to pass an offnum in range, but doesn't know if the offnum
-	 * actually corresponds to an undeleted tuple).
+	 * Must check for deleted tuple.
 	 */
 	if (!ItemIdIsUsed(lp))
 	{
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 8572f492594..ceded59a2b4 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.44 2004/12/31 21:59:19 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.45 2005/03/27 23:52:59 tgl Exp $
  *
  * NOTES
  *	  many of the old access method routines have been turned into
@@ -103,6 +103,7 @@ RelationGetIndexScan(Relation indexRelation,
 
 	/* mark cached function lookup data invalid; it will be set later */
 	scan->fn_getnext.fn_oid = InvalidOid;
+	scan->fn_getmulti.fn_oid = InvalidOid;
 
 	scan->unique_tuple_pos = 0;
 	scan->unique_tuple_mark = 0;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 6d7a874aac8..f6f4a065cfb 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -8,20 +8,22 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.78 2005/03/21 01:23:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.79 2005/03/27 23:52:59 tgl Exp $
  *
  * INTERFACE ROUTINES
  *		index_open		- open an index relation by relation OID
  *		index_openrv	- open an index relation specified by a RangeVar
  *		index_openr		- open a system index relation by name
  *		index_close		- close an index relation
- *		index_beginscan - start a scan of an index
+ *		index_beginscan - start a scan of an index with amgettuple
+ *		index_beginscan_multi - start a scan of an index with amgetmulti
  *		index_rescan	- restart a scan of an index
  *		index_endscan	- end a scan
  *		index_insert	- insert an index tuple into a relation
  *		index_markpos	- mark a scan position
  *		index_restrpos	- restore a scan position
  *		index_getnext	- get the next tuple from a scan
+ *		index_getmulti	- get multiple tuples from a scan
  *		index_bulk_delete	- bulk deletion of index tuples
  *		index_vacuum_cleanup	- post-deletion cleanup of an index
  *		index_cost_estimator	- fetch amcostestimate procedure OID
@@ -85,24 +87,25 @@
 	AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
 )
 
-#define GET_REL_PROCEDURE(x,y) \
+#define GET_REL_PROCEDURE(pname) \
 ( \
-	procedure = indexRelation->rd_am->y, \
+	procedure = indexRelation->rd_am->pname, \
 	(!RegProcedureIsValid(procedure)) ? \
-		elog(ERROR, "index_%s: invalid %s regproc", \
-			 CppAsString(x), CppAsString(y)) \
+		elog(ERROR, "invalid %s regproc", CppAsString(pname)) \
 	: (void)NULL \
 )
 
-#define GET_SCAN_PROCEDURE(x,y) \
+#define GET_SCAN_PROCEDURE(pname) \
 ( \
-	procedure = scan->indexRelation->rd_am->y, \
+	procedure = scan->indexRelation->rd_am->pname, \
 	(!RegProcedureIsValid(procedure)) ? \
-		elog(ERROR, "index_%s: invalid %s regproc", \
-			 CppAsString(x), CppAsString(y)) \
+		elog(ERROR, "invalid %s regproc", CppAsString(pname)) \
 	: (void)NULL \
 )
 
+static IndexScanDesc index_beginscan_internal(Relation indexRelation,
+											  int nkeys, ScanKey key);
+
 
 /* ----------------------------------------------------------------
  *				   index_ interface functions
@@ -222,7 +225,7 @@ index_insert(Relation indexRelation,
 	RegProcedure procedure;
 
 	RELATION_CHECKS;
-	GET_REL_PROCEDURE(insert, aminsert);
+	GET_REL_PROCEDURE(aminsert);
 
 	/*
 	 * have the am's insert proc do all the work.
@@ -236,15 +239,14 @@ index_insert(Relation indexRelation,
 										 BoolGetDatum(check_uniqueness)));
 }
 
-/* ----------------
- *		index_beginscan - start a scan of an index
+/*
+ * index_beginscan - start a scan of an index with amgettuple
  *
  * Note: heapRelation may be NULL if there is no intention of calling
  * index_getnext on this scan; index_getnext_indexitem will not use the
  * heapRelation link (nor the snapshot).  However, the caller had better
  * be holding some kind of lock on the heap relation in any case, to ensure
  * no one deletes it (or the index) out from under us.
- * ----------------
  */
 IndexScanDesc
 index_beginscan(Relation heapRelation,
@@ -255,8 +257,71 @@ index_beginscan(Relation heapRelation,
 	IndexScanDesc scan;
 	RegProcedure procedure;
 
+	scan = index_beginscan_internal(indexRelation, nkeys, key);
+
+	/*
+	 * Save additional parameters into the scandesc.  Everything else was
+	 * set up by RelationGetIndexScan.
+	 */
+	scan->heapRelation = heapRelation;
+	scan->xs_snapshot = snapshot;
+
+	/*
+	 * We want to look up the amgettuple procedure just once per scan, not
+	 * once per index_getnext call.  So do it here and save the fmgr info
+	 * result in the scan descriptor.
+	 */
+	GET_SCAN_PROCEDURE(amgettuple);
+	fmgr_info(procedure, &scan->fn_getnext);
+
+	return scan;
+}
+
+/*
+ * index_beginscan_multi - start a scan of an index with amgetmulti
+ *
+ * As above, caller had better be holding some lock on the parent heap
+ * relation, even though it's not explicitly mentioned here.
+ */
+IndexScanDesc
+index_beginscan_multi(Relation indexRelation,
+					  Snapshot snapshot,
+					  int nkeys, ScanKey key)
+{
+	IndexScanDesc scan;
+	RegProcedure procedure;
+
+	scan = index_beginscan_internal(indexRelation, nkeys, key);
+
+	/*
+	 * Save additional parameters into the scandesc.  Everything else was
+	 * set up by RelationGetIndexScan.
+	 */
+	scan->xs_snapshot = snapshot;
+
+	/*
+	 * We want to look up the amgetmulti procedure just once per scan, not
+	 * once per index_getmulti call.  So do it here and save the fmgr info
+	 * result in the scan descriptor.
+	 */
+	GET_SCAN_PROCEDURE(amgetmulti);
+	fmgr_info(procedure, &scan->fn_getmulti);
+
+	return scan;
+}
+
+/*
+ * index_beginscan_internal --- common code for index_beginscan variants
+ */
+static IndexScanDesc
+index_beginscan_internal(Relation indexRelation,
+						 int nkeys, ScanKey key)
+{
+	IndexScanDesc scan;
+	RegProcedure procedure;
+
 	RELATION_CHECKS;
-	GET_REL_PROCEDURE(beginscan, ambeginscan);
+	GET_REL_PROCEDURE(ambeginscan);
 
 	RelationIncrementReferenceCount(indexRelation);
 
@@ -278,21 +343,6 @@ index_beginscan(Relation heapRelation,
 										 Int32GetDatum(nkeys),
 										 PointerGetDatum(key)));
 
-	/*
-	 * Save additional parameters into the scandesc.  Everything else was
-	 * set up by RelationGetIndexScan.
-	 */
-	scan->heapRelation = heapRelation;
-	scan->xs_snapshot = snapshot;
-
-	/*
-	 * We want to look up the amgettuple procedure just once per scan, not
-	 * once per index_getnext call.  So do it here and save the fmgr info
-	 * result in the scan descriptor.
-	 */
-	GET_SCAN_PROCEDURE(beginscan, amgettuple);
-	fmgr_info(procedure, &scan->fn_getnext);
-
 	return scan;
 }
 
@@ -314,7 +364,7 @@ index_rescan(IndexScanDesc scan, ScanKey key)
 	RegProcedure procedure;
 
 	SCAN_CHECKS;
-	GET_SCAN_PROCEDURE(rescan, amrescan);
+	GET_SCAN_PROCEDURE(amrescan);
 
 	/* Release any held pin on a heap page */
 	if (BufferIsValid(scan->xs_cbuf))
@@ -346,7 +396,7 @@ index_endscan(IndexScanDesc scan)
 	RegProcedure procedure;
 
 	SCAN_CHECKS;
-	GET_SCAN_PROCEDURE(endscan, amendscan);
+	GET_SCAN_PROCEDURE(amendscan);
 
 	/* Release any held pin on a heap page */
 	if (BufferIsValid(scan->xs_cbuf))
@@ -378,7 +428,7 @@ index_markpos(IndexScanDesc scan)
 	RegProcedure procedure;
 
 	SCAN_CHECKS;
-	GET_SCAN_PROCEDURE(markpos, ammarkpos);
+	GET_SCAN_PROCEDURE(ammarkpos);
 
 	scan->unique_tuple_mark = scan->unique_tuple_pos;
 
@@ -395,7 +445,7 @@ index_restrpos(IndexScanDesc scan)
 	RegProcedure procedure;
 
 	SCAN_CHECKS;
-	GET_SCAN_PROCEDURE(restrpos, amrestrpos);
+	GET_SCAN_PROCEDURE(amrestrpos);
 
 	scan->kill_prior_tuple = false;		/* for safety */
 
@@ -525,9 +575,9 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
 							   &scan->xs_pgstat_info))
 			break;
 
-		/* Skip if no tuple at this location */
+		/* Skip if no undeleted tuple at this location */
 		if (heapTuple->t_data == NULL)
-			continue;			/* should we raise an error instead? */
+			continue;
 
 		/*
 		 * If we can't see it, maybe no one else can either.  Check to see
@@ -595,6 +645,44 @@ index_getnext_indexitem(IndexScanDesc scan,
 	return found;
 }
 
+/* ----------------
+ *		index_getmulti - get multiple tuples from an index scan
+ *
+ * Collects the TIDs of multiple heap tuples satisfying the scan keys.
+ * Since there's no interlock between the index scan and the eventual heap
+ * access, this is only safe to use with MVCC-based snapshots: the heap
+ * item slot could have been replaced by a newer tuple by the time we get
+ * to it.
+ *
+ * A TRUE result indicates more calls should occur; a FALSE result says the
+ * scan is done.  *returned_tids could be zero or nonzero in either case.
+ * ----------------
+ */
+bool
+index_getmulti(IndexScanDesc scan,
+			   ItemPointer tids, int32 max_tids,
+			   int32 *returned_tids)
+{
+	bool		found;
+
+	SCAN_CHECKS;
+
+	/* just make sure this is false... */
+	scan->kill_prior_tuple = false;
+
+	/*
+	 * have the am's getmulti proc do all the work. index_beginscan_multi
+	 * already set up fn_getmulti.
+	 */
+	found = DatumGetBool(FunctionCall4(&scan->fn_getmulti,
+									   PointerGetDatum(scan),
+									   PointerGetDatum(tids),
+									   Int32GetDatum(max_tids),
+									   PointerGetDatum(returned_tids)));
+
+	return found;
+}
+
 /* ----------------
  *		index_bulk_delete - do mass deletion of index entries
  *
@@ -613,7 +701,7 @@ index_bulk_delete(Relation indexRelation,
 	IndexBulkDeleteResult *result;
 
 	RELATION_CHECKS;
-	GET_REL_PROCEDURE(bulk_delete, ambulkdelete);
+	GET_REL_PROCEDURE(ambulkdelete);
 
 	result = (IndexBulkDeleteResult *)
 		DatumGetPointer(OidFunctionCall3(procedure,
@@ -644,7 +732,7 @@ index_vacuum_cleanup(Relation indexRelation,
 	if (!RegProcedureIsValid(indexRelation->rd_am->amvacuumcleanup))
 		return stats;
 
-	GET_REL_PROCEDURE(vacuum_cleanup, amvacuumcleanup);
+	GET_REL_PROCEDURE(amvacuumcleanup);
 
 	result = (IndexBulkDeleteResult *)
 		DatumGetPointer(OidFunctionCall3(procedure,
@@ -671,7 +759,7 @@ index_cost_estimator(Relation indexRelation)
 	RegProcedure procedure;
 
 	RELATION_CHECKS;
-	GET_REL_PROCEDURE(cost_estimator, amcostestimate);
+	GET_REL_PROCEDURE(amcostestimate);
 
 	return procedure;
 }
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 1fc26ea8958..82f9051de79 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.126 2005/03/21 01:23:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.127 2005/03/27 23:53:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -340,6 +340,79 @@ btgettuple(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(res);
 }
 
+/*
+ * btgetmulti() -- get multiple tuples at once
+ *
+ * This is a somewhat generic implementation: it avoids the _bt_restscan
+ * overhead, but makes no attempt to pick especially good stopping
+ * points such as index page boundaries.
+ */
+Datum
+btgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);
+	int32		max_tids = PG_GETARG_INT32(2);
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	bool		res = true;
+	int32		ntids = 0;
+
+	/*
+	 * Restore prior state if we were already called at least once.
+	 */
+	if (ItemPointerIsValid(&(scan->currentItemData)))
+		_bt_restscan(scan);
+
+	while (ntids < max_tids)
+	{
+		/*
+		 * Start scan, or advance to next tuple.
+		 */
+		if (ItemPointerIsValid(&(scan->currentItemData)))
+			res = _bt_next(scan, ForwardScanDirection);
+		else
+			res = _bt_first(scan, ForwardScanDirection);
+		/*
+		 * Skip killed tuples if asked to.
+		 */
+		if (scan->ignore_killed_tuples)
+		{
+			while (res)
+			{
+				Page		page;
+				OffsetNumber offnum;
+
+				offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
+				page = BufferGetPage(so->btso_curbuf);
+				if (!ItemIdDeleted(PageGetItemId(page, offnum)))
+					break;
+				res = _bt_next(scan, ForwardScanDirection);
+			}
+		}
+
+		if (!res)
+			break;
+		/* Save tuple ID, and continue scanning */
+		tids[ntids] = scan->xs_ctup.t_self;
+		ntids++;
+	}
+
+	/*
+	 * Save heap TID to use it in _bt_restscan.  Then release the read
+	 * lock on the buffer so that we aren't blocking other backends.
+	 */
+	if (res)
+	{
+		((BTScanOpaque) scan->opaque)->curHeapIptr = scan->xs_ctup.t_self;
+		LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
+				   BUFFER_LOCK_UNLOCK);
+	}
+
+	*returned_tids = ntids;
+	PG_RETURN_BOOL(res);
+}
+
 /*
  *	btbeginscan() -- start a scan on a btree index
  */
diff --git a/src/backend/access/rtree/rtget.c b/src/backend/access/rtree/rtget.c
index 31963e81a06..e076d5a989c 100644
--- a/src/backend/access/rtree/rtget.c
+++ b/src/backend/access/rtree/rtget.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.34 2005/01/18 23:25:43 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.35 2005/03/27 23:53:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,15 +29,13 @@ rtgettuple(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
 	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
+	RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
 	Page page;
 	OffsetNumber offnum;
-	RTreeScanOpaque so;
-
-	so = (RTreeScanOpaque) s->opaque;
 
 	/*
 	 * If we've already produced a tuple and the executor has informed
-	 * us that it should be marked "killed", do so know.
+	 * us that it should be marked "killed", do so now.
 	 */
 	if (s->kill_prior_tuple && ItemPointerIsValid(&(s->currentItemData)))
 	{
@@ -57,7 +55,7 @@ rtgettuple(PG_FUNCTION_ARGS)
 	{
 		bool res = rtnext(s, dir);
 
-		if (res == true && s->ignore_killed_tuples)
+		if (res && s->ignore_killed_tuples)
 		{
 			offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
 			page = BufferGetPage(so->curbuf);
@@ -69,6 +67,42 @@ rtgettuple(PG_FUNCTION_ARGS)
 	}
 }
 
+Datum
+rtgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);
+	int32		max_tids = PG_GETARG_INT32(2);
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
+	RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
+	bool		res = true;
+	int32		ntids = 0;
+
+	/* XXX generic implementation: loop around guts of rtgettuple */
+	while (ntids < max_tids)
+	{
+		res = rtnext(s, ForwardScanDirection);
+		if (res && s->ignore_killed_tuples)
+		{
+			Page page;
+			OffsetNumber offnum;
+
+			offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
+			page = BufferGetPage(so->curbuf);
+			if (ItemIdDeleted(PageGetItemId(page, offnum)))
+				continue;
+		}
+
+		if (!res)
+			break;
+		tids[ntids] = s->xs_ctup.t_self;
+		ntids++;
+	}
+
+	*returned_tids = ntids;
+	PG_RETURN_BOOL(res);
+}
+
 static bool
 rtnext(IndexScanDesc s, ScanDirection dir)
 {
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 83ac82a60a5..edde32d12df 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.139 2005/03/27 06:29:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.140 2005/03/27 23:53:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -277,9 +277,8 @@ cost_index(Path *path, Query *root,
 	 * index (ie, the fraction of main-table tuples we will have to
 	 * retrieve) and its correlation to the main-table tuple order.
 	 */
-	OidFunctionCall8(index->amcostestimate,
+	OidFunctionCall7(index->amcostestimate,
 					 PointerGetDatum(root),
-					 PointerGetDatum(baserel),
 					 PointerGetDatum(index),
 					 PointerGetDatum(indexQuals),
 					 PointerGetDatum(&indexStartupCost),
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 92fee4ce372..84684659731 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.174 2005/03/26 20:55:39 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.175 2005/03/27 23:53:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -4198,7 +4198,7 @@ string_to_bytea_const(const char *str, size_t str_len)
  */
 
 static void
-genericcostestimate(Query *root, RelOptInfo *rel,
+genericcostestimate(Query *root,
 					IndexOptInfo *index, List *indexQuals,
 					Cost *indexStartupCost,
 					Cost *indexTotalCost,
@@ -4250,7 +4250,7 @@ genericcostestimate(Query *root, RelOptInfo *rel,
 
 	/* Estimate the fraction of main-table tuples that will be visited */
 	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
-											   rel->relid,
+											   index->rel->relid,
 											   JOIN_INNER);
 
 	/*
@@ -4259,7 +4259,7 @@ genericcostestimate(Query *root, RelOptInfo *rel,
 	 * for partial indexes.  We can bound the number of tuples by the
 	 * index size, in any case.
 	 */
-	numIndexTuples = *indexSelectivity * rel->tuples;
+	numIndexTuples = *indexSelectivity * index->rel->tuples;
 
 	if (numIndexTuples > index->tuples)
 		numIndexTuples = index->tuples;
@@ -4328,25 +4328,24 @@ Datum
 btcostestimate(PG_FUNCTION_ARGS)
 {
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
-	RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-	List	   *indexQuals = (List *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
 	Oid			relid;
 	AttrNumber	colnum;
 	HeapTuple	tuple;
 
-	genericcostestimate(root, rel, index, indexQuals,
+	genericcostestimate(root, index, indexQuals,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
 	/*
 	 * If we can get an estimate of the first column's ordering
 	 * correlation C from pg_statistic, estimate the index correlation as
-	 * C for a single- column index, or C * 0.75 for multiple columns.
+	 * C for a single-column index, or C * 0.75 for multiple columns.
 	 * (The idea here is that multiple columns dilute the importance of
 	 * the first column's ordering, but don't negate it entirely.  Before
 	 * 8.0 we divided the correlation by the number of columns, but that
@@ -4355,7 +4354,7 @@ btcostestimate(PG_FUNCTION_ARGS)
 	if (index->indexkeys[0] != 0)
 	{
 		/* Simple variable --- look to stats for the underlying table */
-		relid = getrelid(rel->relid, root->rtable);
+		relid = getrelid(index->rel->relid, root->rtable);
 		Assert(relid != InvalidOid);
 		colnum = index->indexkeys[0];
 	}
@@ -4408,15 +4407,14 @@ Datum
 rtcostestimate(PG_FUNCTION_ARGS)
 {
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
-	RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-	List	   *indexQuals = (List *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-	genericcostestimate(root, rel, index, indexQuals,
+	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+	genericcostestimate(root, index, indexQuals,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
@@ -4427,15 +4425,14 @@ Datum
 hashcostestimate(PG_FUNCTION_ARGS)
 {
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
-	RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-	List	   *indexQuals = (List *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-	genericcostestimate(root, rel, index, indexQuals,
+	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+	genericcostestimate(root, index, indexQuals,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
@@ -4446,15 +4443,14 @@ Datum
 gistcostestimate(PG_FUNCTION_ARGS)
 {
 	Query	   *root = (Query *) PG_GETARG_POINTER(0);
-	RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-	List	   *indexQuals = (List *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-	genericcostestimate(root, rel, index, indexQuals,
+	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+	genericcostestimate(root, index, indexQuals,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 84daf30878c..89fd4f9db68 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.48 2005/03/21 01:24:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.49 2005/03/27 23:53:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -82,6 +82,9 @@ extern IndexScanDesc index_beginscan(Relation heapRelation,
 				Relation indexRelation,
 				Snapshot snapshot,
 				int nkeys, ScanKey key);
+extern IndexScanDesc index_beginscan_multi(Relation indexRelation,
+				Snapshot snapshot,
+				int nkeys, ScanKey key);
 extern void index_rescan(IndexScanDesc scan, ScanKey key);
 extern void index_endscan(IndexScanDesc scan);
 extern void index_markpos(IndexScanDesc scan);
@@ -89,6 +92,9 @@ extern void index_restrpos(IndexScanDesc scan);
 extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction);
 extern bool index_getnext_indexitem(IndexScanDesc scan,
 						ScanDirection direction);
+extern bool index_getmulti(IndexScanDesc scan,
+						   ItemPointer tids, int32 max_tids,
+						   int32 *returned_tids);
 
 extern IndexBulkDeleteResult *index_bulk_delete(Relation indexRelation,
 				  IndexBulkDeleteCallback callback,
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index a48e6f4c108..ee2df86b402 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.43 2004/12/31 22:03:21 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.44 2005/03/27 23:53:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -196,5 +196,6 @@ extern void gist_desc(char *buf, uint8 xl_info, char *rec);
 
 /* gistget.c */
 extern Datum gistgettuple(PG_FUNCTION_ARGS);
+extern Datum gistgetmulti(PG_FUNCTION_ARGS);
 
 #endif   /* GIST_H */
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 0b7c1a5f0f2..e6ad35300ba 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.60 2005/03/21 01:24:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.61 2005/03/27 23:53:04 tgl Exp $
  *
  * NOTES
  *		modeled after Margo Seltzer's hash implementation for unix.
@@ -232,8 +232,9 @@ typedef HashItemData *HashItem;
 
 extern Datum hashbuild(PG_FUNCTION_ARGS);
 extern Datum hashinsert(PG_FUNCTION_ARGS);
-extern Datum hashgettuple(PG_FUNCTION_ARGS);
 extern Datum hashbeginscan(PG_FUNCTION_ARGS);
+extern Datum hashgettuple(PG_FUNCTION_ARGS);
+extern Datum hashgetmulti(PG_FUNCTION_ARGS);
 extern Datum hashrescan(PG_FUNCTION_ARGS);
 extern Datum hashendscan(PG_FUNCTION_ARGS);
 extern Datum hashmarkpos(PG_FUNCTION_ARGS);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 8486fabcb51..d1c0df6b32f 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.84 2005/03/21 01:24:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.85 2005/03/27 23:53:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -404,8 +404,9 @@ typedef BTScanOpaqueData *BTScanOpaque;
  */
 extern Datum btbuild(PG_FUNCTION_ARGS);
 extern Datum btinsert(PG_FUNCTION_ARGS);
-extern Datum btgettuple(PG_FUNCTION_ARGS);
 extern Datum btbeginscan(PG_FUNCTION_ARGS);
+extern Datum btgettuple(PG_FUNCTION_ARGS);
+extern Datum btgetmulti(PG_FUNCTION_ARGS);
 extern Datum btrescan(PG_FUNCTION_ARGS);
 extern Datum btendscan(PG_FUNCTION_ARGS);
 extern Datum btmarkpos(PG_FUNCTION_ARGS);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index a4ad6e31357..1cb40445ea4 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.37 2004/12/31 22:03:21 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.38 2005/03/27 23:53:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,12 @@ typedef struct HeapScanDescData
 
 typedef HeapScanDescData *HeapScanDesc;
 
-
+/*
+ * We use the same IndexScanDescData structure for both amgettuple-based
+ * and amgetmulti-based index scans.  Which one is being used can be told
+ * by looking at fn_getnext and fn_getmulti, only one of which will be
+ * initialized.  Some fields are only relevant in amgettuple-based scans.
+ */
 typedef struct IndexScanDescData
 {
 	/* scan parameters */
@@ -71,6 +76,7 @@ typedef struct IndexScanDescData
 	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
 
 	FmgrInfo	fn_getnext;		/* cached lookup info for AM's getnext fn */
+	FmgrInfo	fn_getmulti;	/* cached lookup info for AM's getmulti fn */
 
 	/*
 	 * If keys_are_unique and got_tuple are both true, we stop calling the
diff --git a/src/include/access/rtree.h b/src/include/access/rtree.h
index d06ccdcff09..c057160a790 100644
--- a/src/include/access/rtree.h
+++ b/src/include/access/rtree.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.37 2005/01/18 23:25:55 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/access/rtree.h,v 1.38 2005/03/27 23:53:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -116,10 +116,9 @@ extern void freestack(RTSTACK *s);
  */
 extern Datum rtinsert(PG_FUNCTION_ARGS);
 extern Datum rtbulkdelete(PG_FUNCTION_ARGS);
-
-extern Datum rtgettuple(PG_FUNCTION_ARGS);
 extern Datum rtbeginscan(PG_FUNCTION_ARGS);
-
+extern Datum rtgettuple(PG_FUNCTION_ARGS);
+extern Datum rtgetmulti(PG_FUNCTION_ARGS);
 extern Datum rtendscan(PG_FUNCTION_ARGS);
 extern Datum rtmarkpos(PG_FUNCTION_ARGS);
 extern Datum rtrestrpos(PG_FUNCTION_ARGS);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 3e72d05c99f..ba640078f57 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.259 2005/03/21 01:24:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.260 2005/03/27 23:53:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200503201
+#define CATALOG_VERSION_NO	200503271
 
 #endif
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index c47b385da86..3bb646b1cdd 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -1,14 +1,14 @@
 /*-------------------------------------------------------------------------
  *
  * pg_am.h
- *	  definition of the system "am" relation (pg_am)
+ *	  definition of the system "access method" relation (pg_am)
  *	  along with the relation's initial contents.
  *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.31 2004/12/31 22:03:24 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.32 2005/03/27 23:53:05 tgl Exp $
  *
  * NOTES
  *		the genbki.sh script reads this file and generates .bki
@@ -37,7 +37,6 @@
 CATALOG(pg_am)
 {
 	NameData	amname;			/* access method name */
-	int4		amowner;		/* usesysid of creator */
 	int2		amstrategies;	/* total NUMBER of strategies (operators)
 								 * by which we can traverse/search this AM */
 	int2		amsupport;		/* total NUMBER of support functions that
@@ -49,9 +48,10 @@ CATALOG(pg_am)
 	bool		amcanmulticol;	/* does AM support multi-column indexes? */
 	bool		amindexnulls;	/* does AM support NULL index entries? */
 	bool		amconcurrent;	/* does AM support concurrent updates? */
-	regproc		amgettuple;		/* "next valid tuple" function */
 	regproc		aminsert;		/* "insert this tuple" function */
 	regproc		ambeginscan;	/* "start new scan" function */
+	regproc		amgettuple;		/* "next valid tuple" function */
+	regproc		amgetmulti;		/* "fetch multiple tuples" function */
 	regproc		amrescan;		/* "restart this scan" function */
 	regproc		amendscan;		/* "end this scan" function */
 	regproc		ammarkpos;		/* "mark current scan position" function */
@@ -75,17 +75,17 @@ typedef FormData_pg_am *Form_pg_am;
  */
 #define Natts_pg_am						20
 #define Anum_pg_am_amname				1
-#define Anum_pg_am_amowner				2
-#define Anum_pg_am_amstrategies			3
-#define Anum_pg_am_amsupport			4
-#define Anum_pg_am_amorderstrategy		5
-#define Anum_pg_am_amcanunique			6
-#define Anum_pg_am_amcanmulticol		7
-#define Anum_pg_am_amindexnulls			8
-#define Anum_pg_am_amconcurrent			9
-#define Anum_pg_am_amgettuple			10
-#define Anum_pg_am_aminsert				11
-#define Anum_pg_am_ambeginscan			12
+#define Anum_pg_am_amstrategies			2
+#define Anum_pg_am_amsupport			3
+#define Anum_pg_am_amorderstrategy		4
+#define Anum_pg_am_amcanunique			5
+#define Anum_pg_am_amcanmulticol		6
+#define Anum_pg_am_amindexnulls			7
+#define Anum_pg_am_amconcurrent			8
+#define Anum_pg_am_aminsert				9
+#define Anum_pg_am_ambeginscan			10
+#define Anum_pg_am_amgettuple			11
+#define Anum_pg_am_amgetmulti			12
 #define Anum_pg_am_amrescan				13
 #define Anum_pg_am_amendscan			14
 #define Anum_pg_am_ammarkpos			15
@@ -100,15 +100,15 @@ typedef FormData_pg_am *Form_pg_am;
  * ----------------
  */
 
-DATA(insert OID = 402 (  rtree	PGUID	8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete - rtcostestimate ));
+DATA(insert OID = 402 (  rtree	8 3 0 f f f f rtinsert rtbeginscan rtgettuple rtgetmulti rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete - rtcostestimate ));
 DESCR("r-tree index access method");
-DATA(insert OID = 403 (  btree	PGUID	5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate ));
+DATA(insert OID = 403 (  btree	5 1 1 t t t t btinsert btbeginscan btgettuple btgetmulti btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate ));
 DESCR("b-tree index access method");
 #define BTREE_AM_OID 403
-DATA(insert OID = 405 (  hash	PGUID	1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete - hashcostestimate ));
+DATA(insert OID = 405 (  hash	1 1 0 f f f t hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete - hashcostestimate ));
 DESCR("hash index access method");
 #define HASH_AM_OID 405
-DATA(insert OID = 783 (  gist	PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete - gistcostestimate ));
+DATA(insert OID = 783 (  gist	100 7 0 f t f f gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete - gistcostestimate ));
 DESCR("GiST index access method");
 #define GIST_AM_OID 783
 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c3edcab22b1..63a339e5559 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.353 2005/03/21 01:24:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.354 2005/03/27 23:53:05 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -664,6 +664,8 @@ DATA(insert OID = 320 (  rtinsert		   PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2
 DESCR("r-tree(internal)");
 DATA(insert OID = 322 (  rtgettuple		   PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_  rtgettuple - _null_ ));
 DESCR("r-tree(internal)");
+DATA(insert OID = 635 (  rtgetmulti		   PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_  rtgetmulti - _null_ ));
+DESCR("r-tree(internal)");
 DATA(insert OID = 323 (  rtbuild		   PGNSP PGUID 12 f f t f v 3 2278 "2281 2281 2281" _null_ rtbuild - _null_ ));
 DESCR("r-tree(internal)");
 DATA(insert OID = 324 (  rtbeginscan	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_	rtbeginscan - _null_ ));
@@ -678,11 +680,13 @@ DATA(insert OID = 328 (  rtrescan		   PGNSP PGUID 12 f f t f v 2 2278 "2281 2281
 DESCR("r-tree(internal)");
 DATA(insert OID = 321 (  rtbulkdelete	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ rtbulkdelete - _null_ ));
 DESCR("r-tree(internal)");
-DATA(insert OID = 1265 (  rtcostestimate   PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_  rtcostestimate - _null_ ));
+DATA(insert OID = 1265 (  rtcostestimate   PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_  rtcostestimate - _null_ ));
 DESCR("r-tree(internal)");
 
 DATA(insert OID = 330 (  btgettuple		   PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_  btgettuple - _null_ ));
 DESCR("btree(internal)");
+DATA(insert OID = 636 (  btgetmulti		   PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_  btgetmulti - _null_ ));
+DESCR("btree(internal)");
 DATA(insert OID = 331 (  btinsert		   PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_	btinsert - _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 333 (  btbeginscan	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_	btbeginscan - _null_ ));
@@ -701,7 +705,7 @@ DATA(insert OID = 332 (  btbulkdelete	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2
 DESCR("btree(internal)");
 DATA(insert OID = 972 (  btvacuumcleanup   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ btvacuumcleanup - _null_ ));
 DESCR("btree(internal)");
-DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_  btcostestimate - _null_ ));
+DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_  btcostestimate - _null_ ));
 DESCR("btree(internal)");
 
 DATA(insert OID = 339 (  poly_same		   PGNSP PGUID 12 f f t f i 2 16 "604 604" _null_	poly_same - _null_ ));
@@ -798,6 +802,8 @@ DESCR("convert char(n) to name");
 
 DATA(insert OID = 440 (  hashgettuple	   PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_  hashgettuple - _null_ ));
 DESCR("hash(internal)");
+DATA(insert OID = 637 (  hashgetmulti	   PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_  hashgetmulti - _null_ ));
+DESCR("hash(internal)");
 DATA(insert OID = 441 (  hashinsert		   PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_	hashinsert - _null_ ));
 DESCR("hash(internal)");
 DATA(insert OID = 443 (  hashbeginscan	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_	hashbeginscan - _null_ ));
@@ -814,7 +820,7 @@ DATA(insert OID = 448 (  hashbuild		   PGNSP PGUID 12 f f t f v 3 2278 "2281 228
 DESCR("hash(internal)");
 DATA(insert OID = 442 (  hashbulkdelete    PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ hashbulkdelete - _null_ ));
 DESCR("hash(internal)");
-DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_  hashcostestimate - _null_ ));
+DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_  hashcostestimate - _null_ ));
 DESCR("hash(internal)");
 
 DATA(insert OID = 449 (  hashint2		   PGNSP PGUID 12 f f t f i 1 23 "21" _null_  hashint2 - _null_ ));
@@ -1060,6 +1066,8 @@ DESCR("smaller of two");
 
 DATA(insert OID = 774 (  gistgettuple	   PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_  gistgettuple - _null_ ));
 DESCR("gist(internal)");
+DATA(insert OID = 638 (  gistgetmulti	   PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_  gistgetmulti - _null_ ));
+DESCR("gist(internal)");
 DATA(insert OID = 775 (  gistinsert		   PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_	gistinsert - _null_ ));
 DESCR("gist(internal)");
 DATA(insert OID = 777 (  gistbeginscan	   PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_	gistbeginscan - _null_ ));
@@ -1076,7 +1084,7 @@ DATA(insert OID = 782 (  gistbuild		   PGNSP PGUID 12 f f t f v 3 2278 "2281 228
 DESCR("gist(internal)");
 DATA(insert OID = 776 (  gistbulkdelete    PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ gistbulkdelete - _null_ ));
 DESCR("gist(internal)");
-DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_  gistcostestimate - _null_ ));
+DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_  gistcostestimate - _null_ ));
 DESCR("gist(internal)");
 
 DATA(insert OID = 784 (  tintervaleq	   PGNSP PGUID 12 f f t f i 2 16 "704 704" _null_	tintervaleq - _null_ ));
-- 
GitLab