diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0d8cf0a84cb52f707f6cb529e42ce8510c8fd4aa..7d21e31bdb5143c249d8d14f28a0bfab462928d4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.204 2009/01/09 10:13:18 mha Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.205 2009/01/12 05:10:44 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -1203,6 +1203,55 @@ SET ENABLE_SEQSCAN TO OFF;
       queries.
      </para>
     </sect2>
+
+    <sect2 id="runtime-config-resource-async-behavior">
+     <title>Asynchronous Behavior</title>
+
+     <variablelist>
+      <varlistentry id="guc-effective-io-concurrency" xreflabel="effective_io_concurrency">
+       <term><varname>effective_io_concurrency</varname> (<type>integer</type>)</term>
+       <indexterm>
+        <primary><varname>effective_io_concurrency</> configuration parameter</primary>
+       </indexterm>
+       <listitem>
+        <para>
+         Sets the number of concurrent disk I/O operations that
+         <productname>PostgreSQL</> expects can be executed
+         simultaneously.  Raising this value will increase the number of I/O
+         operations that any individual <productname>PostgreSQL</> session
+         attempts to initiate in parallel.  The allowed range is 1 to 1000,
+         or zero to disable issuance of asynchronous I/O requests.
+        </para>
+
+        <para>
+         A good starting point for this setting is the number of separate
+         drives comprising a RAID 0 stripe or RAID 1 mirror being used for the
+         database.  (For RAID 5 the parity drive should not be counted.)
+         However, if the database is often busy with multiple queries issued in
+         concurrent sessions, lower values may be sufficient to keep the disk
+         array busy.  A value higher than needed to keep the disks busy will
+         only result in extra CPU overhead.
+        </para>
+
+        <para>
+         For more exotic systems, such as memory-based storage or a RAID array
+         that is limited by bus bandwidth, the correct value might be the
+         number of I/O paths available.  Some experimentation may be needed
+         to find the best value.
+        </para>
+
+        <para>
+         Asynchronous I/O depends on an effective <function>posix_fadvise</>
+         function, which some operating systems lack.  If the function is not
+         present then setting this parameter to anything but zero will result
+         in an error.  On some operating systems the function is present but
+         does not actually do anything.  On such systems setting a nonzero
+         value will add CPU overhead without improving performance.
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </sect2>
    </sect1>
 
    <sect1 id="runtime-config-wal">
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 880b9c9590eae67d2b1e5fa55cf2acf6c7e7a783..2ba8b89ee359c2910ce582d810ea51679fd39a60 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -21,7 +21,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.32 2009/01/10 21:08:36 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.33 2009/01/12 05:10:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,6 +67,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
+	TBMIterator *prefetch_iterator;
 	OffsetNumber targoffset;
 	TupleTableSlot *slot;
 
@@ -81,6 +82,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	tbm = node->tbm;
 	tbmiterator = node->tbmiterator;
 	tbmres = node->tbmres;
+	prefetch_iterator = node->prefetch_iterator;
 
 	/*
 	 * Check if we are evaluating PlanQual for tuple of this relation.
@@ -114,6 +116,15 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and
 	 * begin the iteration over the bitmap.
+	 *
+	 * For prefetching, we use *two* iterators, one for the pages we are
+	 * actually scanning and another that runs ahead of the first for
+	 * prefetching.  node->prefetch_pages tracks exactly how many pages
+	 * ahead the prefetch iterator is.  Also, node->prefetch_target tracks
+	 * the desired prefetch distance, which starts small and increases up
+	 * to the GUC-controlled maximum, target_prefetch_pages.  This is to
+	 * avoid doing a lot of prefetching in a scan that stops after a few
+	 * tuples because of a LIMIT.
 	 */
 	if (tbm == NULL)
 	{
@@ -125,6 +136,15 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		node->tbm = tbm;
 		node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
 		node->tbmres = tbmres = NULL;
+
+#ifdef USE_PREFETCH
+		if (target_prefetch_pages > 0)
+		{
+			node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
+			node->prefetch_pages = 0;
+			node->prefetch_target = -1;
+		}
+#endif /* USE_PREFETCH */
 	}
 
 	for (;;)
@@ -144,6 +164,22 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				break;
 			}
 
+#ifdef USE_PREFETCH
+			if (node->prefetch_pages > 0)
+			{
+				/* The main iterator has closed the distance by one page */
+				node->prefetch_pages--;
+			}
+			else if (prefetch_iterator)
+			{
+				/* Do not let the prefetch iterator get behind the main one */
+				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+
+				if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
+					elog(ERROR, "prefetch and main iterators are out of sync");
+			}
+#endif /* USE_PREFETCH */
+
 			/*
 			 * Ignore any claimed entries past what we think is the end of the
 			 * relation.  (This is probably not necessary given that we got at
@@ -165,6 +201,23 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			 * Set rs_cindex to first slot to examine
 			 */
 			scan->rs_cindex = 0;
+
+#ifdef USE_PREFETCH
+			/*
+			 * Increase prefetch target if it's not yet at the max.  Note
+			 * that we will increase it to zero after fetching the very
+			 * first page/tuple, then to one after the second tuple is
+			 * fetched, then it doubles as later pages are fetched.
+			 */
+			if (node->prefetch_target >= target_prefetch_pages)
+				/* don't increase any further */ ;
+			else if (node->prefetch_target >= target_prefetch_pages / 2)
+				node->prefetch_target = target_prefetch_pages;
+			else if (node->prefetch_target > 0)
+				node->prefetch_target *= 2;
+			else
+				node->prefetch_target++;
+#endif /* USE_PREFETCH */
 		}
 		else
 		{
@@ -172,7 +225,40 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			 * Continuing in previously obtained page; advance rs_cindex
 			 */
 			scan->rs_cindex++;
+
+#ifdef USE_PREFETCH
+			/*
+			 * Try to prefetch at least a few pages even before we get to the
+			 * second page if we don't stop reading after the first tuple.
+			 */
+			if (node->prefetch_target < target_prefetch_pages)
+				node->prefetch_target++;
+#endif /* USE_PREFETCH */
+		}
+
+#ifdef USE_PREFETCH
+		/*
+		 * We issue prefetch requests *after* fetching the current page
+		 * to try to avoid having prefetching interfere with the main I/O.
+		 */
+		if (prefetch_iterator)
+		{
+			while (node->prefetch_pages < node->prefetch_target)
+			{
+				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+
+				if (tbmpre == NULL)
+				{
+					/* No more pages to prefetch */
+					tbm_end_iterate(prefetch_iterator);
+					node->prefetch_iterator = prefetch_iterator = NULL;
+					break;
+				}
+				node->prefetch_pages++;
+				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+			}
 		}
+#endif /* USE_PREFETCH */
 
 		/*
 		 * Out of range?  If so, nothing more to look at on this page
@@ -379,11 +465,14 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
 
 	if (node->tbmiterator)
 		tbm_end_iterate(node->tbmiterator);
+	if (node->prefetch_iterator)
+		tbm_end_iterate(node->prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
 	node->tbm = NULL;
 	node->tbmiterator = NULL;
 	node->tbmres = NULL;
+	node->prefetch_iterator = NULL;
 
 	/*
 	 * Always rescan the input immediately, to ensure we can pass down any
@@ -429,6 +518,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 	 */
 	if (node->tbmiterator)
 		tbm_end_iterate(node->tbmiterator);
+	if (node->prefetch_iterator)
+		tbm_end_iterate(node->prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
 
@@ -474,6 +565,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->tbm = NULL;
 	scanstate->tbmiterator = NULL;
 	scanstate->tbmres = NULL;
+	scanstate->prefetch_iterator = NULL;
+	scanstate->prefetch_pages = 0;
+	scanstate->prefetch_target = 0;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6046f6ef6aa325fb258f48eaa6a707865b54e398..534c7516f78e1030a89b5692bc83ca06dd9e2a49 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.244 2009/01/01 17:23:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.245 2009/01/12 05:10:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,6 +65,13 @@ bool		zero_damaged_pages = false;
 int			bgwriter_lru_maxpages = 100;
 double		bgwriter_lru_multiplier = 2.0;
 
+/*
+ * How many buffers PrefetchBuffer callers should try to stay ahead of their
+ * ReadBuffer calls by.  This is maintained by the assign hook for
+ * effective_io_concurrency.  Zero means "never prefetch".
+ */
+int			target_prefetch_pages = 0;
+
 /* local state for StartBufferIO and related functions */
 static volatile BufferDesc *InProgressBuf = NULL;
 static bool IsForInput;
@@ -95,6 +102,56 @@ static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);
 
 
+/*
+ * PrefetchBuffer -- initiate asynchronous read of a block of a relation
+ *
+ * Named by analogy to ReadBuffer, but no buffer is allocated here.  Temporary
+ * relations are handed to LocalPrefetchBuffer; otherwise smgrprefetch is
+ * called if BufTableLookup does not find the block, so that a future
+ * ReadBuffer is less likely to wait.  No-op unless USE_PREFETCH is defined.
+ */
+void
+PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
+{
+#ifdef USE_PREFETCH
+	Assert(RelationIsValid(reln));
+	Assert(BlockNumberIsValid(blockNum));
+
+	/* Open it at the smgr level if not already done */
+	RelationOpenSmgr(reln);
+
+	if (reln->rd_istemp)
+	{
+		/* temporary relation: pass it off to localbuf.c */
+		LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
+	}
+	else
+	{
+		BufferTag	newTag;			/* identity of requested block */
+		uint32		newHash;		/* hash value for newTag */
+		LWLockId	newPartitionLock;		/* buffer partition lock for it */
+		int			buf_id;
+
+		/* create a tag so we can lookup the buffer */
+		INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode, forkNum, blockNum);
+
+		/* determine its hash code and partition lock ID */
+		newHash = BufTableHashCode(&newTag);
+		newPartitionLock = BufMappingPartitionLock(newHash);
+
+		/* look the block up under a shared lock; nothing is modified here */
+		LWLockAcquire(newPartitionLock, LW_SHARED);
+		buf_id = BufTableLookup(&newTag, newHash);
+		LWLockRelease(newPartitionLock);
+
+		/* Not found (lock is already released, so this is only a hint) */
+		if (buf_id < 0)
+			smgrprefetch(reln->rd_smgr, forkNum, blockNum);
+	}
+#endif /* USE_PREFETCH */
+}
+
+
 /*
  * ReadBuffer -- a shorthand for ReadBufferExtended, for reading from main
  *		fork with RBM_NORMAL mode and default strategy.
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 4dd5619f39fb668cab7b3e1ecddfb86e2bce319c..5431419cfe6705def7b0dcde6e5e9833a76fca80 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.85 2009/01/01 17:23:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.86 2009/01/12 05:10:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,6 +52,43 @@ static void InitLocalBuffers(void);
 static Block GetLocalBufferStorage(void);
 
 
+/*
+ * LocalPrefetchBuffer -
+ *	  initiate asynchronous read of a block of a temporary relation
+ *
+ * Does PrefetchBuffer's work for temporary relations: calls smgrprefetch
+ * unless the block is already found in LocalBufHash.  No-op without USE_PREFETCH.
+ */
+void
+LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
+					BlockNumber blockNum)
+{
+#ifdef USE_PREFETCH
+	BufferTag	newTag;			/* identity of requested block */
+	LocalBufferLookupEnt *hresult;
+
+	INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
+
+	/* Initialize local buffers if first request in this session */
+	if (LocalBufHash == NULL)
+		InitLocalBuffers();
+
+	/* Probe the local buffer hash table for the requested block */
+	hresult = (LocalBufferLookupEnt *)
+		hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);
+
+	if (hresult)
+	{
+		/* Already in a local buffer, so nothing to do */
+		return;
+	}
+
+	/* Not in buffers, so ask the storage manager to start reading it */
+	smgrprefetch(smgr, forkNum, blockNum);
+#endif /* USE_PREFETCH */
+}
+
+
 /*
  * LocalBufferAlloc -
  *	  Find or create a local buffer for the given page of the given relation.
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f67ab94fd526886cd2e04207a003fc7ae3a70c29..b91946a035052926415453c4eb22e99e6826fd40 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.146 2009/01/01 17:23:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.147 2009/01/12 05:10:44 tgl Exp $
  *
  * NOTES:
  *
@@ -1029,6 +1029,42 @@ FileClose(File file)
 	FreeVfd(file);
 }
 
+/*
+ * FilePrefetch - initiate asynchronous read of a given range of the file.
+ * The logical seek position is unaffected.
+ *
+ * Returns 0 when no prefetch implementation is compiled in.  Otherwise a
+ * negative FileAccess result is passed through, else the posix_fadvise
+ * result is returned.  posix_fadvise(POSIX_FADV_WILLNEED) is currently the
+ * only implementation; libaio could be added in future, but this API is
+ * inappropriate for it, since libaio wants a buffer provided to read into.
+ */
+int
+FilePrefetch(File file, off_t offset, int amount)
+{
+#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED)
+	int			returnCode;
+
+	Assert(FileIsValid(file));
+	
+	DO_DB(elog(LOG, "FilePrefetch: %d (%s) " INT64_FORMAT " %d",
+			   file, VfdCache[file].fileName,
+			   (int64) offset, amount));
+
+	returnCode = FileAccess(file);
+	if (returnCode < 0)
+		return returnCode;
+
+	returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
+							   POSIX_FADV_WILLNEED);
+
+	return returnCode;
+#else
+	Assert(FileIsValid(file));
+	return 0;
+#endif
+}
+
 int
 FileRead(File file, char *buffer, int amount)
 {
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index b9c1273702fea6bfafa965cb539b85404f1cd47b..643c75e538b0b2a278a8d22eab84df7d6975ad5b 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.143 2009/01/01 17:23:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.144 2009/01/12 05:10:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -550,6 +550,26 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 	}
 }
 
+/*
+ *	mdprefetch() -- Initiate asynchronous read of the specified block of a relation
+ */
+void
+mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+#ifdef USE_PREFETCH
+	off_t		seekpos;
+	MdfdVec    *v;
+
+	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
+	/* byte offset of the block within the segment found above */
+	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	/* prefetching is only a hint, so the FilePrefetch result is ignored */
+	(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ);
+#endif /* USE_PREFETCH */
+}
+
+
 /*
  *	mdread() -- Read the specified block from a relation.
  */
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 6ed91bd96ff4e6183973a857a2fd95a7df619e0f..f2cc449f175de04958911e5ebc00241baea2b2c3 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.115 2009/01/01 17:23:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.116 2009/01/12 05:10:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,6 +48,8 @@ typedef struct f_smgr
 								bool isRedo);
 	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
 							BlockNumber blocknum, char *buffer, bool isTemp);
+	void		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+								  BlockNumber blocknum);
 	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
 							  BlockNumber blocknum, char *buffer);
 	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum, 
@@ -65,7 +67,7 @@ typedef struct f_smgr
 static const f_smgr smgrsw[] = {
 	/* magnetic disk */
 	{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
-		mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
+		mdprefetch, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
 		mdpreckpt, mdsync, mdpostckpt
 	}
 };
@@ -375,6 +377,15 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 											   buffer, isTemp);
 }
 
+/*
+ *	smgrprefetch() -- Dispatch an asynchronous-read request for one block via smgrsw[].
+ */
+void
+smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+	(*(smgrsw[reln->smgr_which].smgr_prefetch)) (reln, forknum, blocknum);
+}
+
 /*
  *	smgrread() -- read a particular block from a relation into the supplied
  *				  buffer.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8d927ae138778bf2a4f6b78cbdb8da2e910bd0fb..63e9628a5dcfa8d4acafd0dbd405454dc43fc001 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.492 2009/01/09 10:13:18 mha Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.493 2009/01/12 05:10:44 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -18,6 +18,7 @@
 
 #include <ctype.h>
 #include <float.h>
+#include <math.h>
 #include <limits.h>
 #include <unistd.h>
 #include <sys/stat.h>
@@ -163,8 +164,9 @@ static bool assign_tcp_keepalives_count(int newval, bool doit, GucSource source)
 static const char *show_tcp_keepalives_idle(void);
 static const char *show_tcp_keepalives_interval(void);
 static const char *show_tcp_keepalives_count(void);
-static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source);
 static bool assign_maxconnections(int newval, bool doit, GucSource source);
+static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source);
+static bool assign_effective_io_concurrency(int newval, bool doit, GucSource source);
 static const char *assign_pgstat_temp_directory(const char *newval, bool doit, GucSource source);
 
 static char *config_enum_get_options(struct config_enum *record, 
@@ -413,6 +415,7 @@ static int	segment_size;
 static int	wal_block_size;
 static int	wal_segment_size;
 static bool integer_datetimes;
+static int	effective_io_concurrency;
 
 /* should be static, but commands/variable.c needs to get at these */
 char	   *role_string;
@@ -1700,6 +1703,20 @@ static struct config_int ConfigureNamesInt[] =
 		100, 0, 1000, NULL, NULL
 	},
 
+	{
+		{"effective_io_concurrency", PGC_USERSET, RESOURCES,
+			gettext_noop("Number of simultaneous requests that can be handled efficiently by the disk subsystem."),
+			gettext_noop("For RAID arrays, this should be approximately the number of drive spindles in the array.")
+		},
+		&effective_io_concurrency,
+#ifdef USE_PREFETCH
+		1, 0, 1000,
+#else
+		0, 0, 0,
+#endif
+		assign_effective_io_concurrency, NULL
+	},
+
 	{
 		{"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE,
 			gettext_noop("Automatic log file rotation will occur after N minutes."),
@@ -7587,6 +7604,61 @@ assign_autovacuum_max_workers(int newval, bool doit, GucSource source)
 	return true;
 }
 
+static bool
+assign_effective_io_concurrency(int newval, bool doit, GucSource source)
+{
+#ifdef USE_PREFETCH
+	double		new_prefetch_pages = 0.0;
+	int			i;
+
+	/*----------
+	 * The user-visible GUC parameter is the number of drives (spindles),
+	 * which we translate here to a pages-to-prefetch target.
+	 *
+	 * The expected number of prefetch pages needed to keep N drives busy is:
+	 *
+	 * drives |   I/O requests
+	 * -------+----------------
+	 *      1 |   1
+	 *      2 |   2/1 + 2/2 = 3
+	 *      3 |   3/1 + 3/2 + 3/3 = 5 1/2
+	 *      4 |   4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
+	 *      n |   n * H(n)
+	 *
+	 * This is called the "coupon collector problem" and H(n) is the n'th
+	 * harmonic number.  This could be approximated by n * ln(n), but for
+	 * reasonable numbers of drives we might as well just compute the sum.
+	 *
+	 * Alternatively we could set the target to the number of pages necessary
+	 * so that the expected number of active spindles is some arbitrary
+	 * fraction P of the total.  This sounds the same but is actually slightly
+	 * different.  The result ends up being ln(1-P)/ln((n-1)/n).
+	 *
+	 * Experimental results show that both of these formulas aren't aggressive
+	 * enough, but we don't really have any better proposals.
+	 *
+	 * Note that if newval = 0 (disabled), we must set target = 0.  The loop
+	 * below does so naturally, since it then adds nothing to the initial 0.0.
+	 *----------
+	 */
+
+	for (i = 1; i <= newval; i++)
+		new_prefetch_pages += (double) newval / (double) i;
+
+	/* This range check shouldn't fail, but let's be paranoid */
+	if (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX)
+	{
+		if (doit)				/* when false, newval is only validated */
+			target_prefetch_pages = (int) rint(new_prefetch_pages);
+		return true;
+	}
+	else
+		return false;
+#else
+	return true;				/* no target to compute in this build */
+#endif /* USE_PREFETCH */
+}
+
 static const char *
 assign_pgstat_temp_directory(const char *newval, bool doit, GucSource source)
 {
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index ffa5055b76889b56bcdfd09a81f1fc29ee8af975..977e13e0aff54e700567b211cf9d734524453294 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -131,6 +131,10 @@
 #bgwriter_lru_maxpages = 100		# 0-1000 max buffers written/round
 #bgwriter_lru_multiplier = 2.0		# 0-10.0 multipler on buffers scanned/round
 
+# - Asynchronous Behavior -
+
+#effective_io_concurrency = 1		# 1-1000, or 0 to disable prefetching
+
 
 #------------------------------------------------------------------------------
 # WRITE AHEAD LOG
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 506605df0014676ded4f42770ea11469d4087148..8d87ec19e1d09bb365eb3bb9ea598076d57675e7 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.200 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.201 2009/01/12 05:10:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1154,6 +1154,9 @@ typedef struct BitmapIndexScanState
  *		tbm				   bitmap obtained from child index scan(s)
  *		tbmiterator		   iterator for scanning current pages
  *		tbmres			   current-page data
+ *		prefetch_iterator  iterator for prefetching ahead of current page
+ *		prefetch_pages	   # pages prefetch iterator is ahead of current
+ *		prefetch_target	   target prefetch distance
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1163,6 +1166,9 @@ typedef struct BitmapHeapScanState
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
+	TBMIterator *prefetch_iterator;
+	int			prefetch_pages;
+	int			prefetch_target;
 } BitmapHeapScanState;
 
 /* ----------------
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index ff9d6ce45decd23ebeeb96e7b940ac9a36355988..bc66df2eb340060620c1ff7ff74af01ee1560e56 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -6,7 +6,7 @@
  * for developers.	If you edit any of these, be sure to do a *full*
  * rebuild (and an initdb if noted).
  *
- * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.36 2009/01/11 18:02:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.37 2009/01/12 05:10:45 tgl Exp $
  *------------------------------------------------------------------------
  */
 
@@ -135,6 +135,15 @@
 #define USE_POSIX_FADVISE
 #endif
 
+/*
+ * USE_PREFETCH code should be compiled only if we have a way to implement
+ * prefetching.  (This is decoupled from USE_POSIX_FADVISE because there
+ * might in future be support for alternative low-level prefetch APIs.)
+ */
+#ifdef USE_POSIX_FADVISE
+#define USE_PREFETCH
+#endif
+
 /*
  * This is the default directory in which AF_UNIX socket files are
  * placed.	Caution: changing this risks breaking your existing client
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 9ec9fcb98a57332234af7c46f3641abb95276813..12512d7428b01084de5bf053d44b53aebaf12069 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.100 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.101 2009/01/12 05:10:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -208,7 +208,9 @@ extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
 
 /* localbuf.c */
-extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, ForkNumber forkNum,
+extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
+								BlockNumber blockNum);
+extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 				 BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 9d1f47d58a2665b9da0f32ec8d3243d5b34f389e..0ee09ced6d214d984aa3d1a445ffa80932a8a344 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.119 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.120 2009/01/12 05:10:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,6 +47,7 @@ extern PGDLLIMPORT int NBuffers;
 extern bool zero_damaged_pages;
 extern int	bgwriter_lru_maxpages;
 extern double bgwriter_lru_multiplier;
+extern int	target_prefetch_pages;
 
 /* in buf_init.c */
 extern PGDLLIMPORT char *BufferBlocks;
@@ -152,6 +153,8 @@ extern PGDLLIMPORT int32 *LocalRefCount;
 /*
  * prototypes for functions in bufmgr.c
  */
+extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
+						   BlockNumber blockNum);
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
 extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 								 BlockNumber blockNum, ReadBufferMode mode,
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 17aa150aa03ea33e2bdc4be9f522ec0c7ddcfa48..98d091c97872db05de18c1dcb99eb6cc77d6b9e5 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.63 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.64 2009/01/12 05:10:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,6 +62,7 @@ extern int	max_files_per_process;
 extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
 extern File OpenTemporaryFile(bool interXact);
 extern void FileClose(File file);
+extern int	FilePrefetch(File file, off_t offset, int amount);
 extern int	FileRead(File file, char *buffer, int amount);
 extern int	FileWrite(File file, char *buffer, int amount);
 extern int	FileSync(File file);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 0392fdf81a3c1d73e99fcb5ab1653b7627552dfd..e753af76dde2a9ac8ac6a1903beaf566807a1a6e 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.65 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.66 2009/01/12 05:10:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -70,6 +70,8 @@ extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
 						 bool isTemp, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum, 
 					   BlockNumber blocknum, char *buffer, bool isTemp);
+extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
+						 BlockNumber blocknum);
 extern void smgrread(SMgrRelation reln, ForkNumber forknum,
 					 BlockNumber blocknum, char *buffer);
 extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
@@ -93,6 +95,8 @@ extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo);
 extern void mdextend(SMgrRelation reln, ForkNumber forknum,
 					 BlockNumber blocknum, char *buffer, bool isTemp);
+extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
+					   BlockNumber blocknum);
 extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				   char *buffer);
 extern void mdwrite(SMgrRelation reln, ForkNumber forknum,