diff --git a/src/backend/access/hash/README b/src/backend/access/hash/README
index 014f742c252ce34921e2e2fe532e5bd0388774f5..9f34fd73c483d3cd738d0b239b7aa09f35dab913 100644
--- a/src/backend/access/hash/README
+++ b/src/backend/access/hash/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/access/hash/README,v 1.6 2007/04/19 20:24:04 tgl Exp $
+$PostgreSQL: pgsql/src/backend/access/hash/README,v 1.7 2008/03/15 20:46:31 tgl Exp $
 
 This directory contains an implementation of hash indexing for Postgres.  Most
 of the core ideas are taken from Margo Seltzer and Ozan Yigit, A New Hashing
@@ -65,6 +65,11 @@ hashm_spares[N] <= hashm_spares[N+1], since the latter count includes the
 former.  The difference between the two represents the number of overflow
 pages appearing between the bucket page groups of splitpoints N and N+1.
 
+(Note: the above describes what happens when filling a hash index that
+starts out at the minimum size.  In practice, we try to estimate the
+required index size and allocate a suitable number of splitpoints
+immediately, to avoid expensive re-splitting during initial index build.)
+
 When S splitpoints exist altogether, the array entries hashm_spares[0]
 through hashm_spares[S] are valid; hashm_spares[S] records the current
 total number of overflow pages.  New overflow pages are created as needed
@@ -101,9 +106,9 @@ includes the bitmap pages, which is the reason for saying that bitmap
 pages are a subset of the overflow pages.  It turns out in fact that each
 bitmap page's first bit represents itself --- this is not an essential
 property, but falls out of the fact that we only allocate another bitmap
-page when we really need one.  Bit number zero always corresponds to block
-number 3, which is the first bitmap page and is allocated during index
-creation.
+page when we really need one.  Bit number zero always corresponds to the
+first bitmap page, which is allocated during index creation just after all
+the initially created buckets.
 
 
 Lock definitions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index f6c4d5705d4284a6393d3ebcb81ba09be88b66bd..b008c0aa4a7abde67c91cc158abc16ae34b5d888 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.98 2008/01/01 19:45:46 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.99 2008/03/15 20:46:31 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -22,6 +22,7 @@
 #include "access/hash.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
+#include "optimizer/plancat.h"
 
 
 /* Working state for hashbuild and its callback */
@@ -48,6 +49,7 @@ hashbuild(PG_FUNCTION_ARGS)
 	Relation	index = (Relation) PG_GETARG_POINTER(1);
 	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
 	IndexBuildResult *result;
+	BlockNumber	relpages;
 	double		reltuples;
 	HashBuildState buildstate;
 
@@ -59,8 +61,11 @@ hashbuild(PG_FUNCTION_ARGS)
 		elog(ERROR, "index \"%s\" already contains data",
 			 RelationGetRelationName(index));
 
-	/* initialize the hash index metadata page */
-	_hash_metapinit(index);
+	/* estimate the number of rows currently present in the table */
+	estimate_rel_size(heap, NULL, &relpages, &reltuples);
+
+	/* initialize the hash index metadata page and initial buckets */
+	_hash_metapinit(index, reltuples);
 
 	/* build the index */
 	buildstate.indtuples = 0;
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 68861087585e97c873fe90eff1286d477d9af9d8..ec6f4b390fd944305e908aaebb035e325b733860 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.72 2008/01/01 19:45:46 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.73 2008/03/15 20:46:31 tgl Exp $
  *
  * NOTES
  *	  Postgres hash pages look like ordinary relation pages.  The opaque
@@ -312,15 +312,17 @@ _hash_chgbufaccess(Relation rel,
 
 /*
  *	_hash_metapinit() -- Initialize the metadata page of a hash index,
- *				the two buckets that we begin with and the initial
- *				bitmap page.
+ *				the initial buckets, and the initial bitmap page.
+ *
+ * The initial number of buckets is dependent on num_tuples, an estimate
+ * of the number of tuples to be loaded into the index initially.
  *
  * We are fairly cavalier about locking here, since we know that no one else
  * could be accessing this index.  In particular the rule about not holding
  * multiple buffer locks is ignored.
  */
 void
-_hash_metapinit(Relation rel)
+_hash_metapinit(Relation rel, double num_tuples)
 {
 	HashMetaPage metap;
 	HashPageOpaque pageopaque;
@@ -330,7 +332,10 @@ _hash_metapinit(Relation rel)
 	int32		data_width;
 	int32		item_width;
 	int32		ffactor;
-	uint16		i;
+	double		dnumbuckets;
+	uint32		num_buckets;
+	uint32		log2_num_buckets;
+	uint32		i;
 
 	/* safety check */
 	if (RelationGetNumberOfBlocks(rel) != 0)
@@ -354,7 +359,26 @@ _hash_metapinit(Relation rel)
 		ffactor = 10;
 
 	/*
-	 * We initialize the metapage, the first two bucket pages, and the first
+	 * Choose the number of initial bucket pages to match the fill factor
+	 * given the estimated number of tuples.  The result is rounded up to
+	 * the next power of 2 and clamped to at least 2 bucket pages.  The
+	 * upper limit (2^30) is determined by considerations explained in
+	 * _hash_expandtable().
+	 */
+	dnumbuckets = num_tuples / ffactor;
+	if (dnumbuckets <= 2.0)
+		num_buckets = 2;
+	else if (dnumbuckets >= (double) 0x40000000)
+		num_buckets = 0x40000000;
+	else
+		num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);
+
+	log2_num_buckets = _hash_log2(num_buckets);
+	Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
+	Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
+
+	/*
+	 * We initialize the metapage, the first N bucket pages, and the first
 	 * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
 	 * calls to occur.	This ensures that the smgr level has the right idea of
 	 * the physical index length.
@@ -398,23 +422,25 @@ _hash_metapinit(Relation rel)
 	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
 
 	/*
-	 * We initialize the index with two buckets, 0 and 1, occupying physical
-	 * blocks 1 and 2.	The first freespace bitmap page is in block 3.
+	 * We initialize the index with N buckets, 0 .. N-1, occupying physical
+	 * blocks 1 to N.  The first freespace bitmap page is in block N+1.
+	 * Since N is a power of 2, we can set the masks this way:
 	 */
-	metap->hashm_maxbucket = metap->hashm_lowmask = 1;	/* nbuckets - 1 */
-	metap->hashm_highmask = 3;	/* (nbuckets << 1) - 1 */
+	metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
+	metap->hashm_highmask = (num_buckets << 1) - 1;
 
 	MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
 	MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
 
-	metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
-	metap->hashm_ovflpoint = 1;
+	/* Set up mapping for one spare page after the initial splitpoints */
+	metap->hashm_spares[log2_num_buckets] = 1;
+	metap->hashm_ovflpoint = log2_num_buckets;
 	metap->hashm_firstfree = 0;
 
 	/*
-	 * Initialize the first two buckets
+	 * Initialize the first N buckets
 	 */
-	for (i = 0; i <= 1; i++)
+	for (i = 0; i < num_buckets; i++)
 	{
 		buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
 		pg = BufferGetPage(buf);
@@ -430,7 +456,7 @@ _hash_metapinit(Relation rel)
 	/*
 	 * Initialize first bitmap page
 	 */
-	_hash_initbitmap(rel, metap, 3);
+	_hash_initbitmap(rel, metap, num_buckets + 1);
 
 	/* all done */
 	_hash_wrtbuf(rel, metabuf);
@@ -511,6 +537,9 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 	 * index with 2^32 buckets would certainly overflow BlockNumber and hence
 	 * _hash_alloc_buckets() would fail, but if we supported buckets smaller
 	 * than a disk block then this would be an independent constraint.
+	 *
+	 * If you change this, see also the maximum initial number of buckets
+	 * in _hash_metapinit().
 	 */
 	if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
 		goto fail;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 5f927095edcd9198f831652b32db52a64bf63346..a56dccd2ff5d9482736cd06e5c9769662948c9ba 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.140 2008/01/12 00:11:39 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.141 2008/03/15 20:46:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,6 @@ bool		constraint_exclusion = false;
 get_relation_info_hook_type get_relation_info_hook = NULL;
 
 
-static void estimate_rel_size(Relation rel, int32 *attr_widths,
-				  BlockNumber *pages, double *tuples);
 static List *get_relation_constraints(Oid relationObjectId, RelOptInfo *rel,
 						 bool include_notnull);
 
@@ -319,7 +317,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
  * relation's attr_width[] cache; we fill this in if we have need to compute
  * the attribute widths for estimation purposes.
  */
-static void
+void
 estimate_rel_size(Relation rel, int32 *attr_widths,
 				  BlockNumber *pages, double *tuples)
 {
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index ac54c47f014ca704dd97e898ae8ad39d4c67d373..fd7b68e9aebeb18382e99628a041d66a78a0e3c2 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.84 2008/01/01 19:45:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.85 2008/03/15 20:46:31 tgl Exp $
  *
  * NOTES
  *		modeled after Margo Seltzer's hash implementation for unix.
@@ -298,7 +298,7 @@ extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_wrtbuf(Relation rel, Buffer buf);
 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
 				   int to_access);
-extern void _hash_metapinit(Relation rel);
+extern void _hash_metapinit(Relation rel, double num_tuples);
 extern void _hash_pageinit(Page page, Size size);
 extern void _hash_expandtable(Relation rel, Buffer metabuf);
 
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 42e24832495bdee61fd2c3c8cfadf59a04f2bb17..82b4c2200ad9d0b3e397db1ee30fd3b63d08e83c 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.47 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.48 2008/03/15 20:46:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,6 +15,7 @@
 #define PLANCAT_H
 
 #include "nodes/relation.h"
+#include "utils/rel.h"
 
 /* Hook for plugins to get control in get_relation_info() */
 typedef void (*get_relation_info_hook_type) (PlannerInfo *root,
@@ -27,6 +28,9 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook;
 extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
 				  bool inhparent, RelOptInfo *rel);
 
+extern void estimate_rel_size(Relation rel, int32 *attr_widths,
+							  BlockNumber *pages, double *tuples);
+
 extern bool relation_excluded_by_constraints(RelOptInfo *rel,
 								 RangeTblEntry *rte);