diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index c98ca9bd95fb3beebd1e30612cf7288cff68c5da..ebad04c9fe5c9fa04286af17c93d9424d0b56a74 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.54 2002/03/02 21:39:16 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $ * * NOTES * This file contains only the public interface routines. @@ -165,9 +165,6 @@ hashinsert(PG_FUNCTION_ARGS) char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); -#ifdef NOT_USED - Relation heapRel = (Relation) PG_GETARG_POINTER(4); -#endif InsertIndexResult res; HashItem hitem; IndexTuple itup; @@ -333,7 +330,6 @@ hashendscan(PG_FUNCTION_ARGS) /* * hashmarkpos() -- save current scan position - * */ Datum hashmarkpos(PG_FUNCTION_ARGS) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 9cb2a02cf3a0d2ced78e33cbfa00adda014b15f3..2e0f181939d0ec6f0a285d7a31b88367e0aa9f79 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.31 2002/02/25 04:06:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $ * * NOTES * These functions are stored in pg_amproc. For each operator class @@ -95,6 +95,8 @@ hashname(PG_FUNCTION_ARGS) { char *key = NameStr(*PG_GETARG_NAME(0)); + Assert(strlen(key) <= NAMEDATALEN); + return hash_any(key, strlen(key)); } @@ -116,61 +118,89 @@ hashvarlena(PG_FUNCTION_ARGS) return result; } +/* This hash function was written by Bob Jenkins + * (bob_jenkins@burtleburtle.net), and superficially adapted + * for PostgreSQL by Neil Conway. For more information on this + * hash function, see http://burtleburtle.net/bob/hash/doobs.html + */ /* - * hash_any --- compute a hash function for any specified chunk of memory - * - * This can be used as the underlying hash function for any pass-by-reference - * data type in which there are no non-significant bits. - * - * (Comment from the original db3 hashing code: ) - * - * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte - * units. On the first time through the loop we get the 'leftover bytes' - * (strlen % 8). On every later iteration, we perform 8 HASHC's so we handle - * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If - * this routine is heavily used enough, it's worth the ugly coding. - * - * "OZ's original sdbm hash" + * mix -- mix 3 32-bit values reversibly. + * For every delta with one or two bits set, and the deltas of all three + * high bits or all three low bits, whether the original value of a,b,c + * is almost all zero or is uniformly distributed, + * - If mix() is run forward or backward, at least 32 bits in a,b,c + * have at least 1/4 probability of changing. + * - If mix() is run forward, every bit of c will change between 1/3 and + * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) + */ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* + * hash_any() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * Returns a 32-bit value. Every bit of the key affects every bit of + * the return value. Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. The best hash table sizes are powers + * of 2. There is no need to do mod a prime (mod is sooo slow!). + * If you need less than 32 bits, use a bitmask. */ Datum -hash_any(const char *keydata, int keylen) +hash_any(register const char *k, register int keylen) { - uint32 n; - int loop; - -#define HASHC n = *keydata++ + 65599 * n - - n = 0; - if (keylen > 0) - { - loop = (keylen + 8 - 1) >> 3; - - switch (keylen & (8 - 1)) - { - case 0: - do - { /* All fall throughs */ - HASHC; - case 7: - HASHC; - case 6: - HASHC; - case 5: - HASHC; - case 4: - HASHC; - case 3: - HASHC; - case 2: - HASHC; - case 1: - HASHC; - } while (--loop); - } - } - -#undef HASHC - - PG_RETURN_UINT32(n); + register Datum a,b,c,len; + + /* Set up the internal state */ + len = keylen; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + /* Another arbitrary value. If the hash function is called + * multiple times, this could be the previously generated + * hash value; however, the interface currently doesn't allow + * this. AFAIK this isn't a big deal. + */ + c = 3923095; + + /* handle most of the key */ + while (len >= 12) + { + a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24)); + b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24)); + c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24)); + mix(a,b,c); + k += 12; len -= 12; + } + + /* handle the last 11 bytes */ + c += keylen; + switch(len) /* all the case statements fall through */ + { + case 11: c+=((Datum)k[10]<<24); + case 10: c+=((Datum)k[9]<<16); + case 9 : c+=((Datum)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((Datum)k[7]<<24); + case 7 : b+=((Datum)k[6]<<16); + case 6 : b+=((Datum)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((Datum)k[3]<<24); + case 3 : a+=((Datum)k[2]<<16); + case 2 : a+=((Datum)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /* report the result */ + return c; } diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 2f99e7426dc8d4a97781633f6d540901b033095b..303fe54213b8290fbf039e2ef4b26894026a62fb 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.23 2001/10/25 05:49:21 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.24 2002/03/06 20:49:40 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -49,7 +49,7 @@ _hash_doinsert(Relation rel, HashItem hitem) itup = &(hitem->hash_itup); if ((natts = rel->rd_rel->relnatts) != 1) elog(ERROR, "Hash indices valid for only one index key."); - itup_scankey = _hash_mkscankey(rel, itup, metap); + itup_scankey = _hash_mkscankey(rel, itup); /* * find the first page in the bucket chain containing this key and @@ -232,7 +232,7 @@ _hash_pgaddtup(Relation rel, RelationGetRelationName(rel)); /* write the buffer, but hold our lock */ - _hash_wrtnorelbuf(rel, buf); + _hash_wrtnorelbuf(buf); return itup_off; } diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index e41c1bd0a3ea943ba2ddfec63102a8bfa4525136..6a53eed9c95d56582aa7df173cfb20c2d5dde96a 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.31 2001/10/25 05:49:21 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.32 2002/03/06 20:49:41 momjian Exp $ * * NOTES * Overflow pages look like ordinary relation pages. @@ -73,11 +73,11 @@ _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf) ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; ovflopaque->hasho_oaddr = oaddr; ovflopaque->hasho_bucket = pageopaque->hasho_bucket; - _hash_wrtnorelbuf(rel, ovflbuf); + _hash_wrtnorelbuf(ovflbuf); /* logically chain overflow page to previous page */ pageopaque->hasho_nextblkno = ovflblkno; - _hash_wrtnorelbuf(rel, buf); + _hash_wrtnorelbuf(buf); return ovflbuf; } @@ -574,7 +574,7 @@ _hash_squeezebucket(Relation rel, * the "next" ItemId. */ PageIndexTupleDelete(rpage, roffnum); - _hash_wrtnorelbuf(rel, rbuf); + _hash_wrtnorelbuf(rbuf); /* * if the "read" page is now empty because of the deletion, free diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 668f0b16d097e1f95f338270d5689110007bb607..5d75b1e9b696768b58e3ee6f2da0e1794dacbc15 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.34 2002/01/15 22:14:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.35 2002/03/06 20:49:42 momjian Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -151,7 +151,7 @@ _hash_metapinit(Relation rel) elog(ERROR, "Problem with _hash_initbitmap."); /* all done */ - _hash_wrtnorelbuf(rel, metabuf); + _hash_wrtnorelbuf(metabuf); /* * initialize the first two buckets @@ -260,7 +260,7 @@ _hash_wrtbuf(Relation rel, Buffer buf) * or a reference to the buffer. */ void -_hash_wrtnorelbuf(Relation rel, Buffer buf) +_hash_wrtnorelbuf(Buffer buf) { BlockNumber blkno; @@ -383,7 +383,7 @@ _hash_pagedel(Relation rel, ItemPointer tid) opaque = (HashPageOpaque) PageGetSpecialPointer(page); PageIndexTupleDelete(page, offno); - _hash_wrtnorelbuf(rel, buf); + _hash_wrtnorelbuf(buf); if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { @@ -505,7 +505,7 @@ _hash_splitpage(Relation rel, nopaque->hasho_flag = LH_BUCKET_PAGE; nopaque->hasho_oaddr = InvalidOvflAddress; nopaque->hasho_bucket = nbucket; - _hash_wrtnorelbuf(rel, nbuf); + _hash_wrtnorelbuf(nbuf); /* * make sure the old bucket isn't empty. advance 'opage' and friends @@ -628,7 +628,7 @@ _hash_splitpage(Relation rel, == InvalidOffsetNumber) elog(ERROR, "_hash_splitpage: failed to add index item to %s", RelationGetRelationName(rel)); - _hash_wrtnorelbuf(rel, nbuf); + _hash_wrtnorelbuf(nbuf); /* * now delete the tuple from the old bucket. after this @@ -640,7 +640,7 @@ _hash_splitpage(Relation rel, * instead of calling PageGetMaxOffsetNumber. */ PageIndexTupleDelete(opage, ooffnum); - _hash_wrtnorelbuf(rel, obuf); + _hash_wrtnorelbuf(obuf); omaxoffnum = OffsetNumberPrev(omaxoffnum); /* diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index 4680061118375f2d79c5ae779559e00d19635fd1..dd16d887fbff61bb12efceeb1749928baa55d795 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * - * btutils.c - * Utility code for Postgres btree implementation. + * hashutil.c + * Utility code for Postgres hash implementation. * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.27 2001/10/06 23:21:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.28 2002/03/06 20:49:43 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,7 @@ ScanKey -_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap) +_hash_mkscankey(Relation rel, IndexTuple itup) { ScanKey skey; TupleDesc itupdesc; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index bf9bf6eeb0f5120318eeccf024714312ed043235..5645eb3160937cfaf86231d8e6260c537dcdf16b 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * - * $Id: nodeHash.c,v 1.60 2001/10/25 05:49:28 momjian Exp $ + * $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -115,8 +115,7 @@ ExecInitHash(Hash *node, EState *estate, Plan *parent) HashState *hashstate; Plan *outerPlan; - SO1_printf("ExecInitHash: %s\n", - "initializing hash node"); + SO_printf("ExecInitHash: initializing hash node\n"); /* * assign the node's execution state diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index ecf0f87c7494f38f91f1aed03ee7e786e2cb96dd..6cb7b65fe3f3b9ac98abb140809daf989b8568b2 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.91 2002/03/06 06:10:21 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.92 2002/03/06 20:49:45 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -39,7 +39,7 @@ /* * Constants related to size of the catcache. * - * NCCBUCKETS should be prime and must be less than 64K (because + * NCCBUCKETS must be a power of two and must be less than 64K (because * SharedInvalCatcacheMsg crams hash indexes into a uint16 field). In * practice it should be a lot less, anyway, to avoid chewing up too much * space on hash bucket headers. @@ -47,9 +47,16 @@ * MAXCCTUPLES could be as small as a few hundred, if per-backend memory * consumption is at a premium. */ -#define NCCBUCKETS 257 /* Hash buckets per CatCache */ +#define NCCBUCKETS 256 /* Hash buckets per CatCache */ #define MAXCCTUPLES 5000 /* Maximum # of tuples in all caches */ +/* + * Given a hash value and the size of the hash table, find the bucket + * in which the hash value belongs. Since the hash table must contain + * a power-of-2 number of elements, this is a simple bitmask. + */ +#define HASH_INDEX(h, sz) ((Index) ((h) & ((sz) - 1))) + /* * variables, macros and other stuff @@ -380,7 +387,7 @@ CatalogCacheIdInvalidate(int cacheId, /* * inspect the proper hash bucket for matches */ - hashIndex = (Index) (hashValue % (uint32) ccp->cc_size); + hashIndex = HASH_INDEX(hashValue, ccp->cc_nbuckets); for (elt = DLGetHead(&ccp->cc_bucket[hashIndex]); elt; elt = nextelt) { @@ -490,7 +497,7 @@ ResetCatalogCache(CatCache *cache) int i; /* Remove each tuple in this cache, or at least mark it dead */ - for (i = 0; i < cache->cc_size; i++) + for (i = 0; i < cache->cc_nbuckets; i++) { Dlelem *elt, *nextelt; @@ -578,7 +585,7 @@ CatalogCacheFlushRelation(Oid relId) continue; /* nope, leave it alone */ /* Yes, scan the tuples and remove those related to relId */ - for (i = 0; i < cache->cc_size; i++) + for (i = 0; i < cache->cc_nbuckets; i++) { Dlelem *elt, *nextelt; @@ -640,7 +647,7 @@ CatalogCacheFlushRelation(Oid relId) #define InitCatCache_DEBUG1 \ do { \ elog(DEBUG1, "InitCatCache: rel=%s id=%d nkeys=%d size=%d\n", \ - cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_size); \ + cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_nbuckets); \ } while(0) #else @@ -705,7 +712,7 @@ InitCatCache(int id, cp->cc_tupdesc = (TupleDesc) NULL; cp->cc_reloidattr = reloidattr; cp->cc_ntup = 0; - cp->cc_size = NCCBUCKETS; + cp->cc_nbuckets = NCCBUCKETS; cp->cc_nkeys = nkeys; for (i = 0; i < nkeys; ++i) cp->cc_key[i] = key[i]; @@ -985,7 +992,7 @@ SearchCatCache(CatCache *cache, * find the hash bucket in which to look for the tuple */ hashValue = CatalogCacheComputeHashValue(cache, cur_skey); - hashIndex = (Index) (hashValue % (uint32) cache->cc_size); + hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); /* * scan the hash bucket until we find a match or exhaust our tuples diff --git a/src/include/access/hash.h b/src/include/access/hash.h index c88f34f54a93f7f877a34801c9ec14c3bc9e100e..42c0558b64e80953f2330f4837f61e1cf826ebf6 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: hash.h,v 1.43 2002/02/25 04:06:52 momjian Exp $ + * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -265,8 +265,7 @@ extern Datum hashoidvector(PG_FUNCTION_ARGS); extern Datum hashint2vector(PG_FUNCTION_ARGS); extern Datum hashname(PG_FUNCTION_ARGS); extern Datum hashvarlena(PG_FUNCTION_ARGS); -extern Datum hash_any(const char *keydata, int keylen); - +extern Datum hash_any(register const char *k, register int keylen); /* private routines */ @@ -288,7 +287,7 @@ extern void _hash_metapinit(Relation rel); extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access); extern void _hash_relbuf(Relation rel, Buffer buf, int access); extern void _hash_wrtbuf(Relation rel, Buffer buf); -extern void _hash_wrtnorelbuf(Relation rel, Buffer buf); +extern void _hash_wrtnorelbuf(Buffer buf); extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access, int to_access); extern void _hash_pageinit(Page page, Size size); @@ -313,8 +312,7 @@ extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, /* hashutil.c */ -extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup, - HashMetaPage metap); +extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup); extern void _hash_freeskey(ScanKey skey); extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); extern HashItem _hash_formitem(IndexTuple itup); diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index cf820e37b8f4f6ba50630b9e204c05e06111c376..034c061e7d4b2d84b638de24f6921710b8fee6b1 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -13,7 +13,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catcache.h,v 1.39 2002/03/03 17:47:56 tgl Exp $ + * $Id: catcache.h,v 1.40 2002/03/06 20:49:46 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -40,8 +40,8 @@ typedef struct catcache TupleDesc cc_tupdesc; /* tuple descriptor (copied from reldesc) */ int cc_reloidattr; /* AttrNumber of relation OID attr, or 0 */ int cc_ntup; /* # of tuples currently in this cache */ - int cc_size; /* # of hash buckets in this cache */ - int cc_nkeys; /* number of keys (1..4) */ + int cc_nbuckets; /* # of hash buckets in this cache */ + int cc_nkeys; /* # of keys (1..4) */ int cc_key[4]; /* AttrNumber of each key */ PGFunction cc_hashfunc[4]; /* hash function to use for each key */ ScanKeyData cc_skey[4]; /* precomputed key info for heap scans */