diff --git a/contrib/intarray/_int.c b/contrib/intarray/_int.c index a642998cd444003040af75884865a8bc06e5f0dc..35ee7a659e2328c6614590e784069a6df415e0b0 100644 --- a/contrib/intarray/_int.c +++ b/contrib/intarray/_int.c @@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec, v->spl_nleft = 0; right = v->spl_right; v->spl_nright = 0; + if ( seed_1 == 0 || seed_2 == 0 ) { + seed_1 = 1; + seed_2 = 2; + } datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key); datum_l = copy_intArrayType(datum_alpha); diff --git a/contrib/tsearch/README.tsearch b/contrib/tsearch/README.tsearch index 96059893fa693bf27b9b919a8cde99f19c1f4422..c63ae91edd096cfbf4646e565f4cd1c41b08e85e 100644 --- a/contrib/tsearch/README.tsearch +++ b/contrib/tsearch/README.tsearch @@ -198,23 +198,6 @@ Don't forget to do make clean; make; make install 2. -As it was mentioned above we don't use explicitly ID of lexems -as in OpenFTS but use hash function (crc32) instead to map lexem to -integer. Our experiments show that probability of collision is quite small: -for english text it's about 10**(-6) and 10**(-5) for russian collection. -Default installation doesn't check for collisions but if your application -does need to guarantee an exact (no collisions) search, you need -to update system table to mark index islossy: - - update pg_amop set amopreqcheck = true where amopclaid = - (select oid from pg_opclass where opcname = 'gist_txtidx_ops'); - -If you don't bother about collisions : - - update pg_amop set amopreqcheck = false where amopclaid = - (select oid from pg_opclass where opcname = 'gist_txtidx_ops'); - -3. txtidx doesn't preserve words ordering (this is not critical for searching) for performance reason, for example: @@ -224,7 +207,7 @@ test=# select 'page two'::txtidx; 'two' 'page' (1 row) -4. +3. Indexed access provided by txtidx data type isn't always good because of internal data structure we use (RD-Tree). Particularly, queries like '!gist' will be slower than just a sequential scan, @@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt ); These two queries will be processed by scanning of full index ! Very slow ! -5. +4. Following selects produce the same result select title from titles where titleidx @@ 'patch&gist'; diff --git a/contrib/tsearch/gistidx.c b/contrib/tsearch/gistidx.c index 376b21a307d0c10df95b1069aaa30935c46d1ac8..5e527036840c49c1bb0b86617952727383fd3036 100644 --- a/contrib/tsearch/gistidx.c +++ b/contrib/tsearch/gistidx.c @@ -10,6 +10,7 @@ #include "utils/array.h" #include "utils/builtins.h" #include "storage/bufpage.h" +#include "access/tuptoaster.h" #include "txtidx.h" #include "query.h" @@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) { return res + 1 - a; } +static void +makesign( BITVECP sign, GISTTYPE *a) { + int4 k,len = ARRNELEM( a ); + int4 *ptr = GETARR( a ); + MemSet( (void*)sign, 0, sizeof(BITVEC) ); + for(k=0;k<len;k++) + HASH( sign, ptr[k] ); +} + Datum gtxtidx_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0); @@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) { *arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len ); arr++; ptr++; } - if ( val != toastedval ) - pfree(val); len = uniqueint( GETARR(res), val->size ); if ( len != val->size ) { @@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) { len = CALCGTSIZE( ARRKEY, len ); res = (GISTTYPE*)repalloc( (void*)res, len ); res->len = len; - } + } + if ( val != toastedval ) + pfree(val); + + /* make signature, if array is too long */ + if ( res->len > TOAST_INDEX_TARGET ) { + GISTTYPE *ressign; + + len = CALCGTSIZE( SIGNKEY, 0 ); + ressign = (GISTTYPE*)palloc( len ); + ressign->len = len; + ressign->flag = SIGNKEY; + makesign( GETSIGN(ressign), res ); + pfree(res); + res = ressign; + } retval = (GISTENTRY*)palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), @@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) { PG_RETURN_POINTER( penalty ); } -static void -makesign( BITVECP sign, GISTTYPE *a) { - int4 k,len = ARRNELEM( a ); - int4 *ptr = GETARR( a ); - MemSet( (void*)sign, 0, sizeof(BITVEC) ); - for(k=0;k<len;k++) - HASH( sign, ptr[k] ); -} - typedef struct { bool allistrue; BITVEC sign; @@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) { right = v->spl_right; v->spl_nright = 0; + if ( seed_1 == 0 || seed_2 == 0 ) { + seed_1 = 1; + seed_2 = 2; + } + /* form initial .. */ if ( cache[seed_1].allistrue ) { datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) ); diff --git a/contrib/tsearch/tsearch.sql.in b/contrib/tsearch/tsearch.sql.in index 17317f255842f76038eedaaf549ad22925394db4..48a5ae27e62183027b6db5447141bafc22f8996c 100644 --- a/contrib/tsearch/tsearch.sql.in +++ b/contrib/tsearch/tsearch.sql.in @@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid and ( tq.typname='query_txt' or tq.typname='mquery_txt' ); INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 1, false, c.opoid + SELECT opcl.oid, 1, true, c.opoid FROM pg_opclass opcl, txtidx_ops_tmp c WHERE opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') @@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) and c.oprname = '@@'; INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 2, false, c.opoid + SELECT opcl.oid, 2, true, c.opoid FROM pg_opclass opcl, txtidx_ops_tmp c WHERE opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')