diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 974a1b7ae4e545e6023947d3cc7aa9319079a727..ba4a10313cbaace24fa85efd928d73ef0383845b 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -25,13 +25,12 @@ gin_extract_tsvector(PG_FUNCTION_ARGS) int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; - *nentries = 0; + *nentries = vector->size; if (vector->size > 0) { int i; WordEntry *we = ARRPTR(vector); - *nentries = (uint32) vector->size; entries = (Datum *) palloc(sizeof(Datum) * vector->size); for (i = 0; i < vector->size; i++) @@ -134,11 +133,19 @@ gin_ts_consistent(PG_FUNCTION_ARGS) if (query->size > 0) { - int4 i, + int i, j = 0; QueryItem *item; GinChkVal gcv; + /* + * check-parameter array has one entry for each value (operand) in the + * query. We expand that array into mapped_check, so that there's one + * entry in mapped_check for every node in the query, including + * operators, to allow quick lookups in checkcondition_gin. Only the + * entries corresponding to operands are actually used. 
+ */ + gcv.frst = item = GETQUERY(query); gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 4fc51378b4bf5c70cbfc5e3e7d16195e7597f79c..985b917d0f022446828b4fa56d9120b495aecc9a 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -133,20 +133,27 @@ gtsvectorout(PG_FUNCTION_ARGS) } static int -compareint(const void *a, const void *b) +compareint(const void *va, const void *vb) { - if (*((int4 *) a) == *((int4 *) b)) + int4 a = *((int4 *) va); + int4 b = *((int4 *) vb); + + if (a == b) return 0; - return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1; + return (a > b) ? 1 : -1; } +/* + * Removes duplicates from an array of int4. 'l' is + * size of the input array. Returns the new size of the array. + */ static int uniqueint(int4 *a, int4 l) { int4 *ptr, *res; - if (l == 1) + if (l <= 1) return l; ptr = res = a; @@ -570,12 +577,15 @@ typedef struct } SPLITCOST; static int -comparecost(const void *a, const void *b) +comparecost(const void *va, const void *vb) { - if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) + SPLITCOST *a = (SPLITCOST *) va; + SPLITCOST *b = (SPLITCOST *) vb; + + if (a->cost == b->cost) return 0; else - return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1; + return (a->cost > b->cost) ? 
1 : -1; } diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 535a3541bf75cfced4baddb8e254bbcbf21e7321..453b67df431d53e58f332ebc925688b24789a3ca 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.5 2007/09/11 08:46:29 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -53,22 +53,24 @@ cnt_length(TSVector t) { WordEntry *ptr = ARRPTR(t), *end = (WordEntry *) STRPTR(t); - int len = 0, - clen; + int len = 0; while (ptr < end) { - if ((clen = POSDATALEN(t, ptr)) == 0) + int clen = POSDATALEN(t, ptr); + + if (clen == 0) len += 1; else len += clen; + ptr++; } return len; } -static int4 +static int WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item) { if (ptr->len == item->length) @@ -80,6 +82,10 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item return (ptr->len > item->length) ? 1 : -1; } +/* + * Returns a pointer to a WordEntry corresponding to 'item' from tsvector 't'. 'q' + * is the TSQuery containing 'item'. Returns NULL if not found. 
+ */ static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item) { @@ -178,15 +184,15 @@ SortAndUniqItems(TSQuery q, int *size) } /* A dummy WordEntryPos array to use when haspos is false */ -static WordEntryPos POSNULL[] = { +static WordEntryPosVector POSNULL = { 1, /* Number of elements that follow */ - 0 + { 0 } }; static float calc_rank_and(float *w, TSVector t, TSQuery q) { - uint16 **pos; + WordEntryPosVector **pos; int i, k, l, @@ -207,9 +213,8 @@ calc_rank_and(float *w, TSVector t, TSQuery q) pfree(item); return calc_rank_or(w, t, q); } - pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); - memset(pos, 0, sizeof(uint16 *) * q->size); - WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1); + pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size); + WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1); for (i = 0; i < size; i++) { @@ -218,25 +223,25 @@ calc_rank_and(float *w, TSVector t, TSQuery q) continue; if (entry->haspos) - pos[i] = (uint16 *) _POSDATAPTR(t, entry); + pos[i] = _POSVECPTR(t, entry); else - pos[i] = (uint16 *) POSNULL; + pos[i] = &POSNULL; - dimt = *(uint16 *) (pos[i]); - post = (WordEntryPos *) (pos[i] + 1); + dimt = pos[i]->npos; + post = pos[i]->pos; for (k = 0; k < i; k++) { if (!pos[k]) continue; - lenct = *(uint16 *) (pos[k]); - ct = (WordEntryPos *) (pos[k] + 1); + lenct = pos[k]->npos; + ct = pos[k]->pos; for (l = 0; l < dimt; l++) { for (p = 0; p < lenct; p++) { dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); - if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) + if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) { float curw; @@ -285,8 +290,8 @@ calc_rank_or(float *w, TSVector t, TSQuery q) } else { - dimt = *(uint16 *) POSNULL; - post = POSNULL + 1; + dimt = POSNULL.npos; + post = POSNULL.pos; } resj = 0.0; @@ -456,17 +461,19 @@ typedef struct { QueryItem **item; int16 nitem; - bool needfree; uint8 wclass; int32 pos; } 
DocRepresentation; static int -compareDocR(const void *a, const void *b) +compareDocR(const void *va, const void *vb) { - if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos) + DocRepresentation *a = (DocRepresentation *) va; + DocRepresentation *b = (DocRepresentation *) vb; + + if (a->pos == b->pos) return 0; - return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1; + return (a->pos > b->pos) ? 1 : -1; } static bool @@ -547,11 +554,11 @@ Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext) ptr = doc + lastpos; - /* find lower bound of cover from founded upper bound, move down */ + /* find lower bound of cover from found upper bound, move down */ while (ptr >= doc + ext->pos) { for (i = 0; i < ptr->nitem; i++) - if(ptr->item[i]->type == QI_VAL) /* XXX */ + if(ptr->item[i]->type == QI_VAL) ptr->item[i]->operand.istrue = 1; if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand)) { @@ -620,8 +627,8 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) } else { - dimt = *(uint16 *) POSNULL; - post = POSNULL + 1; + dimt = POSNULL.npos; + post = POSNULL.pos; } while (cur + dimt >= len) @@ -636,7 +643,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) { int k; - doc[cur].needfree = false; doc[cur].nitem = 0; doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size); @@ -658,7 +664,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) } else { - doc[cur].needfree = false; doc[cur].nitem = doc[cur - 1].nitem; doc[cur].item = doc[cur - 1].item; } @@ -764,9 +769,6 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method) if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) Wdoc /= log((double) (txt->size + 1)) / log(2.0); - for (i = 0; i < doclen; i++) - if (doc[i].needfree) - pfree(doc[i].item); pfree(doc); return (float4) Wdoc; diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 
8e7593513ff70baf7f365b5a723b00874a4437ff..e150f9a267837f79d63385d4e559f0637fbd1e70 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -269,7 +269,7 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b) static int4 add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos) { - uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr); + uint16 *clen = &_POSVECPTR(dest, destptr)->npos; int i; uint16 slen = POSDATALEN(src, srcptr), startlen; @@ -354,7 +354,7 @@ tsvector_concat(PG_FUNCTION_ARGS) if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else @@ -399,7 +399,7 @@ tsvector_concat(PG_FUNCTION_ARGS) cur += SHORTALIGN(ptr1->len); if (ptr1->haspos) { - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); @@ -434,7 +434,7 @@ tsvector_concat(PG_FUNCTION_ARGS) if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); - memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur 
+= POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else @@ -499,10 +499,17 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item) * check weight info */ static bool -checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item) +checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item) { - WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16)); - uint16 len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len))); + WordEntryPosVector *posvec; + WordEntryPos *ptr; + uint16 len; + + posvec = (WordEntryPosVector *) + (chkval->values + SHORTALIGN(val->pos + val->len)); + + len = posvec->npos; + ptr = posvec->pos; while (len--) { @@ -674,7 +681,13 @@ ts_match_tq(PG_FUNCTION_ARGS) } /* - * Statistics of tsvector + * ts_stat statistic function support + */ + + +/* + * Returns the number of positions in value 'wptr' within tsvector 'txt', + * that have a weight equal to one of the weights in 'weight' bitmask. */ static int check_weight(TSVector txt, WordEntry * wptr, int8 weight) @@ -824,6 +837,18 @@ formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len) return newstat; } +/* + * This is written like a custom aggregate function, because the + * original plan was to do just that. Unfortunately, an aggregate function + * can't return a set, so that plan was abandoned. If that limitation is + * lifted in the future, ts_stat could be a real aggregate function so that + * you could use it like this: + * + * SELECT ts_stat(vector_column) FROM vector_table; + * + * where vector_column is a tsvector-type column in vector_table. 
+ */ + static tsstat * ts_accum(tsstat * stat, Datum data) { diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index 0aa95e892cc00325624995039d6d8ebf72ee9f03..107fc4a71127fde7422deb21af99e3c2c9f46441 100644 --- a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.5 2007/09/11 08:46:29 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -43,6 +43,13 @@ typedef struct typedef uint16 WordEntryPos; +typedef struct +{ + uint16 npos; + WordEntryPos pos[1]; /* var length */ +} WordEntryPosVector; + + #define WEP_GETWEIGHT(x) ( (x) >> 14 ) #define WEP_GETPOS(x) ( (x) & 0x3fff ) @@ -88,9 +95,9 @@ typedef TSVectorData *TSVector; /* returns a pointer to the beginning of lexemes */ #define STRPTR(x) ( (char *) &(x)->entries[x->size] ) -#define _POSDATAPTR(x,e) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)) -#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) -#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) ) +#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))) +#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 ) +#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos) /* * fmgr interface macros