From 484a0fa45480957222759bbeb541cc2eb6226963 Mon Sep 17 00:00:00 2001 From: Bruce Momjian <bruce@momjian.us> Date: Sun, 23 Sep 2001 04:16:16 +0000 Subject: [PATCH] please apply attached patch to current CVS. Changes: 1. Added support for boolean queries (indexable operator @@, looks like a @@ '1|(2&3)' 2. Some code cleanup and optimization Regards, Oleg --- contrib/intarray/README.intarray | 4 + contrib/intarray/_int.c | 862 ++++++++++++++++++++++++++--- contrib/intarray/_int.sql.in | 92 ++- contrib/intarray/expected/_int.out | 331 +++++++++++ contrib/intarray/sql/_int.sql | 58 ++ 5 files changed, 1256 insertions(+), 91 deletions(-) diff --git a/contrib/intarray/README.intarray b/contrib/intarray/README.intarray index 8e292126c0e..7acff125a50 100644 --- a/contrib/intarray/README.intarray +++ b/contrib/intarray/README.intarray @@ -12,6 +12,10 @@ for additional information. CHANGES: +September 21, 2001 + 1. Added support for boolean query (indexable operator @@, looks like + a @@ '1|(2&3)', performance is better in any case ) + 2. Done some small optimizations March 19, 2001 1. Added support for toastable keys 2. Improved split algorithm for intbig (selection speedup is about 30%) diff --git a/contrib/intarray/_int.c b/contrib/intarray/_int.c index fa3ed8e9add..fc75d47b9ab 100644 --- a/contrib/intarray/_int.c +++ b/contrib/intarray/_int.c @@ -5,6 +5,7 @@ ******************************************************************************/ /* +#define BS_DEBUG #define GIST_DEBUG #define GIST_QUERY_DEBUG */ @@ -79,10 +80,13 @@ typedef char *BITVECP; } /* beware of multiple evaluation of arguments to these macros! 
*/ -#define GETBYTEBIT(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) -#define CLRBIT(x,i) GETBYTEBIT(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) -#define SETBIT(x,i) GETBYTEBIT(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) -#define GETBIT(x,i) ( (GETBYTEBIT(x,i) >> ( (i) % BITBYTE )) & 0x01 ) +#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) +#define GETBITBYTE(x,i) ( *((char*)x) >> i & 0x01 ) +#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) +#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) +#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) +#define HASHVAL(val) ((val) % SIGLENBIT) +#define HASH(sign, val) SETBIT((sign), HASHVAL(val)) #ifdef GIST_DEBUG @@ -220,7 +224,51 @@ static ArrayType *_intbig_union(ArrayType *a, ArrayType *b); static ArrayType * _intbig_inter(ArrayType *a, ArrayType *b); static void rt__intbig_size(ArrayType *a, float *sz); -static void gensign(BITVEC sign, int *a, int len); + + + +/***************************************************************************** + * Boolean Search + *****************************************************************************/ + +#define BooleanSearchStrategy 20 + +/* + * item in polish notation with back link + * to left operand + */ +typedef struct ITEM { + int2 type; + int2 left; + int4 val; +} ITEM; + +typedef struct { + int4 len; + int4 size; + char data[1]; +} QUERYTYPE; + +#define HDRSIZEQT ( 2*sizeof(int4) ) +#define COMPUTESIZE(size) ( HDRSIZEQT + size * sizeof(ITEM) ) +#define GETQUERY(x) (ITEM*)( (char*)(x)+HDRSIZEQT ) + +PG_FUNCTION_INFO_V1(bqarr_in); +PG_FUNCTION_INFO_V1(bqarr_out); +Datum bqarr_in(PG_FUNCTION_ARGS); +Datum bqarr_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(boolop); +Datum boolop(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(rboolop); +Datum rboolop(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(querytree); +Datum querytree(PG_FUNCTION_ARGS); + +static bool signconsistent( QUERYTYPE *query, BITVEC sign, bool leaf ); +static 
bool execconsistent( QUERYTYPE *query, ArrayType *array, bool leaf ); /***************************************************************************** * GiST functions @@ -239,11 +287,17 @@ g_int_consistent(PG_FUNCTION_ARGS) { StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); bool retval; + if ( strategy == BooleanSearchStrategy ) + PG_RETURN_BOOL(execconsistent( (QUERYTYPE*)query, + (ArrayType *) DatumGetPointer(entry->key), + ( ARRNELEMS(DatumGetPointer(entry->key))< 2 * MAXNUMRANGE ) ? + GIST_LEAF(entry) : false ) ); + /* sort query for fast search, key is already sorted */ /* XXX are we sure it's safe to scribble on the query object here? */ /* XXX what about toasted input? */ - if (ARRISNULL(query)) - return FALSE; + if ( ARRISVOID( query ) ) + PG_RETURN_BOOL(false); PREPAREARR(query); switch (strategy) @@ -463,14 +517,24 @@ g_int_picksplit(PG_FUNCTION_ARGS) Datum g_int_same(PG_FUNCTION_ARGS) { + ArrayType *a = (ArrayType*)PointerGetDatum(PG_GETARG_POINTER(0)); + ArrayType *b = (ArrayType*)PointerGetDatum(PG_GETARG_POINTER(1)); bool *result = (bool *)PG_GETARG_POINTER(2); - *result = DatumGetBool( - DirectFunctionCall2( - _int_same, - PointerGetDatum(PG_GETARG_POINTER(0)), - PointerGetDatum(PG_GETARG_POINTER(1)) - ) - ); + int4 n = ARRNELEMS(a); + int4 *da, *db; + + if ( n != ARRNELEMS(b) ) { + *result = false; + PG_RETURN_POINTER(result); + } + *result = TRUE; + da = ARRPTR(a); + db = ARRPTR(b); + while(n--) + if (*da++ != *db++) { + *result = FALSE; + break; + } PG_RETURN_POINTER(result); } @@ -490,24 +554,19 @@ _int_contained(PG_FUNCTION_ARGS) Datum _int_contains(PG_FUNCTION_ARGS) { - ArrayType *a = (ArrayType *)PG_GETARG_POINTER(0); - ArrayType *b = (ArrayType *)PG_GETARG_POINTER(1); + ArrayType *a = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + ArrayType *b = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); bool res; - ArrayType *an, - *bn; if (ARRISNULL(a) || ARRISNULL(b)) return FALSE; - 
an = copy_intArrayType(a); - bn = copy_intArrayType(b); + PREPAREARR(a); + PREPAREARR(b); - PREPAREARR(an); - PREPAREARR(bn); - - res = inner_int_contains(an, bn); - pfree(an); - pfree(bn); + res = inner_int_contains(a, b); + pfree(a); + pfree(b); PG_RETURN_BOOL( res ); } @@ -569,31 +628,26 @@ _int_different(PG_FUNCTION_ARGS) Datum _int_same(PG_FUNCTION_ARGS) { - ArrayType *a = (ArrayType *)PG_GETARG_POINTER(0); - ArrayType *b = (ArrayType *)PG_GETARG_POINTER(1); + ArrayType *a = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + ArrayType *b = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); int na, nb; int n; int *da, *db; bool result; - ArrayType *an, - *bn; bool anull = ARRISNULL(a); bool bnull = ARRISNULL(b); if (anull || bnull) return (anull && bnull) ? TRUE : FALSE; - an = copy_intArrayType(a); - bn = copy_intArrayType(b); - - SORT(an); - SORT(bn); - na = ARRNELEMS(an); - nb = ARRNELEMS(bn); - da = ARRPTR(an); - db = ARRPTR(bn); + SORT(a); + SORT(b); + na = ARRNELEMS(a); + nb = ARRNELEMS(b); + da = ARRPTR(a); + db = ARRPTR(b); result = FALSE; @@ -608,8 +662,8 @@ _int_same(PG_FUNCTION_ARGS) } } - pfree(an); - pfree(bn); + pfree(a); + pfree(b); PG_RETURN_BOOL(result); } @@ -619,25 +673,20 @@ _int_same(PG_FUNCTION_ARGS) Datum _int_overlap(PG_FUNCTION_ARGS) { - ArrayType *a = (ArrayType *)PG_GETARG_POINTER(0); - ArrayType *b = (ArrayType *)PG_GETARG_POINTER(1); + ArrayType *a = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + ArrayType *b = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); bool result; - ArrayType *an, - *bn; if (ARRISNULL(a) || ARRISNULL(b)) return FALSE; - an = copy_intArrayType(a); - bn = copy_intArrayType(b); - - SORT(an); - SORT(bn); + SORT(a); + SORT(b); - result = inner_int_overlap(an, bn); + result = inner_int_overlap(a, b); - pfree(an); - pfree(bn); + pfree(a); + pfree(b); PG_RETURN_BOOL( result ); } @@ -679,26 +728,21 @@ 
inner_int_overlap(ArrayType *a, ArrayType *b) Datum _int_union(PG_FUNCTION_ARGS) { - ArrayType *a = (ArrayType *)PG_GETARG_POINTER(0); - ArrayType *b = (ArrayType *)PG_GETARG_POINTER(1); + ArrayType *a = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + ArrayType *b = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); ArrayType *result; - ArrayType *an, - *bn; - an = copy_intArrayType(a); - bn = copy_intArrayType(b); + if (!ARRISNULL(a)) + SORT(a); + if (!ARRISNULL(b)) + SORT(b); - if (!ARRISNULL(an)) - SORT(an); - if (!ARRISNULL(bn)) - SORT(bn); + result = inner_int_union(a, b); - result = inner_int_union(an, bn); - - if (an) - pfree(an); - if (bn) - pfree(bn); + if (a) + pfree(a); + if (b) + pfree(b); PG_RETURN_POINTER( result ); } @@ -763,25 +807,20 @@ inner_int_union(ArrayType *a, ArrayType *b) Datum _int_inter(PG_FUNCTION_ARGS) { - ArrayType *a = (ArrayType *)PG_GETARG_POINTER(0); - ArrayType *b = (ArrayType *)PG_GETARG_POINTER(1); + ArrayType *a = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + ArrayType *b = (ArrayType *)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); ArrayType *result; - ArrayType *an, - *bn; if (ARRISNULL(a) || ARRISNULL(b)) PG_RETURN_POINTER(new_intArrayType(0)); - an = copy_intArrayType(a); - bn = copy_intArrayType(b); + SORT(a); + SORT(b); - SORT(an); - SORT(bn); + result = inner_int_inter(a, b); - result = inner_int_inter(an, bn); - - pfree(an); - pfree(bn); + pfree(a); + pfree(b); PG_RETURN_POINTER( result ); } @@ -970,7 +1009,7 @@ gensign(BITVEC sign, int *a, int len) /* we assume that the sign vector is previously zeroed */ for (i = 0; i < len; i++) { - SETBIT(sign, (*a) % SIGLENBIT); + HASH(sign, *a); a++; } } @@ -1022,7 +1061,20 @@ rt__intbig_size(ArrayType *a, float *sz) } bv = SIGPTR(a); - LOOPBIT(len += GETBIT(bv, i)); + + LOOPBYTE( + len += + GETBITBYTE(bv,0) + + GETBITBYTE(bv,1) + + GETBITBYTE(bv,2) + + GETBITBYTE(bv,3) + + GETBITBYTE(bv,4) 
+ + GETBITBYTE(bv,5) + + GETBITBYTE(bv,6) + + GETBITBYTE(bv,7) ; + bv = (BITVECP) ( ((char*)bv) + 1 ); + ); + *sz = (float) len; return; } @@ -1262,8 +1314,13 @@ g_intbig_consistent(PG_FUNCTION_ARGS) { bool retval; ArrayType *q; + if ( strategy == BooleanSearchStrategy ) + PG_RETURN_BOOL(signconsistent( (QUERYTYPE*)query, + SIGPTR((ArrayType *) DatumGetPointer(entry->key)), + false ) ); + /* XXX what about toasted input? */ - if (ARRISNULL(query)) + if (ARRISVOID(query)) return FALSE; q = new_intArrayType(SIGLENINT); @@ -1417,7 +1474,6 @@ _int_common_picksplit(bytea *entryvec, firsttime = true; waste = 0.0; - for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[i].key); @@ -1552,3 +1608,639 @@ _int_common_picksplit(bytea *entryvec, #endif return v; } + +/***************************************************************************** + * BoolSearch + *****************************************************************************/ + + +#define END 0 +#define ERR 1 +#define VAL 2 +#define OPR 3 +#define OPEN 4 +#define CLOSE 5 + +/* parser's states */ +#define WAITOPERAND 1 +#define WAITENDOPERAND 2 +#define WAITOPERATOR 3 + +/* + * node of query tree, also used + * for storing polish notation in parser + */ +typedef struct NODE { + int4 type; + int4 val; + struct NODE *next; +} NODE; + +typedef struct { + char *buf; + int4 state; + int4 count; + /* reverse polish notation in list (for temporary usage)*/ + NODE *str; + /* number in str */ + int4 num; +} WORKSTATE; + +/* + * get token from query string + */ +static int4 +gettoken( WORKSTATE* state, int4* val ) { + char nnn[16], *curnnn; + + curnnn=nnn; + while(1) { + switch(state->state) { + case WAITOPERAND: + curnnn=nnn; + if ( (*(state->buf)>='0' && *(state->buf)<='9') || + *(state->buf)=='-' ) { + state->state = WAITENDOPERAND; + *curnnn = *(state->buf); + curnnn++; + } else if ( *(state->buf) == '!' 
) { + (state->buf)++; + *val = (int4)'!'; + return OPR; + } else if ( *(state->buf) == '(' ) { + state->count++; + (state->buf)++; + return OPEN; + } else if ( *(state->buf) != ' ' ) + return ERR; + break; + case WAITENDOPERAND: + if ( *(state->buf)>='0' && *(state->buf)<='9' ) { + *curnnn = *(state->buf); + curnnn++; + } else { + *curnnn = '\0'; + *val=(int4)atoi( nnn ); + state->state = WAITOPERATOR; + return ( state->count && *(state->buf) == '\0' ) + ? ERR : VAL; + } + break; + case WAITOPERATOR: + if ( *(state->buf) == '&' || *(state->buf) == '|' ) { + state->state = WAITOPERAND; + *val = (int4) *(state->buf); + (state->buf)++; + return OPR; + } else if ( *(state->buf) == ')' ) { + (state->buf)++; + state->count--; + return ( state->count <0 ) ? ERR : CLOSE; + } else if ( *(state->buf) == '\0' ) { + return ( state->count ) ? ERR : END; + } else if ( *(state->buf) != ' ' ) + return ERR; + break; + default: + return ERR; + break; + } + (state->buf)++; + } + return END; +} + +/* + * push new one in polish notation reverse view + */ +static void +pushquery( WORKSTATE *state, int4 type, int4 val ) { + NODE *tmp = (NODE*)palloc(sizeof(NODE)); + tmp->type=type; + tmp->val =val; + tmp->next = state->str; + state->str = tmp; + state->num++; +} + +#define STACKDEPTH 16 + +/* + * make polish notation of query + */ +static int4 +makepol(WORKSTATE *state) { + int4 val,type; + int4 stack[STACKDEPTH]; + int4 lenstack=0; + + while( (type=gettoken(state, &val))!=END ) { + switch(type) { + case VAL: + pushquery(state, type, val); + while ( lenstack && (stack[ lenstack-1 ] == (int4)'&' || + stack[ lenstack-1 ] == (int4)'!') ) { + lenstack--; + pushquery(state, OPR, stack[ lenstack ]); + } + break; + case OPR: + if ( lenstack && val == (int4) '|' ) { + pushquery(state, OPR, val); + } else { + if ( lenstack == STACKDEPTH ) + elog(ERROR,"Stack too short"); + stack[ lenstack ] = val; + lenstack++; + } + break; + case OPEN: + if ( makepol( state ) == ERR ) return ERR; + if ( lenstack 
&& (stack[ lenstack-1 ] == (int4)'&' || + stack[ lenstack-1 ] == (int4)'!') ) { + lenstack--; + pushquery(state, OPR, stack[ lenstack ]); + } + break; + case CLOSE: + while ( lenstack ) { + lenstack--; + pushquery(state, OPR, stack[ lenstack ]); + }; + return END; + break; + case ERR: + default: + elog(ERROR,"Syntax error"); + return ERR; + + } + } + + while (lenstack) { + lenstack--; + pushquery(state, OPR, stack[ lenstack ]); + }; + return END; +} + +typedef struct { + int4 *arrb; + int4 *arre; + int4 *ptr; +} CHKVAL; + +/* + * is there value 'val' in array or not ? + */ +static bool +checkcondition_arr( void *checkval, int4 val ) { +#ifdef BS_DEBUG + elog(NOTICE,"OPERAND %d", val); +#endif + if ( val > *(((CHKVAL*)checkval)->ptr) ) { + while ( ((CHKVAL*)checkval)->ptr < ((CHKVAL*)checkval)->arre ) { + ((CHKVAL*)checkval)->ptr++; + if ( *(((CHKVAL*)checkval)->ptr) == val ) return true; + if ( val < *(((CHKVAL*)checkval)->ptr) ) return false; + } + } else if ( val < *(((CHKVAL*)checkval)->ptr) ) { + while ( ((CHKVAL*)checkval)->ptr > ((CHKVAL*)checkval)->arrb ) { + ((CHKVAL*)checkval)->ptr--; + if ( *(((CHKVAL*)checkval)->ptr) == val ) return true; + if ( val > *(((CHKVAL*)checkval)->ptr) ) return false; + } + } else { + return true; + } + return false; +} + +static bool +checkcondition_bit( void *checkval, int4 val ) { + return GETBIT( checkval, HASHVAL( val ) ); +} + +/* + * check for boolean condition + */ +static bool +execute( ITEM* curitem, void *checkval, bool calcnot, bool (*chkcond)(void *checkval, int4 val )) { + + if ( curitem->type == VAL ) { + return (*chkcond)( checkval, curitem->val ); + } else if ( curitem->val == (int4)'!' ) { + return ( calcnot ) ? + ( ( execute(curitem - 1, checkval, calcnot, chkcond) ) ? 
false : true ) + : true; + } else if ( curitem->val == (int4)'&' ) { + if ( execute(curitem + curitem->left, checkval, calcnot, chkcond) ) + return execute(curitem - 1, checkval, calcnot, chkcond); + else + return false; + } else { /* |-operator */ + if ( execute(curitem + curitem->left, checkval, calcnot, chkcond) ) + return true; + else + return execute(curitem - 1, checkval, calcnot, chkcond); + } + return false; +} + +/* + * signconsistent & execconsistent called by *_consistent + */ +static bool +signconsistent( QUERYTYPE *query, BITVEC sign, bool calcnot ) { + return execute( + GETQUERY(query) + query->size-1 , + (void*)sign, calcnot, + checkcondition_bit + ); +} + +static bool +execconsistent( QUERYTYPE *query, ArrayType *array, bool calcnot ) { + CHKVAL chkval; + + chkval.arrb = ARRPTR(array); + chkval.arre = chkval.arrb + ARRNELEMS(array) - 1; + chkval.ptr = chkval.arrb + ARRNELEMS(array)/2; + return execute( + GETQUERY(query) + query->size-1 , + (void*)&chkval, calcnot, + checkcondition_arr + ); +} + +/* + * boolean operations + */ +Datum +rboolop(PG_FUNCTION_ARGS) { + return DirectFunctionCall2( + boolop, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) + ); +} + +Datum +boolop(PG_FUNCTION_ARGS) { + ArrayType *val = ( ArrayType * )PG_DETOAST_DATUM_COPY(PG_GETARG_POINTER(0)); + QUERYTYPE *query = ( QUERYTYPE * )PG_DETOAST_DATUM(PG_GETARG_POINTER(1)); + CHKVAL chkval; + bool result; + + if ( ARRISVOID( val ) ) { + pfree(val); + PG_FREE_IF_COPY(query,1); + PG_RETURN_BOOL( false ); + } + + PREPAREARR(val); + chkval.arrb = ARRPTR(val); + chkval.arre = chkval.arrb + ARRNELEMS(val) - 1; + chkval.ptr = chkval.arrb + ARRNELEMS(val)/2; + result = execute( + GETQUERY(query) + query->size-1 , + &chkval, true, + checkcondition_arr + ); + pfree(val); + + PG_FREE_IF_COPY(query,1); + PG_RETURN_BOOL( result ); +} + +static void +findoprnd( ITEM *ptr, int4 *pos ) { +#ifdef BS_DEBUG + elog(NOTICE, ( ptr[*pos].type == OPR ) ? 
+ "%d %c" : "%d %d ", *pos, ptr[*pos].val ); +#endif + if ( ptr[*pos].type == VAL ) { + ptr[*pos].left = 0; + (*pos)--; + } else if ( ptr[*pos].val == (int4)'!' ) { + ptr[*pos].left = -1; + (*pos)--; + findoprnd( ptr, pos ); + } else { + ITEM *curitem = &ptr[*pos]; + int4 tmp = *pos; + (*pos)--; + findoprnd(ptr,pos); + curitem->left = *pos - tmp; + findoprnd(ptr,pos); + } +} + + +/* + * input + */ +Datum +bqarr_in(PG_FUNCTION_ARGS) { + char *buf=(char*)PG_GETARG_POINTER(0); + WORKSTATE state; + int4 i; + QUERYTYPE *query; + int4 commonlen; + ITEM *ptr; + NODE *tmp; + int4 pos=0; +#ifdef BS_DEBUG + char pbuf[16384],*cur; +#endif + + state.buf = buf; + state.state = WAITOPERAND; + state.count = 0; + state.num = 0; + state.str=NULL; + + /* make polish notation (postfix, but in reverse order) */ + makepol( &state ); + if (!state.num) + elog( ERROR,"Empty query"); + + commonlen = COMPUTESIZE(state.num); + query = (QUERYTYPE*) palloc( commonlen ); + query->len = commonlen; + query->size = state.num; + ptr = GETQUERY(query); + + for(i=state.num-1; i>=0; i-- ) { + ptr[i].type = state.str->type; + ptr[i].val = state.str->val; + tmp = state.str->next; + pfree( state.str ); + state.str = tmp; + } + + pos = query->size-1; + findoprnd( ptr, &pos ); +#ifdef BS_DEBUG + cur = pbuf; + *cur = '\0'; + for( i=0;i<query->size;i++ ) { + if ( ptr[i].type == OPR ) + sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left); + else + sprintf(cur, "%d ", ptr[i].val ); + cur = strchr(cur,'\0'); + } + elog(NOTICE,"POR: %s", pbuf); +#endif + + PG_RETURN_POINTER( query ); +} + + +/* + * out function + */ +typedef struct { + ITEM *curpol; + char *buf; + char *cur; + int4 buflen; +} INFIX; + +#define RESIZEBUF(inf,addsize) while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) { \ + int4 len = inf->cur - inf->buf; \ + inf->buflen *= 2; \ + inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \ + inf->cur = inf->buf + len; \ +} + +static void +infix(INFIX *in, bool first) { + if ( 
in->curpol->type == VAL ) { + RESIZEBUF(in, 11); + sprintf(in->cur, "%d", in->curpol->val ); + in->cur = strchr( in->cur, '\0' ); + in->curpol--; + } else if ( in->curpol->val == (int4)'!' ) { + bool isopr = false; + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol--; + if ( in->curpol->type == OPR ) { + isopr = true; + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr( in->cur, '\0' ); + } + infix( in, isopr ); + if ( isopr ) { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr( in->cur, '\0' ); + } + } else { + int4 op = in->curpol->val; + INFIX nrm; + + in->curpol--; + if ( op == (int4)'|' && ! first) { + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr( in->cur, '\0' ); + } + + nrm.curpol = in->curpol; + nrm.buflen = 16; + nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); + + /* get right operand */ + infix( &nrm, false ); + + /* get & print left operand */ + in->curpol = nrm.curpol; + infix( in, false ); + + /* print operator & right operand*/ + RESIZEBUF(in, 3 + (nrm.cur - nrm.buf) ); + sprintf(in->cur, " %c %s", op, nrm.buf); + in->cur = strchr( in->cur, '\0' ); + pfree( nrm.buf ); + + if ( op == (int4)'|' && ! first) { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr( in->cur, '\0' ); + } + } +} + + +Datum +bqarr_out(PG_FUNCTION_ARGS) { + QUERYTYPE *query = (QUERYTYPE*)PG_DETOAST_DATUM(PG_GETARG_POINTER(0)); + INFIX nrm; + + if ( query->size == 0 ) + elog(ERROR,"Empty"); + nrm.curpol = GETQUERY(query) + query->size - 1; + nrm.buflen = 32; + nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); + *(nrm.cur) = '\0'; + infix( &nrm, true ); + + PG_FREE_IF_COPY(query,0); + PG_RETURN_POINTER( nrm.buf ); +} + +static int4 +countdroptree( ITEM *q, int4 pos ) { + if ( q[pos].type == VAL ) { + return 1; + } else if ( q[pos].val == (int4)'!' 
) { + return 1+countdroptree(q, pos-1); + } else { + return 1 + countdroptree(q, pos-1) + countdroptree(q, pos + q[pos].left); + } +} + +/* + * common algorithm: + * result of all '!' will be = 'true', so + * we can modify query tree for clearing + */ +static int4 +shorterquery( ITEM *q, int4 len ) { + int4 index,posnot,poscor; + bool notisleft = false; + int4 drop,i; + + /* out all '!' */ + do { + index=0; + drop=0; + /* find ! */ + for(posnot=0; posnot < len; posnot++) + if ( q[posnot].type == OPR && q[posnot].val == (int4)'!') { + index=1; + break; + } + + if ( posnot == len ) + return len; + + /* last operator is ! */ + if ( posnot == len-1 ) + return 0; + + /* find operator for this operand */ + for( poscor=posnot+1; poscor<len; poscor++) { + if ( q[poscor].type == OPR ) { + if ( poscor == posnot+1 ) { + notisleft = false; + break; + } else if ( q[poscor].left + poscor == posnot ) { + notisleft = true; + break; + } + } + } + if ( q[poscor].val == (int4)'!' ) { + drop = countdroptree(q, poscor); + q[poscor-1].type=VAL; + for(i=poscor+1;i<len;i++) + if ( q[i].type == OPR && q[i].left + i <= poscor ) + q[i].left += drop - 2; + memcpy( (void*)&q[poscor-drop+1], + (void*)&q[poscor-1], + sizeof(ITEM) * ( len - (poscor-1) )); + len -= drop - 2; + } else if ( q[poscor].val == (int4)'|' ) { + drop = countdroptree(q, poscor); + q[poscor-1].type=VAL; + q[poscor].val=(int4)'!'; + q[poscor].left=-1; + for(i=poscor+1;i<len;i++) + if ( q[i].type == OPR && q[i].left + i < poscor ) + q[i].left += drop - 2; + memcpy( (void*)&q[poscor-drop+1], + (void*)&q[poscor-1], + sizeof(ITEM) * ( len - (poscor-1) )); + len -= drop - 2; + } else { /* &-operator */ + if ( + (notisleft && q[poscor-1].type == OPR && + q[poscor-1].val == (int4)'!' ) || + (!notisleft && q[poscor+q[poscor].left].type == OPR && + q[poscor+q[poscor].left].val == (int4)'!' 
) + ) { /* drop subtree */ + drop = countdroptree(q, poscor); + q[poscor-1].type=VAL; + q[poscor].val=(int4)'!'; + q[poscor].left=-1; + for(i=poscor+1;i<len;i++) + if ( q[i].type == OPR && q[i].left + i < poscor ) + q[i].left += drop - 2; + memcpy( (void*)&q[poscor-drop+1], + (void*)&q[poscor-1], + sizeof(ITEM) * ( len - (poscor-1) )); + len -= drop - 2; + } else { /* drop only operator */ + int4 subtreepos = ( notisleft ) ? + poscor-1 : poscor+q[poscor].left; + int4 subtreelen = countdroptree( q, subtreepos ); + drop = countdroptree(q, poscor); + for(i=poscor+1;i<len;i++) + if ( q[i].type == OPR && q[i].left + i < poscor ) + q[i].left += drop - subtreelen; + memcpy( (void*)&q[ subtreepos+1 ], + (void*)&q[poscor+1], + sizeof(ITEM)*( len - (poscor-1) ) ); + memcpy( (void*)&q[ poscor-drop+1 ], + (void*)&q[subtreepos-subtreelen+1], + sizeof(ITEM)*( len - (drop-subtreelen) ) ); + len -= drop - subtreelen; + } + } + } while( index ); + return len; +} + + +Datum +querytree(PG_FUNCTION_ARGS) { + QUERYTYPE *query = (QUERYTYPE*)PG_DETOAST_DATUM(PG_GETARG_POINTER(0)); + INFIX nrm; + text *res; + ITEM *q; + int4 len; + + if ( query->size == 0 ) + elog(ERROR,"Empty"); + + q = (ITEM*)palloc( sizeof(ITEM) * query->size ); + memcpy( (void*)q, GETQUERY(query), sizeof(ITEM) * query->size ); + len = shorterquery( q, query->size ); + PG_FREE_IF_COPY(query,0); + + if ( len == 0 ) { + res = (text*) palloc( 1 + VARHDRSZ ); + VARATT_SIZEP(res) = 1 + VARHDRSZ; + *((char*)VARDATA(res)) = 'T'; + } else { + nrm.curpol = q + len - 1; + nrm.buflen = 32; + nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); + *(nrm.cur) = '\0'; + infix( &nrm, true ); + + res = (text*) palloc( nrm.cur-nrm.buf + VARHDRSZ ); + VARATT_SIZEP(res) = nrm.cur-nrm.buf + VARHDRSZ; + strncpy( VARDATA(res), nrm.buf, nrm.cur-nrm.buf ); + } + pfree(q); + + PG_RETURN_POINTER( res ); +} diff --git a/contrib/intarray/_int.sql.in b/contrib/intarray/_int.sql.in index 6ee186dc267..c033a5c0178 100644 --- 
a/contrib/intarray/_int.sql.in +++ b/contrib/intarray/_int.sql.in @@ -2,6 +2,51 @@ -- BEGIN TRANSACTION; +-- Query type +CREATE FUNCTION bqarr_in(opaque) +RETURNS opaque +AS 'MODULE_PATHNAME' +LANGUAGE 'c' with (isstrict); + +CREATE FUNCTION bqarr_out(opaque) +RETURNS opaque +AS 'MODULE_PATHNAME' +LANGUAGE 'c' with (isstrict); + +CREATE TYPE query_int ( +internallength = -1, +input = bqarr_in, +output = bqarr_out +); + +--only for debug +CREATE FUNCTION querytree(query_int) +RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE 'c' with (isstrict); + + +CREATE FUNCTION boolop(_int4, query_int) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE 'c' with (isstrict); + +COMMENT ON FUNCTION boolop(_int4, query_int) IS 'boolean operation with array'; + +CREATE FUNCTION rboolop(query_int, _int4) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE 'c' with (isstrict); + +COMMENT ON FUNCTION rboolop(query_int, _int4) IS 'boolean operation with array'; + +CREATE OPERATOR @@ ( + LEFTARG = _int4, RIGHTARG = query_int, PROCEDURE = boolop, + COMMUTATOR = '~~', RESTRICT = contsel, JOIN = contjoinsel +); + +CREATE OPERATOR ~~ ( + LEFTARG = query_int, RIGHTARG = _int4, PROCEDURE = rboolop, + COMMUTATOR = '@@', RESTRICT = contsel, JOIN = contjoinsel +); + + -- -- External C-functions for R-tree methods -- @@ -111,9 +156,10 @@ INSERT INTO pg_opclass (opcamid, opcname, opcintype, opcdefault, opckeytype) -- get the comparators for _intments and store them in a tmp table SELECT o.oid AS opoid, o.oprname INTO TEMP TABLE _int_ops_tmp -FROM pg_operator o, pg_type t -WHERE o.oprleft = t.oid and o.oprright = t.oid - and t.typname = '_int4'; +FROM pg_operator o, pg_type t, pg_type tq +WHERE o.oprleft = t.oid and ( o.oprright = t.oid or o.oprright=tq.oid ) + and t.typname = '_int4' + and tq.typname='query_int'; -- make sure we have the right operators -- SELECT * from _int_ops_tmp; @@ -157,6 +203,23 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) and opcname = 'gist__int_ops' and c.oprname 
= '~'; +--boolean search +INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) + SELECT opcl.oid, 20, true, c.opoid + FROM pg_opclass opcl, _int_ops_tmp c + WHERE + opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') + and opcname = 'gist__int_ops' + and c.oprname = '@@'; + +INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) + SELECT opcl.oid, 20, true, c.opoid + FROM pg_opclass opcl, _int_ops_tmp c + WHERE + opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') + and opcname = 'gist__int_ops' + and c.oprname = '~~'; + DROP TABLE _int_ops_tmp; @@ -258,9 +321,10 @@ INSERT INTO pg_opclass (opcamid, opcname, opcintype, opcdefault, opckeytype) -- get the comparators for _intments and store them in a tmp table SELECT o.oid AS opoid, o.oprname INTO TEMP TABLE _int_ops_tmp -FROM pg_operator o, pg_type t -WHERE o.oprleft = t.oid and o.oprright = t.oid - and t.typname = '_int4'; +FROM pg_operator o, pg_type t, pg_type tq +WHERE o.oprleft = t.oid and ( o.oprright = t.oid or o.oprright=tq.oid ) + and t.typname = '_int4' + and tq.typname='query_int'; -- make sure we have the right operators -- SELECT * from _int_ops_tmp; @@ -295,6 +359,22 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) and opcname = 'gist__intbig_ops' and c.oprname = '~'; +--boolean search +INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) + SELECT opcl.oid, 20, true, c.opoid + FROM pg_opclass opcl, _int_ops_tmp c + WHERE + opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') + and opcname = 'gist__intbig_ops' + and c.oprname = '@@'; +INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) + SELECT opcl.oid, 20, true, c.opoid + FROM pg_opclass opcl, _int_ops_tmp c + WHERE + opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') + and opcname = 'gist__intbig_ops' + and c.oprname = '~~'; + DROP TABLE _int_ops_tmp; diff --git a/contrib/intarray/expected/_int.out b/contrib/intarray/expected/_int.out index 
edcccf54bb6..b56d4e1b440 100644 --- a/contrib/intarray/expected/_int.out +++ b/contrib/intarray/expected/_int.out @@ -3,6 +3,229 @@ -- does not depend on contents of seg.sql. -- \set ECHO none +--test query_int +select '1'::query_int; + ?column? +---------- + 1 +(1 row) + +select ' 1'::query_int; + ?column? +---------- + 1 +(1 row) + +select '1 '::query_int; + ?column? +---------- + 1 +(1 row) + +select ' 1 '::query_int; + ?column? +---------- + 1 +(1 row) + +select ' ! 1 '::query_int; + ?column? +---------- + !1 +(1 row) + +select '!1'::query_int; + ?column? +---------- + !1 +(1 row) + +select '1|2'::query_int; + ?column? +---------- + 1 | 2 +(1 row) + +select '1|!2'::query_int; + ?column? +---------- + 1 | !2 +(1 row) + +select '!1|2'::query_int; + ?column? +---------- + !1 | 2 +(1 row) + +select '!1|!2'::query_int; + ?column? +---------- + !1 | !2 +(1 row) + +select '!(!1|!2)'::query_int; + ?column? +-------------- + !( !1 | !2 ) +(1 row) + +select '!(!1|2)'::query_int; + ?column? +------------- + !( !1 | 2 ) +(1 row) + +select '!(1|!2)'::query_int; + ?column? +------------- + !( 1 | !2 ) +(1 row) + +select '!(1|2)'::query_int; + ?column? +------------ + !( 1 | 2 ) +(1 row) + +select '1&2'::query_int; + ?column? +---------- + 1 & 2 +(1 row) + +select '!1&2'::query_int; + ?column? +---------- + !1 & 2 +(1 row) + +select '1&!2'::query_int; + ?column? +---------- + 1 & !2 +(1 row) + +select '!1&!2'::query_int; + ?column? +---------- + !1 & !2 +(1 row) + +select '(1&2)'::query_int; + ?column? +---------- + 1 & 2 +(1 row) + +select '1&(2)'::query_int; + ?column? +---------- + 1 & 2 +(1 row) + +select '!(1)&2'::query_int; + ?column? +---------- + !1 & 2 +(1 row) + +select '!(1&2)'::query_int; + ?column? +------------ + !( 1 & 2 ) +(1 row) + +select '1|2&3'::query_int; + ?column? +----------- + 1 | 2 & 3 +(1 row) + +select '1|(2&3)'::query_int; + ?column? +----------- + 1 | 2 & 3 +(1 row) + +select '(1|2)&3'::query_int; + ?column? 
+--------------- + ( 1 | 2 ) & 3 +(1 row) + +select '1|2&!3'::query_int; + ?column? +------------ + 1 | 2 & !3 +(1 row) + +select '1|!2&3'::query_int; + ?column? +------------ + 1 | !2 & 3 +(1 row) + +select '!1|2&3'::query_int; + ?column? +------------ + !1 | 2 & 3 +(1 row) + +select '!1|(2&3)'::query_int; + ?column? +------------ + !1 | 2 & 3 +(1 row) + +select '!(1|2)&3'::query_int; + ?column? +---------------- + !( 1 | 2 ) & 3 +(1 row) + +select '(!1|2)&3'::query_int; + ?column? +---------------- + ( !1 | 2 ) & 3 +(1 row) + +select '1|(2|(4|(5|6)))'::query_int; + ?column? +------------------------------- + 1 | ( 2 | ( 4 | ( 5 | 6 ) ) ) +(1 row) + +select '1|2|4|5|6'::query_int; + ?column? +------------------------------- + ( ( ( 1 | 2 ) | 4 ) | 5 ) | 6 +(1 row) + +select '1&(2&(4&(5&6)))'::query_int; + ?column? +------------------- + 1 & 2 & 4 & 5 & 6 +(1 row) + +select '1&2&4&5&6'::query_int; + ?column? +------------------- + 1 & 2 & 4 & 5 & 6 +(1 row) + +select '1&(2&(4&(5|6)))'::query_int; + ?column? +----------------------- + 1 & 2 & 4 & ( 5 | 6 ) +(1 row) + +select '1&(2&(4&(5|!6)))'::query_int; + ?column? 
+------------------------ + 1 & 2 & 4 & ( 5 | !6 ) +(1 row) + CREATE TABLE test__int( a int[] ); \copy test__int from 'data/test__int.data' SELECT count(*) from test__int WHERE a && '{23,50}'; @@ -11,12 +234,48 @@ SELECT count(*) from test__int WHERE a && '{23,50}'; 403 (1 row) +SELECT count(*) from test__int WHERE a @@ '23|50'; + count +------- + 403 +(1 row) + SELECT count(*) from test__int WHERE a @ '{23,50}'; count ------- 12 (1 row) +SELECT count(*) from test__int WHERE a @@ '23&50'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '50&68'; + count +------- + 9 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; + count +------- + 21 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; + count +------- + 21 +(1 row) + CREATE INDEX text_idx on test__int using gist ( a gist__int_ops ); SELECT count(*) from test__int WHERE a && '{23,50}'; count @@ -24,12 +283,48 @@ SELECT count(*) from test__int WHERE a && '{23,50}'; 403 (1 row) +SELECT count(*) from test__int WHERE a @@ '23|50'; + count +------- + 403 +(1 row) + SELECT count(*) from test__int WHERE a @ '{23,50}'; count ------- 12 (1 row) +SELECT count(*) from test__int WHERE a @@ '23&50'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '50&68'; + count +------- + 9 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; + count +------- + 21 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; + count +------- + 21 +(1 row) + drop index text_idx; CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops ); SELECT count(*) from test__int WHERE a && '{23,50}'; @@ -38,9 +333,45 @@ SELECT count(*) from test__int WHERE a && '{23,50}'; 403 (1 row) +SELECT count(*) from test__int WHERE a @@ 
'23|50'; + count +------- + 403 +(1 row) + SELECT count(*) from test__int WHERE a @ '{23,50}'; count ------- 12 (1 row) +SELECT count(*) from test__int WHERE a @@ '23&50'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}'; + count +------- + 12 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '50&68'; + count +------- + 9 +(1 row) + +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; + count +------- + 21 +(1 row) + +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; + count +------- + 21 +(1 row) + diff --git a/contrib/intarray/sql/_int.sql b/contrib/intarray/sql/_int.sql index 9635c5f26fe..52313ef7a17 100644 --- a/contrib/intarray/sql/_int.sql +++ b/contrib/intarray/sql/_int.sql @@ -6,21 +6,79 @@ \i _int.sql \set ECHO all +--test query_int +select '1'::query_int; +select ' 1'::query_int; +select '1 '::query_int; +select ' 1 '::query_int; +select ' ! 1 '::query_int; +select '!1'::query_int; +select '1|2'::query_int; +select '1|!2'::query_int; +select '!1|2'::query_int; +select '!1|!2'::query_int; +select '!(!1|!2)'::query_int; +select '!(!1|2)'::query_int; +select '!(1|!2)'::query_int; +select '!(1|2)'::query_int; +select '1&2'::query_int; +select '!1&2'::query_int; +select '1&!2'::query_int; +select '!1&!2'::query_int; +select '(1&2)'::query_int; +select '1&(2)'::query_int; +select '!(1)&2'::query_int; +select '!(1&2)'::query_int; +select '1|2&3'::query_int; +select '1|(2&3)'::query_int; +select '(1|2)&3'::query_int; +select '1|2&!3'::query_int; +select '1|!2&3'::query_int; +select '!1|2&3'::query_int; +select '!1|(2&3)'::query_int; +select '!(1|2)&3'::query_int; +select '(!1|2)&3'::query_int; +select '1|(2|(4|(5|6)))'::query_int; +select '1|2|4|5|6'::query_int; +select '1&(2&(4&(5&6)))'::query_int; +select '1&2&4&5&6'::query_int; +select '1&(2&(4&(5|6)))'::query_int; +select '1&(2&(4&(5|!6)))'::query_int; + + CREATE TABLE test__int( a int[] ); \copy test__int from 'data/test__int.data' SELECT 
count(*) from test__int WHERE a && '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23|50'; SELECT count(*) from test__int WHERE a @ '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23&50'; +SELECT count(*) from test__int WHERE a @ '{20,23}'; +SELECT count(*) from test__int WHERE a @@ '50&68'; +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; CREATE INDEX text_idx on test__int using gist ( a gist__int_ops ); SELECT count(*) from test__int WHERE a && '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23|50'; SELECT count(*) from test__int WHERE a @ '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23&50'; +SELECT count(*) from test__int WHERE a @ '{20,23}'; +SELECT count(*) from test__int WHERE a @@ '50&68'; +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; drop index text_idx; CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops ); SELECT count(*) from test__int WHERE a && '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23|50'; SELECT count(*) from test__int WHERE a @ '{23,50}'; +SELECT count(*) from test__int WHERE a @@ '23&50'; +SELECT count(*) from test__int WHERE a @ '{20,23}'; +SELECT count(*) from test__int WHERE a @@ '50&68'; +SELECT count(*) from test__int WHERE a @ '{20,23}' or a @ '{50,68}'; +SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; -- GitLab