diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 67d0c349e0c6a558ccc8ba2a7de6d2552ff4c832..464ce83d30e0b991616ba57dc2dda19db1d6a5ee 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -3959,15 +3959,7 @@ SELECT 'fat & rat & ! cat'::tsquery; tsquery ------------------------ 'fat' & 'rat' & !'cat' - -SELECT '(fat | rat) <-> cat'::tsquery; - tsquery ------------------------------------ - 'fat' <-> 'cat' | 'rat' <-> 'cat' </programlisting> - - The last example demonstrates that <type>tsquery</type> sometimes - rearranges nested operators into a logically equivalent formulation. </para> <para> diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 2da75955d0b5557e82d8783697346c204c60a52e..67e4901c921d8cf885beeb56e3b1fc6567a90615 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -264,7 +264,7 @@ SELECT 'fat & cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t text, any more than a <type>tsvector</type> is. A <type>tsquery</type> contains search terms, which must be already-normalized lexemes, and may combine multiple terms using AND, OR, NOT, and FOLLOWED BY operators. - (For details see <xref linkend="datatype-tsquery">.) There are + (For syntax details see <xref linkend="datatype-tsquery">.) There are functions <function>to_tsquery</>, <function>plainto_tsquery</>, and <function>phraseto_tsquery</> that are helpful in converting user-written text into a proper @@ -323,6 +323,8 @@ text @@ text at least one of its arguments must appear, while the <literal>!</> (NOT) operator specifies that its argument must <emphasis>not</> appear in order to have a match. + For example, the query <literal>fat & ! rat</> matches documents that + contain <literal>fat</> but not <literal>rat</>. </para> <para> @@ -377,6 +379,28 @@ SELECT phraseto_tsquery('the cats ate the rats'); then <literal>&</literal>, then <literal><-></literal>, and <literal>!</literal> most tightly. 
</para> + + <para> + It's worth noticing that the AND/OR/NOT operators mean something subtly + different when they are within the arguments of a FOLLOWED BY operator + than when they are not, because within FOLLOWED BY the exact position of + the match is significant. For example, normally <literal>!x</> matches + only documents that do not contain <literal>x</> anywhere. + But <literal>!x <-> y</> matches <literal>y</> if it is not + immediately after an <literal>x</>; an occurrence of <literal>x</> + elsewhere in the document does not prevent a match. Another example is + that <literal>x & y</> normally only requires that <literal>x</> + and <literal>y</> both appear somewhere in the document, but + <literal>(x & y) <-> z</> requires <literal>x</> + and <literal>y</> to match at the same place, immediately before + a <literal>z</>. Thus this query behaves differently from + <literal>x <-> z & y <-> z</>, which will match a + document containing two separate sequences <literal>x z</> and + <literal>y z</>. (This specific query is useless as written, + since <literal>x</> and <literal>y</> could not match at the same place; + but with more complex situations such as prefix-match patterns, a query + of this form could be useful.) + </para> </sect2> <sect2 id="textsearch-intro-configurations"> diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index efc111e379c6dfd62bb6aff3c91227fdf27c5a70..3e0a44459acd301bb0dfbced5568944fb34d5e73 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -212,7 +212,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) * Evaluate tsquery boolean expression using ternary logic. 
*/ static GinTernaryValue -TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) +TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) { GinTernaryValue val1, val2, @@ -230,7 +230,10 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) switch (curitem->qoperator.oper) { case OP_NOT: - result = TS_execute_ternary(gcv, curitem + 1); + /* In phrase search, always return MAYBE since we lack positions */ + if (in_phrase) + return GIN_MAYBE; + result = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (result == GIN_MAYBE) return result; return !result; @@ -238,17 +241,21 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) case OP_PHRASE: /* - * GIN doesn't contain any information about positions, treat + * GIN doesn't contain any information about positions, so treat * OP_PHRASE as OP_AND with recheck requirement */ - *gcv->need_recheck = true; + *(gcv->need_recheck) = true; + /* Pass down in_phrase == true in case there's a NOT below */ + in_phrase = true; + /* FALL THRU */ case OP_AND: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, + in_phrase); if (val1 == GIN_FALSE) return GIN_FALSE; - val2 = TS_execute_ternary(gcv, curitem + 1); + val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (val2 == GIN_FALSE) return GIN_FALSE; if (val1 == GIN_TRUE && val2 == GIN_TRUE) @@ -257,10 +264,11 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) return GIN_MAYBE; case OP_OR: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, + in_phrase); if (val1 == GIN_TRUE) return GIN_TRUE; - val2 = TS_execute_ternary(gcv, curitem + 1); + val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (val2 == GIN_TRUE) return GIN_TRUE; if (val1 == GIN_FALSE && val2 == GIN_FALSE) @@ -307,7 +315,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) res = TS_execute(GETQUERY(query), &gcv, - 
TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND, + TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, checkcondition_gin); } @@ -343,7 +351,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; - res = TS_execute_ternary(&gcv, GETQUERY(query)); + res = TS_execute_ternary(&gcv, GETQUERY(query), false); if (res == GIN_TRUE && recheck) res = GIN_MAYBE; diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 6cdfb13f6d12221e309bc536a1ba71235141f75b..a4c2bb9cec40d7d4d26c5b4a2097892d872e73e0 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -359,12 +359,11 @@ gtsvector_consistent(PG_FUNCTION_ARGS) if (ISALLTRUE(key)) PG_RETURN_BOOL(true); - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), + /* since signature is lossy, cannot specify CALC_NOT here */ + PG_RETURN_BOOL(TS_execute(GETQUERY(query), (void *) GETSIGN(key), - TS_EXEC_PHRASE_AS_AND, - checkcondition_bit - )); + TS_EXEC_PHRASE_NO_POS, + checkcondition_bit)); } else { /* only leaf pages */ @@ -372,12 +371,10 @@ gtsvector_consistent(PG_FUNCTION_ARGS) chkval.arrb = GETARR(key); chkval.arre = chkval.arrb + ARRNELEM(key); - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), + PG_RETURN_BOOL(TS_execute(GETQUERY(query), (void *) &chkval, - TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT, - checkcondition_arr - )); + TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT, + checkcondition_arr)); } } diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 3d11a1c2080b89396a3bc0955eb0f6a4b8ee1298..f0bd52877f335753688926258c3edc4136f08523 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -557,13 +557,11 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) curitem->oper == OP_OR || curitem->oper == OP_PHRASE); - if (curitem->oper == OP_PHRASE) - *needcleanup = true; /* push OP_PHRASE down later */ - (*pos)++; /* process RIGHT argument 
*/ findoprnd_recurse(ptr, pos, nnodes, needcleanup); + curitem->left = *pos - tmp; /* set LEFT arg's offset */ /* process LEFT argument */ @@ -574,8 +572,9 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) /* - * Fills in the left-fields previously left unfilled. The input - * QueryItems must be in polish (prefix) notation. + * Fill in the left-fields previously left unfilled. + * The input QueryItems must be in polish (prefix) notation. + * Also, set *needcleanup to true if there are any QI_VALSTOP nodes. */ static void findoprnd(QueryItem *ptr, int size, bool *needcleanup) @@ -687,15 +686,17 @@ parse_tsquery(char *buf, memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); pfree(state.op); - /* Set left operand pointers for every operator. */ + /* + * Set left operand pointers for every operator. While we're at it, + * detect whether there are any QI_VALSTOP nodes. + */ findoprnd(ptr, query->size, &needcleanup); /* - * QI_VALSTOP nodes should be cleaned and OP_PHRASE should be pushed - * down + * If there are QI_VALSTOP nodes, delete them and simplify the tree. 
*/ if (needcleanup) - return cleanup_fakeval_and_phrase(query); + query = cleanup_tsquery_stopwords(query); return query; } @@ -1088,6 +1089,9 @@ tsqueryrecv(PG_FUNCTION_ARGS) */ findoprnd(item, size, &needcleanup); + /* Can't have found any QI_VALSTOP nodes */ + Assert(!needcleanup); + /* Copy operands to output struct */ for (i = 0; i < size; i++) { @@ -1105,9 +1109,6 @@ tsqueryrecv(PG_FUNCTION_ARGS) SET_VARSIZE(query, len + datalen); - if (needcleanup) - PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query)); - PG_RETURN_TSQUERY(query); } diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 330664da6391d5adc686a545d423648f1d8ae904..c10c7ef0aa5ffffdcdd8d8392ade11eee5ec68ed 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -25,19 +25,6 @@ typedef struct NODE QueryItem *valnode; } NODE; -/* - * To simplify walking on query tree and pushing down of phrase operator - * we define some fake priority here: phrase operator has highest priority - * of any other operators (and we believe here that OP_PHRASE is a highest - * code of operations) and value node has ever highest priority. - * Priority values of other operations don't matter until they are less than - * phrase operator and value node. - */ -#define VALUE_PRIORITY (OP_COUNT + 1) -#define NODE_PRIORITY(x) \ - ( ((x)->valnode->qoperator.type == QI_OPR) ? \ - (x)->valnode->qoperator.oper : VALUE_PRIORITY ) - /* * make query tree from plain view of query */ @@ -368,227 +355,6 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd) return node; } -static NODE * -copyNODE(NODE *node) -{ - NODE *cnode = palloc(sizeof(NODE)); - - /* since this function recurses, it could be driven to stack overflow. 
*/ - check_stack_depth(); - - cnode->valnode = palloc(sizeof(QueryItem)); - *(cnode->valnode) = *(node->valnode); - - if (node->valnode->type == QI_OPR) - { - cnode->right = copyNODE(node->right); - if (node->valnode->qoperator.oper != OP_NOT) - cnode->left = copyNODE(node->left); - } - - return cnode; -} - -static NODE * -makeNODE(int8 op, NODE *left, NODE *right) -{ - NODE *node = palloc(sizeof(NODE)); - - /* zeroing allocation to prevent difference in unused bytes */ - node->valnode = palloc0(sizeof(QueryItem)); - - node->valnode->qoperator.type = QI_OPR; - node->valnode->qoperator.oper = op; - - node->left = left; - node->right = right; - - return node; -} - -/* - * Move operation with high priority to the leaves. This guarantees - * that the phrase operator will be near the bottom of the tree. - * An idea behind is do not store position of lexemes during execution - * of ordinary operations (AND, OR, NOT) because it could be expensive. - * Actual transformation will be performed only on subtrees under the - * <-> (<n>) operation since it's needed solely for the phrase operator. - * - * Rules: - * a <-> (b | c) => (a <-> b) | (a <-> c) - * (a | b) <-> c => (a <-> c) | (b <-> c) - * a <-> !b => a & !(a <-> b) - * !a <-> b => b & !(a <-> b) - * - * Warnings for readers: - * a <-> b != b <-> a - * - * a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are: - * n 2n-1 - */ -static NODE * -normalize_phrase_tree(NODE *node) -{ - /* there should be no stop words at this point */ - Assert(node->valnode->type != QI_VALSTOP); - - if (node->valnode->type == QI_VAL) - return node; - - /* since this function recurses, it could be driven to stack overflow. 
*/ - check_stack_depth(); - - Assert(node->valnode->type == QI_OPR); - - if (node->valnode->qoperator.oper == OP_NOT) - { - NODE *orignode = node; - - /* eliminate NOT sequence */ - while (node->valnode->type == QI_OPR && - node->valnode->qoperator.oper == node->right->valnode->qoperator.oper) - { - node = node->right->right; - } - - if (orignode != node) - /* current node isn't checked yet */ - node = normalize_phrase_tree(node); - else - node->right = normalize_phrase_tree(node->right); - } - else if (node->valnode->qoperator.oper == OP_PHRASE) - { - int16 distance; - NODE *X; - - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - - /* - * if subtree contains only nodes with higher "priority" then we are - * done. See comment near NODE_PRIORITY() - */ - if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) && - NODE_PRIORITY(node) <= NODE_PRIORITY(node->left)) - return node; - - /* - * We can't swap left-right and works only with left child because of - * a <-> b != b <-> a - */ - - distance = node->valnode->qoperator.distance; - - if (node->right->valnode->type == QI_OPR) - { - switch (node->right->valnode->qoperator.oper) - { - case OP_AND: - /* a <-> (b & c) => (a <-> b) & (a <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* a <-> (b | c) => (a <-> b) | (a <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* a <-> !b => a & !(a <-> b) */ - X = node->right; - node->right = node->right->right; - X->right = node; - node = makeNODE(OP_AND, - 
copyNODE(node->left), - X); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->right->valnode->qoperator.oper); - } - } - - if (node->left->valnode->type == QI_OPR && - node->valnode->qoperator.oper == OP_PHRASE) - { - /* - * if the node is still OP_PHRASE, check the left subtree, - * otherwise the whole node will be transformed later. - */ - switch (node->left->valnode->qoperator.oper) - { - case OP_AND: - /* (a & b) <-> c => (a <-> c) & (b <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* (a | b) <-> c => (a <-> c) | (b <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* !a <-> b => b & !(a <-> b) */ - X = node->left; - node->left = node->left->right; - X->right = node; - node = makeNODE(OP_AND, - X, - copyNODE(node->right)); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->left->valnode->qoperator.oper); - } - } - - /* continue transformation */ - node = normalize_phrase_tree(node); - } - else /* AND or OR */ - { - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - } - - return node; -} - /* * Number of elements in query tree */ @@ -613,8 +379,11 @@ calcstrlen(NODE *node) return size; } +/* + * Remove QI_VALSTOP (stopword) nodes from TSQuery. 
+ */ TSQuery -cleanup_fakeval_and_phrase(TSQuery in) +cleanup_tsquery_stopwords(TSQuery in) { int32 len, lenstr, @@ -642,9 +411,6 @@ cleanup_fakeval_and_phrase(TSQuery in) return out; } - /* push OP_PHRASE nodes down */ - root = normalize_phrase_tree(root); - /* * Build TSQuery from plain view */ diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c index a574b4b2573d602ea91b05256449e836a1dc0060..8f90ce99e0d1f264021b3b75c5750a591aa2999e 100644 --- a/src/backend/utils/adt/tsquery_op.c +++ b/src/backend/utils/adt/tsquery_op.c @@ -104,7 +104,7 @@ tsquery_or(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(a, 0); PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(query); + PG_RETURN_TSQUERY(query); } Datum @@ -140,7 +140,7 @@ tsquery_phrase_distance(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(a, 0); PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query)); + PG_RETURN_TSQUERY(query); } Datum diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 36cc10c90176aad0e5c00f01cacfcf15729622ed..01c721f835e713269def6c02a430fe1fd1acb212 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -11,9 +11,10 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" +#include <limits.h> + #include "access/htup_details.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" @@ -1404,148 +1405,395 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) return res; } +/* + * Compute output position list for a tsquery operator in phrase mode. + * + * Merge the position lists in Ldata and Rdata as specified by "emit", + * returning the result list into *data. The input position lists must be + * sorted and unique, and the output will be as well. 
+ * + * data: pointer to initially-all-zeroes output struct, or NULL + * Ldata, Rdata: input position lists + * emit: bitmask of TSPO_XXX flags + * Loffset: offset to be added to Ldata positions before comparing/outputting + * Roffset: offset to be added to Rdata positions before comparing/outputting + * max_npos: maximum possible required size of output position array + * + * Loffset and Roffset should not be negative, else we risk trying to output + * negative positions, which won't fit into WordEntryPos. + * + * Returns true if any positions were emitted to *data; or if data is NULL, + * returns true if any positions would have been emitted. + */ +#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */ +#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */ +#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */ + +static bool +TS_phrase_output(ExecPhraseData *data, + ExecPhraseData *Ldata, + ExecPhraseData *Rdata, + int emit, + int Loffset, + int Roffset, + int max_npos) +{ + int Lindex, + Rindex; + + /* Loop until both inputs are exhausted */ + Lindex = Rindex = 0; + while (Lindex < Ldata->npos || Rindex < Rdata->npos) + { + int Lpos, + Rpos; + int output_pos = 0; + + /* + * Fetch current values to compare. WEP_GETPOS() is needed because + * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector. + */ + if (Lindex < Ldata->npos) + Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset; + else + { + /* L array exhausted, so we're done if R_ONLY isn't set */ + if (!(emit & TSPO_R_ONLY)) + break; + Lpos = INT_MAX; + } + if (Rindex < Rdata->npos) + Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset; + else + { + /* R array exhausted, so we're done if L_ONLY isn't set */ + if (!(emit & TSPO_L_ONLY)) + break; + Rpos = INT_MAX; + } + + /* Merge-join the two input lists */ + if (Lpos < Rpos) + { + /* Lpos is not matched in Rdata, should we output it? 
*/ + if (emit & TSPO_L_ONLY) + output_pos = Lpos; + Lindex++; + } + else if (Lpos == Rpos) + { + /* Lpos and Rpos match ... should we output it? */ + if (emit & TSPO_BOTH) + output_pos = Rpos; + Lindex++; + Rindex++; + } + else /* Lpos > Rpos */ + { + /* Rpos is not matched in Ldata, should we output it? */ + if (emit & TSPO_R_ONLY) + output_pos = Rpos; + Rindex++; + } + + if (output_pos > 0) + { + if (data) + { + /* Store position, first allocating output array if needed */ + if (data->pos == NULL) + { + data->pos = (WordEntryPos *) + palloc(max_npos * sizeof(WordEntryPos)); + data->allocated = true; + } + data->pos[data->npos++] = output_pos; + } + else + { + /* + * Exact positions not needed, so return true as soon as we + * know there is at least one. + */ + return true; + } + } + } + + if (data && data->npos > 0) + { + /* Let's assert we didn't overrun the array */ + Assert(data->npos <= max_npos); + return true; + } + return false; +} + /* * Execute tsquery at or below an OP_PHRASE operator. * - * This handles the recursion at levels where we need to care about - * match locations. In addition to the same arguments used for TS_execute, - * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to - * be filled with lexeme match positions on success. data == NULL if no - * match data need be returned. (In practice, outside callers pass NULL, - * and only the internal recursion cases pass a data pointer.) + * This handles tsquery execution at recursion levels where we need to care + * about match locations. + * + * In addition to the same arguments used for TS_execute, the caller may pass + * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme + * match position info on success. data == NULL if no position data need be + * returned. (In practice, outside callers pass NULL, and only the internal + * recursion cases pass a data pointer.) + * Note: the function assumes data != NULL for operators other than OP_PHRASE. 
+ * This is OK because an outside call always starts from an OP_PHRASE node. + * + * The detailed semantics of the match data, given that the function returned + * "true" (successful match, or possible match), are: + * + * npos > 0, negate = false: + * query is matched at specified position(s) (and only those positions) + * npos > 0, negate = true: + * query is matched at all positions *except* specified position(s) + * npos = 0, negate = false: + * query is possibly matched, matching position(s) are unknown + * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set) + * npos = 0, negate = true: + * query is matched at all positions + * + * Successful matches also return a "width" value which is the match width in + * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches, + * and is the sum of the phrase operator distances for phrase matches. Note + * that when width > 0, the listed positions represent the ends of matches not + * the starts. (This unintuitive rule is needed to avoid possibly generating + * negative positions, which wouldn't fit into the WordEntryPos arrays.) + * + * When the function returns "false" (no match), it must return npos = 0, + * negate = false (which is the state initialized by the caller); but the + * "width" output in such cases is undefined. 
*/ static bool TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, - ExecPhraseData *data, - TSExecuteCallback chkcond) + TSExecuteCallback chkcond, + ExecPhraseData *data) { + ExecPhraseData Ldata, + Rdata; + bool lmatch, + rmatch; + int Loffset, + Roffset, + maxwidth; + /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) - { return chkcond(arg, (QueryOperand *) curitem, data); - } - else + + switch (curitem->qoperator.oper) { - ExecPhraseData Ldata = {0, false, NULL}, - Rdata = {0, false, NULL}; - WordEntryPos *Lpos, - *LposStart, - *Rpos, - *pos_iter = NULL; + case OP_NOT: - Assert(curitem->qoperator.oper == OP_PHRASE); + /* + * Because a "true" result with no specific positions is taken as + * uncertain, we need no special care here for !TS_EXEC_CALC_NOT. + * If it's a false positive, the right things happen anyway. + * + * Also, we need not touch data->width, since a NOT operation does + * not change the match width. 
+ */ + if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data)) + { + if (data->npos > 0) + { + /* we have some positions, invert negate flag */ + data->negate = !data->negate; + return true; + } + else if (data->negate) + { + /* change "match everywhere" to "match nowhere" */ + data->negate = false; + return false; + } + /* match positions are, and remain, uncertain */ + return true; + } + else + { + /* change "match nowhere" to "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return true; + } - if (!TS_phrase_execute(curitem + curitem->qoperator.left, - arg, flags, &Ldata, chkcond)) - return false; + case OP_PHRASE: + case OP_AND: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); - if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond)) - return false; + if (!TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata)) + return false; - /* - * If either operand has no position information, then we normally - * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we - * return true, treating OP_PHRASE as if it were OP_AND. - */ - if (Ldata.npos == 0 || Rdata.npos == 0) - return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false; + if (!TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata)) + return false; - /* - * Prepare output position array if needed. - */ - if (data) - { /* - * We can recycle the righthand operand's result array if it was - * palloc'd, else must allocate our own. The number of matches - * couldn't be more than the smaller of the two operands' matches. + * If either operand has no position information, then we can't + * return position data, only a "possible match" result. "Possible + * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag + * is set, otherwise return false. 
*/ - if (!Rdata.allocated) - data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos)); - else - data->pos = Rdata.pos; + if ((Ldata.npos == 0 && !Ldata.negate) || + (Rdata.npos == 0 && !Rdata.negate)) + return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false; - data->allocated = true; - data->npos = 0; - pos_iter = data->pos; - } + if (curitem->qoperator.oper == OP_PHRASE) + { + /* + * Compute Loffset and Roffset suitable for phrase match, and + * compute overall width of whole phrase match. + */ + Loffset = curitem->qoperator.distance + Rdata.width; + Roffset = 0; + if (data) + data->width = curitem->qoperator.distance + + Ldata.width + Rdata.width; + } + else + { + /* + * For OP_AND, set output width and alignment like OP_OR (see + * comment below) + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + if (data) + data->width = maxwidth; + } - /* - * Find matches by distance. WEP_GETPOS() is needed because - * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. - * - * Note that the output positions are those of the matching RIGHT - * operands. - */ - Rpos = Rdata.pos; - LposStart = Ldata.pos; - while (Rpos < Rdata.pos + Rdata.npos) - { - /* - * We need to check all possible distances, so reset Lpos to - * guaranteed not yet satisfied position. - */ - Lpos = LposStart; - while (Lpos < Ldata.pos + Ldata.npos) + if (Ldata.negate && Rdata.negate) { - if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) == - curitem->qoperator.distance) - { - /* MATCH! */ - if (data) - { - /* Store position for upper phrase operator */ - *pos_iter = WEP_GETPOS(*Rpos); - pos_iter++; - - /* - * Set left start position to next, because current - * one could not satisfy distance for any other right - * position - */ - LposStart = Lpos + 1; - break; - } - else - { - /* - * We are at the root of the phrase tree and hence we - * don't have to identify all the match positions. - * Just report success. 
- */ - return true; - } + /* !L & !R: treat as !(L | R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + if (data) + data->negate = true; + return true; + } + else if (Ldata.negate) + { + /* !L & R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + } + else if (Rdata.negate) + { + /* L & !R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + } + else + { + /* straight AND */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + } - } - else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) || - WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) < - curitem->qoperator.distance) - { - /* - * Go to the next Rpos, because Lpos is ahead or on less - * distance than required by current operator - */ - break; + case OP_OR: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); - } + lmatch = TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + rmatch = TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); - Lpos++; - } + if (!lmatch && !rmatch) + return false; - Rpos++; - } + /* + * If a valid operand has no position information, then we can't + * return position data, only a "possible match" result. "Possible + * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag + * is set, otherwise return false. + */ + if ((lmatch && Ldata.npos == 0 && !Ldata.negate) || + (rmatch && Rdata.npos == 0 && !Rdata.negate)) + return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false; - if (data) - { - data->npos = pos_iter - data->pos; + /* + * Cope with undefined output width from failed submatch. (This + * takes less code than trying to ensure that all failure returns + * set data->width to zero.) 
+ */ + if (!lmatch) + Ldata.width = 0; + if (!rmatch) + Rdata.width = 0; - if (data->npos > 0) + /* + * For OP_AND and OP_OR, report the width of the wider of the two + * inputs, and align the narrower input's positions to the right + * end of that width. This rule deals at least somewhat + * reasonably with cases like "x <-> (y | z <-> q)". + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + data->width = maxwidth; + + if (Ldata.negate && Rdata.negate) + { + /* !L | !R: treat as !(L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + data->negate = true; return true; - } + } + else if (Ldata.negate) + { + /* !L | R: treat as !(L & !R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + data->negate = true; + return true; + } + else if (Rdata.negate) + { + /* L | !R: treat as !(!L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + data->negate = true; + return true; + } + else + { + /* straight OR */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + } + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); } + /* not reachable, but keep compiler quiet */ return false; } @@ -1594,12 +1842,7 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags, return TS_execute(curitem + 1, arg, flags, chkcond); case OP_PHRASE: - - /* - * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could - * do something more if it's called from TS_phrase_execute() - */ - return TS_phrase_execute(curitem, arg, flags, NULL, chkcond); + return TS_phrase_execute(curitem, arg, flags, chkcond, NULL); default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); diff --git a/src/include/tsearch/ts_utils.h 
b/src/include/tsearch/ts_utils.h index 1fbd9838984581aa2368bef3adec3795c2db1186..81602d1adc5b47cab62d841281b8b24cabac001a 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -113,8 +113,8 @@ extern text *generateHeadline(HeadlineParsedText *prs); * struct ExecPhraseData is passed to a TSExecuteCallback function if we need * lexeme position data (because of a phrase-match operator in the tsquery). * The callback should fill in position data when it returns true (success). - * If it cannot return position data, it may ignore its "data" argument, but - * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag + * If it cannot return position data, it may leave "data" unchanged, but + * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag * and must arrange for a later recheck with position data available. * * The reported lexeme positions must be sorted and unique. Callers must only @@ -123,13 +123,21 @@ extern text *generateHeadline(HeadlineParsedText *prs); * portion of a tsvector value. If "allocated" is true then the pos array * is palloc'd workspace and caller may free it when done. * + * "negate" means that the pos array contains positions where the query does + * not match, rather than positions where it does. "width" is positive when + * the match is wider than one lexeme. Neither of these fields normally needs + * to be touched by TSExecuteCallback functions; they are used for + * phrase-search processing within TS_execute. + * * All fields of the ExecPhraseData struct are initially zeroed by caller. */ typedef struct ExecPhraseData { int npos; /* number of positions reported */ bool allocated; /* pos points to palloc'd data? 
*/ + bool negate; /* positions are where query is NOT matched */ WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */ + int width; /* width of match in lexemes, less 1 */ } ExecPhraseData; /* @@ -139,7 +147,9 @@ typedef struct ExecPhraseData * val: lexeme to test for presence of * data: to be filled with lexeme positions; NULL if position data not needed * - * Return TRUE if lexeme is present in data, else FALSE + * Return TRUE if lexeme is present in data, else FALSE. If data is not + * NULL, it should be filled with lexeme positions, but the function can leave + * it as zeroes if position data is not available. */ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, ExecPhraseData *data); @@ -151,15 +161,18 @@ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, /* * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically * evaluated to be true. Useful in cases where NOT cannot be accurately - * computed (GiST) or it isn't important (ranking). + * computed (GiST) or it isn't important (ranking). From TS_execute's + * perspective, !CALC_NOT means that the TSExecuteCallback function might + * return false-positive indications of a lexeme's presence. */ #define TS_EXEC_CALC_NOT (0x01) /* - * Treat OP_PHRASE as OP_AND. Used when positional information is not - * accessible, like in consistent methods of GIN/GiST indexes; rechecking - * must occur later. + * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily + * in the absence of position information: a TRUE result indicates that the + * phrase might be present. Without this flag, OP_PHRASE always returns + * false if lexeme position information is not available. 
*/ -#define TS_EXEC_PHRASE_AS_AND (0x02) +#define TS_EXEC_PHRASE_NO_POS (0x02) extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond); @@ -228,7 +241,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS); * TSQuery Utilities */ extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); -extern TSQuery cleanup_fakeval_and_phrase(TSQuery in); +extern TSQuery cleanup_tsquery_stopwords(TSQuery in); typedef struct QTNode { diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index c55591a6787d1434f07a92acd46123a50c7b6234..8ed64d3c68e07107fbe048e114b1c48a6f21c037 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); (1 row) SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky'); - to_tsquery ------------------------------------------------------------------ - 'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky' + to_tsquery +------------------------------------------------- + ( 'foot':B & 'ball':B & 'klubber':B ) <-> 'sky' (1 row) SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky'); - phraseto_tsquery ------------------------------------------------------------ - 'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky' + phraseto_tsquery +------------------------------------------- + ( 'foot' & 'ball' & 'klubber' ) <-> 'sky' (1 row) -- Test ispell dictionary with hunspell affix with FLAG long in configuration diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index cf3beb35a994aaeff06af0eaf7218d67cc47b132..0681d43358bf446be82ea4c1883184c88ae0162b 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -556,15 +556,15 @@ SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg'; -- Check stop word deletion, a and s are stop-words 
SELECT to_tsquery('english', '!(a & !b) & c'); - to_tsquery ------------- - 'b' & 'c' + to_tsquery +------------- + !!'b' & 'c' (1 row) SELECT to_tsquery('english', '!(a & !b)'); to_tsquery ------------ - 'b' + !!'b' (1 row) SELECT to_tsquery('english', '(1 <-> 2) <-> a'); @@ -1240,15 +1240,15 @@ SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::t (1 row) SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text ); - ts_rewrite ---------------------------------------- - '5' <-> '1' & '5' <-> ( '2' <-> '3' ) + ts_rewrite +------------------------- + '5' <-> ( '2' <-> '4' ) (1 row) SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text ); - ts_rewrite ---------------------------- - '5' <-> '7' | '5' <-> '8' + ts_rewrite +----------------------- + '5' <-> ( '6' | '8' ) (1 row) -- Check empty substitution @@ -1386,6 +1386,26 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) (1 row) +SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); + ts_rewrite +----------------------------------------- + ( 'bar' | 'baz' ) <-> ( 'bar' | 'baz' ) +(1 row) + +SELECT to_tsvector('foo bar') @@ + ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); + ?column? +---------- + f +(1 row) + +SELECT to_tsvector('bar baz') @@ + ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); + ?column? 
+---------- + t +(1 row) + RESET enable_seqscan; --test GUC SET default_text_search_config=simple; diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index 8d9290cbac122d00222c97304f2f6de3c42438b7..dcce82fdc4f99c1abd70ef6b488d06a22d32c0c6 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -366,133 +366,6 @@ SELECT '!!a & !!b'::tsquery; !!'a' & !!'b' (1 row) --- phrase transformation -SELECT 'a <-> (b|c)'::tsquery; - tsquery ---------------------------- - 'a' <-> 'b' | 'a' <-> 'c' -(1 row) - -SELECT '(a|b) <-> c'::tsquery; - tsquery ---------------------------- - 'a' <-> 'c' | 'b' <-> 'c' -(1 row) - -SELECT '(a|b) <-> (d|c)'::tsquery; - tsquery -------------------------------------------------------- - 'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c' -(1 row) - -SELECT 'a <-> (b&c)'::tsquery; - tsquery ---------------------------- - 'a' <-> 'b' & 'a' <-> 'c' -(1 row) - -SELECT '(a&b) <-> c'::tsquery; - tsquery ---------------------------- - 'a' <-> 'c' & 'b' <-> 'c' -(1 row) - -SELECT '(a&b) <-> (d&c)'::tsquery; - tsquery -------------------------------------------------------- - 'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c' -(1 row) - -SELECT 'a <-> !b'::tsquery; - tsquery ------------------------- - 'a' & !( 'a' <-> 'b' ) -(1 row) - -SELECT '!a <-> b'::tsquery; - tsquery ------------------------- - !( 'a' <-> 'b' ) & 'b' -(1 row) - -SELECT '!a <-> !b'::tsquery; - tsquery ------------------------------------- - !'a' & !( !( 'a' <-> 'b' ) & 'b' ) -(1 row) - -SELECT 'a <-> !(b&c)'::tsquery; - tsquery --------------------------------------- - 'a' & !( 'a' <-> 'b' & 'a' <-> 'c' ) -(1 row) - -SELECT 'a <-> !(b|c)'::tsquery; - tsquery --------------------------------------- - 'a' & !( 'a' <-> 'b' | 'a' <-> 'c' ) -(1 row) - -SELECT '!(a&b) <-> c'::tsquery; - tsquery --------------------------------------- - !( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c' -(1 row) - -SELECT '!(a|b) <-> 
c'::tsquery; - tsquery --------------------------------------- - !( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c' -(1 row) - -SELECT '(!a|b) <-> c'::tsquery; - tsquery --------------------------------------- - !( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c' -(1 row) - -SELECT '(!a&b) <-> c'::tsquery; - tsquery --------------------------------------- - !( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c' -(1 row) - -SELECT 'c <-> (!a|b)'::tsquery; - tsquery --------------------------------------- - 'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b' -(1 row) - -SELECT 'c <-> (!a&b)'::tsquery; - tsquery --------------------------------------- - 'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b' -(1 row) - -SELECT '(a|b) <-> !c'::tsquery; - tsquery ------------------------------------------------- - ( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' ) -(1 row) - -SELECT '(a&b) <-> !c'::tsquery; - tsquery --------------------------------------------- - 'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' ) -(1 row) - -SELECT '!c <-> (a|b)'::tsquery; - tsquery -------------------------------------------------- - !( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b' -(1 row) - -SELECT '!c <-> (a&b)'::tsquery; - tsquery -------------------------------------------------- - !( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b' -(1 row) - --comparisons SELECT 'a' < 'b & c'::tsquery as "true"; true @@ -568,33 +441,33 @@ SELECT 'foo & bar'::tsquery && 'asd | fg'; (1 row) SELECT 'a' <-> 'b & d'::tsquery; - ?column? ---------------------------- - 'a' <-> 'b' & 'a' <-> 'd' + ?column? +----------------------- + 'a' <-> ( 'b' & 'd' ) (1 row) SELECT 'a & g' <-> 'b & d'::tsquery; - ?column? -------------------------------------------------------- - 'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd' + ?column? +--------------------------------- + ( 'a' & 'g' ) <-> ( 'b' & 'd' ) (1 row) SELECT 'a & g' <-> 'b | d'::tsquery; - ?column? -------------------------------------------------------- - 'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd' + ?column? 
+--------------------------------- + ( 'a' & 'g' ) <-> ( 'b' | 'd' ) (1 row) SELECT 'a & g' <-> 'b <-> d'::tsquery; - ?column? ---------------------------------------------------- - 'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' ) + ?column? +----------------------------------- + ( 'a' & 'g' ) <-> ( 'b' <-> 'd' ) (1 row) SELECT tsquery_phrase('a <3> g', 'b & d', 10); - tsquery_phrase ---------------------------------------------- - 'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd' + tsquery_phrase +-------------------------------- + 'a' <3> 'g' <10> ( 'b' & 'd' ) (1 row) -- tsvector-tsquery operations @@ -749,25 +622,152 @@ SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true"; t (1 row) -SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true"; + true +------ + t +(1 row) + +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false"; false ------- f (1 row) -SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true"; +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true"; true ------ t (1 row) -SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true"; +SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true"; true ------ t (1 row) -SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true"; +-- without position data, phrase search does not match +SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true"; + true +------ + t +(1 row) + +select 
to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false"; + false +------- + f +(1 row) + +select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true"; + true +------ + t +(1 row) + +select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true"; true ------ t @@ -1002,6 +1002,12 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false"; f (1 row) +SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true"; + true +------ + t +(1 row) + -- tsvector editing operations SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); strip diff --git 
a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index de43860c7044f4e3a1b27b43ee967034033bbb16..1255f6954db95498751f08b0b6856fc417af171f 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -447,6 +447,12 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; +SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); +SELECT to_tsvector('foo bar') @@ + ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); +SELECT to_tsvector('bar baz') @@ + ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); + RESET enable_seqscan; --test GUC diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql index 9ea93a2993845a71ee452e8a0e18674f7ea78b15..77436ce04e06f88866b4c9855f36559b776dc6b8 100644 --- a/src/test/regress/sql/tstypes.sql +++ b/src/test/regress/sql/tstypes.sql @@ -64,34 +64,6 @@ SELECT 'a & !!b'::tsquery; SELECT '!!a & b'::tsquery; SELECT '!!a & !!b'::tsquery; --- phrase transformation -SELECT 'a <-> (b|c)'::tsquery; -SELECT '(a|b) <-> c'::tsquery; -SELECT '(a|b) <-> (d|c)'::tsquery; - -SELECT 'a <-> (b&c)'::tsquery; -SELECT '(a&b) <-> c'::tsquery; -SELECT '(a&b) <-> (d&c)'::tsquery; - -SELECT 'a <-> !b'::tsquery; -SELECT '!a <-> b'::tsquery; -SELECT '!a <-> !b'::tsquery; - -SELECT 'a <-> !(b&c)'::tsquery; -SELECT 'a <-> !(b|c)'::tsquery; -SELECT '!(a&b) <-> c'::tsquery; -SELECT '!(a|b) <-> c'::tsquery; - -SELECT '(!a|b) <-> c'::tsquery; -SELECT '(!a&b) <-> c'::tsquery; -SELECT 'c <-> (!a|b)'::tsquery; -SELECT 'c <-> (!a&b)'::tsquery; - -SELECT '(a|b) <-> !c'::tsquery; -SELECT '(a&b) <-> !c'::tsquery; -SELECT '!c <-> (a|b)'::tsquery; -SELECT '!c <-> (a&b)'::tsquery; - 
--comparisons SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' > 'b & c'::tsquery as "false"; @@ -146,10 +118,33 @@ SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true"; SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true"; SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true"; -SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false"; -SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true"; +SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false"; SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true"; SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true"; +-- without position data, phrase search does not match +SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false"; + +select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false"; +select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true"; +select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false"; +select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true"; +select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false"; +select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true"; +select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false"; +select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false"; +select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false"; +select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false"; +select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'x y q') @@ 
'(!x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true"; +select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true"; +select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false"; +select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true"; +select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true"; --ranking SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); @@ -193,6 +188,7 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true"; SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false"; +SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true"; -- tsvector editing operations