From 3dbbd0f02a257d8d5c4cba14726371505f2e7266 Mon Sep 17 00:00:00 2001
From: Teodor Sigaev <teodor@sigaev.ru>
Date: Mon, 27 Jun 2016 20:47:32 +0300
Subject: [PATCH] Do not fallback to AND for FTS phrase operator.

If there is no positional information of lexemes then phrase operator will not
fallback to AND operator. This change makes needing to modify TS_execute()
interface, because somewhere (in indexes, for example) positional information
is unaccesible and in this cases we need to force fallback to AND.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
---
 src/backend/utils/adt/tsginidx.c      |  2 +-
 src/backend/utils/adt/tsgistidx.c     |  6 +++--
 src/backend/utils/adt/tsrank.c        |  6 +++--
 src/backend/utils/adt/tsvector_op.c   | 35 +++++++++++++++------------
 src/include/tsearch/ts_utils.h        | 19 ++++++++++++++-
 src/test/regress/expected/tsearch.out |  9 ++++---
 src/test/regress/sql/tsearch.sql      |  3 ++-
 7 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index b0963291433..c953f531ff7 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
 
 		res = TS_execute(GETQUERY(query),
 						 &gcv,
-						 true,
+						 TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
 						 checkcondition_gin);
 	}
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index cdd5d43fce5..6cdfb13f6d1 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
 
 		PG_RETURN_BOOL(TS_execute(
 								  GETQUERY(query),
-								  (void *) GETSIGN(key), false,
+								  (void *) GETSIGN(key),
+								  TS_EXEC_PHRASE_AS_AND,
 								  checkcondition_bit
 								  ));
 	}
@@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
 		chkval.arre = chkval.arrb + ARRNELEM(key);
 		PG_RETURN_BOOL(TS_execute(
 								  GETQUERY(query),
-								  (void *) &chkval, true,
+								  (void *) &chkval,
+								  TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
 								  checkcondition_arr
 								  ));
 	}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 32023821b3f..d887a14d057 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 	{
 		fillQueryRepresentationData(qr, ptr);
 
-		if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
+		if (TS_execute(GETQUERY(qr->query), (void *) qr,
+					   TS_EXEC_EMPTY, checkcondition_QueryOperand))
 		{
 			if (WEP_GETPOS(ptr->pos) > ext->q)
 			{
@@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 		 */
 		fillQueryRepresentationData(qr, ptr);
 
-		if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
+		if (TS_execute(GETQUERY(qr->query), (void *) qr,
+					   TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
 		{
 			if (WEP_GETPOS(ptr->pos) < ext->p)
 			{
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 04718829a0b..242b7e137b7 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
  */
 static bool
 TS_phrase_execute(QueryItem *curitem,
-				  void *checkval, bool calcnot, ExecPhraseData *data,
+				  void *checkval, uint32 flags, ExecPhraseData *data,
 				  bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
 {
 	/* since this function recurses, it could be driven to stack overflow */
@@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem,
 		Assert(curitem->qoperator.oper == OP_PHRASE);
 
 		if (!TS_phrase_execute(curitem + curitem->qoperator.left,
-							   checkval, calcnot, &Ldata, chkcond))
+							   checkval, flags, &Ldata, chkcond))
 			return false;
 
-		if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
+		if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
 			return false;
 
 		/*
 		 * if at least one of the operands has no position information,
-		 * fallback to AND operation.
+		 * then return false. But if TS_EXEC_PHRASE_AS_AND flag is set then
+		 * we return true as it is a AND operation
 		 */
 		if (Ldata.npos == 0 || Rdata.npos == 0)
-			return true;
+			return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
 
 		/*
 		 * Result of the operation is a list of the corresponding positions of
@@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem,
  * chkcond is a callback function used to evaluate each VAL node in the query.
  * checkval can be used to pass information to the callback. TS_execute doesn't
  * do anything with it.
- * if calcnot is false, NOT expressions are always evaluated to be true. This
- * is used in ranking.
  * It believes that ordinary operators are always closier to root than phrase
  * operator, so, TS_execute() may not take care of lexeme's position at all.
  */
 bool
-TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
+TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
    bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
 {
 	/* since this function recurses, it could be driven to stack overflow */
@@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
 	switch (curitem->qoperator.oper)
 	{
 		case OP_NOT:
-			if (calcnot)
-				return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			if (flags & TS_EXEC_CALC_NOT)
+				return !TS_execute(curitem + 1, checkval, flags, chkcond);
 			else
 				return true;
 
 		case OP_AND:
-			if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
-				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
+				return TS_execute(curitem + 1, checkval, flags, chkcond);
 			else
 				return false;
 
 		case OP_OR:
-			if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
+			if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
 				return true;
 			else
-				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+				return TS_execute(curitem + 1, checkval, flags, chkcond);
 
 		case OP_PHRASE:
-			return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
+			/*
+			 * do not check TS_EXEC_PHRASE_AS_AND here because chkcond()
+			 * could do something more if it's called from TS_phrase_execute()
+			 */
+			return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
 
 		default:
 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
@@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS)
 	result = TS_execute(
 						GETQUERY(query),
 						&chkval,
-						true,
+						TS_EXEC_CALC_NOT,
 						checkcondition_str
 		);
 
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index e16ddaf72f4..e09a9c636f9 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -111,8 +111,25 @@ typedef struct ExecPhraseData
 	WordEntryPos *pos;
 } ExecPhraseData;
 
-extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
+/*
+ * Evaluates tsquery, flags are followe below
+ */
+extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
 		   bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
+
+#define TS_EXEC_EMPTY			(0x00)
+/*
+ * if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true,
+ * used in cases where NOT cannot be accurately computed (GiST) or
+ * it isn't important (ranking)
+ */
+#define TS_EXEC_CALC_NOT		(0x01)
+/*
+ * Treat OP_PHRASE as OP_AND. Used when posiotional information is not
+ * accessible, like in consistent methods of GIN/GiST indexes
+ */
+#define TS_EXEC_PHRASE_AS_AND	(0x02)
+
 extern bool tsquery_requires_match(QueryItem *curitem);
 
 /*
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 2c3aa1a2937..3a13ad985ff 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
 
 --check OP_PHRASE on index
 create temp table phrase_index_test(fts tsvector);
-insert into phrase_index_test values('A fat cat has just eaten a rat.');
+insert into phrase_index_test values ('A fat cat has just eaten a rat.');
+insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
 create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
-                       fts                       
--------------------------------------------------
- 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
+                fts                
+-----------------------------------
+ 'cat':3 'eaten':6 'fat':2 'rat':8
 (1 row)
 
 set enable_seqscan = on;
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 34b46fa3243..5f3d335fc39 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
 
 --check OP_PHRASE on index
 create temp table phrase_index_test(fts tsvector);
-insert into phrase_index_test values('A fat cat has just eaten a rat.');
+insert into phrase_index_test values ('A fat cat has just eaten a rat.');
+insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
 create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
-- 
GitLab