From 1420f3a9827a39e5c6c998f6fa35a6fc97903145 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Mon, 24 Mar 2014 14:36:36 -0400
Subject: [PATCH] Fix ts_rank_cd() to ignore stripped lexemes

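Previously, stripped lexemes (those without positional information)
were assigned a default position, so they could still be counted
toward the rank when mixed with non-stripped lexemes.  They are now
skipped entirely.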

BACKWARD-INCOMPATIBLE CHANGE
---
 doc/src/sgml/textsearch.sgml          | 10 +++++++---
 src/backend/utils/adt/tsrank.c        |  5 +++--
 src/test/regress/expected/tsearch.out | 14 ++++++++++++++
 src/test/regress/sql/tsearch.sql      |  6 ++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 604e54579de..9e78286bb59 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
        </para>
 
        <para>
-        This function requires positional information in its input.
-        Therefore it will not work on <quote>stripped</> <type>tsvector</>
-        values &mdash; it will always return zero.
+        This function requires lexeme positional information to perform
+        its calculation.  Therefore, it ignores any <quote>stripped</>
+        lexemes in the <type>tsvector</>.  If there are no unstripped
+        lexemes in the input, the result will be zero.  (See <xref
+        linkend="textsearch-manipulate-tsvector"> for more information
+        about the <function>strip</> function and positional information
+        in <type>tsvector</>s.)
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index c9e71c9e21d..dcb681d085f 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 			}
 			else
 			{
-				dimt = POSNULL.npos;
-				post = POSNULL.pos;
+				/* ignore words without positions */
+				entry++;
+				continue;
 			}
 
 			while (cur + dimt >= len)
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 9341dbe0d77..d22d3451458 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
         0.1
 (1 row)
 
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 9fd12076ace..fa460cd4b9b 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -165,6 +165,12 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));
 
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
-- 
GitLab