From 1420f3a9827a39e5c6c998f6fa35a6fc97903145 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Mon, 24 Mar 2014 14:36:36 -0400
Subject: [PATCH] Fix ts_rank_cd() to ignore stripped lexemes

Previously, stripped lexemes got a default location and could be
considered if mixed with non-stripped lexemes.

BACKWARD INCOMPATIBILITY CHANGE
---
 doc/src/sgml/textsearch.sgml          | 10 +++++++---
 src/backend/utils/adt/tsrank.c        |  5 +++--
 src/test/regress/expected/tsearch.out | 14 ++++++++++++++
 src/test/regress/sql/tsearch.sql      |  6 ++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 604e54579de..9e78286bb59 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C');
        </para>
 
        <para>
-        This function requires positional information in its input.
-        Therefore it will not work on <quote>stripped</> <type>tsvector</>
-        values — it will always return zero.
+        This function requires lexeme positional information to perform
+        its calculation. Therefore, it ignores any <quote>stripped</>
+        lexemes in the <type>tsvector</>. If there are no unstripped
+        lexemes in the input, the result will be zero. (See <xref
+        linkend="textsearch-manipulate-tsvector"> for more information
+        about the <function>strip</> function and positional information
+        in <type>tsvector</>s.)
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index c9e71c9e21d..dcb681d085f 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 		}
 		else
 		{
-			dimt = POSNULL.npos;
-			post = POSNULL.pos;
+			/* ignore words without positions */
+			entry++;
+			continue;
 		}
 
 		while (cur + dimt >= len)
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 9341dbe0d77..d22d3451458 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
         0.1
 (1 row)
 
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 9fd12076ace..fa460cd4b9b 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -165,6 +165,12 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));
 
+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
-- 
GitLab