From 9728eda7925b6d42621b38c48e91ef9ab8d90cbc Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 13 Feb 2013 14:07:06 -0500
Subject: [PATCH] Fix contrib/pg_trgm's similarity() function for trigram-free strings.

Cases such as similarity('', '') produced a NaN result due to computing
0/0. Per discussion, make it return zero instead.

This appears to be the basic cause of bug #7867 from Michele Baravalle,
although it remains unclear why her installation doesn't think Cyrillic
letters are letters.

Back-patch to all active branches.
---
 contrib/pg_trgm/expected/pg_trgm.out | 6 ++++++
 contrib/pg_trgm/sql/pg_trgm.sql | 2 ++
 contrib/pg_trgm/trgm_op.c | 8 ++++++--
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 81d0ca80b20..ea4d5fab061 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -53,6 +53,12 @@ select similarity('wow',' WOW ');
           1
 (1 row)
 
+select similarity('---', '####---');
+ similarity 
+------------
+          0
+(1 row)
+
 CREATE TABLE test_trgm(t text);
 \copy test_trgm from 'data/trgm.data
 select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 81ab1e79b17..b235ca73577 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -11,6 +11,8 @@ select show_trgm('a b C0*%^');
 select similarity('wow','WOWa ');
 select similarity('wow',' WOW ');
 
+select similarity('---', '####---');
+
 CREATE TABLE test_trgm(t text);
 
 \copy test_trgm from 'data/trgm.data
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 87dffd1dd2c..9638dbdb701 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -553,6 +553,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
 	len1 = ARRNELEM(trg1);
 	len2 = ARRNELEM(trg2);
 
+	/* explicit test is needed to avoid 0/0 division when both lengths are 0 */
+	if (len1 <= 0 || len2 <= 0)
+		return (float4) 0.0;
+
 	while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
 	{
 		int res = CMPTRGM(ptr1, ptr2);
@@ -570,9 +574,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
 	}
 
 #ifdef DIVUNION
-	return ((((float4) count) / ((float4) (len1 + len2 - count))));
+	return ((float4) count) / ((float4) (len1 + len2 - count));
 #else
-	return (((float) count) / ((float) ((len1 > len2) ? len1 : len2)));
+	return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
 #endif
 }
 
-- 
GitLab