From c7faf451608a08e1aa65951bb02fac7f524f1d7c Mon Sep 17 00:00:00 2001
From: Teodor Sigaev <teodor@sigaev.ru>
Date: Fri, 2 Jun 2006 15:35:42 +0000
Subject: [PATCH] Add stricter checks for stop words and unrecognized words;
 allow only recognized words in the thesaurus configuration file.

---
 contrib/tsearch2/dict_thesaurus.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/contrib/tsearch2/dict_thesaurus.c b/contrib/tsearch2/dict_thesaurus.c
index 8e543a4db71..a584aa15b72 100644
--- a/contrib/tsearch2/dict_thesaurus.c
+++ b/contrib/tsearch2/dict_thesaurus.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.1 2006/05/31 14:05:31 teodor Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.2 2006/06/02 15:35:42 teodor Exp $ */
 
 /*
  * thesaurus
@@ -330,8 +330,12 @@ compileTheLexeme(DictThesaurus *d) {
 			);
 
 		if ( !(ptr && ptr->lexeme) ) {
+			if ( !ptr )
+				elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary", d->wrds[i].lexeme);
+			else
+				elog(ERROR,"Thesaurus: word '%s' is recognized as stop-word, assign any stop-word", d->wrds[i].lexeme);
+
 			newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
-			elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, assign any non-recognized word", d->wrds[i].lexeme);
 		} else {
 			while( ptr->lexeme ) {
 				TSLexeme	*remptr = ptr+1;
@@ -420,7 +424,7 @@ compileTheSubstitute(DictThesaurus *d) {
 			);
 
 			reml = lexized;
-			if ( lexized ) {
+			if ( lexized && lexized->lexeme ) {
 				int toset = (lexized->lexeme && outptr != d->subst[i].res ) ? (outptr - d->subst[i].res)  : -1;
 
 				while( lexized->lexeme ) {
@@ -443,6 +447,8 @@ compileTheSubstitute(DictThesaurus *d) {
 
 				if ( toset > 0)
 					d->subst[i].res[toset].flags |= TSL_ADDPOS;
+			} else {
+				elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, ignored", inptr->lexeme);
 			}
 
 			if ( inptr->lexeme )
@@ -450,6 +456,9 @@ compileTheSubstitute(DictThesaurus *d) {
 			inptr++;
 		}
 
+		if ( outptr == d->subst[i].res )
+			elog(ERROR,"Thesaurus: all words in subsitution aren't recognized by subdictionary");
+
 		d->subst[i].reslen = outptr - d->subst[i].res;
 
 		free(rem);
-- 
GitLab