diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index e484ddcbdcd3cd56600b3db403ce0c1a3814686b..1aec17efd97f0b4eea55ce5516651923d0cb9f9e 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ --> <chapter id="textsearch"> <title id="textsearch-title">Full Text Search</title> @@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The'); dictionary can be used to overcome linguistic problems, for example, to prevent an English stemmer dictionary from reducing the word 'Paris' to 'pari'. It is enough to have a <literal>Paris paris</literal> line in the - synonym dictionary and put it before the <literal>english_stem</> dictionary: + synonym dictionary and put it before the <literal>english_stem</> + dictionary. For example: <programlisting> SELECT * FROM ts_debug('english', 'Paris'); @@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris'); <productname>PostgreSQL</> installation's shared-data directory). The file format is just one line per word to be substituted, with the word followed by its synonym, - separated by white space. Blank lines and trailing spaces are ignored, - and upper case is folded to lower case. + separated by white space. Blank lines and trailing spaces are ignored. </para> + <para> + The <literal>synonym</> template also has an optional parameter + <literal>CaseSensitive</>, which defaults to <literal>false</>. When + <literal>CaseSensitive</> is <literal>false</>, words in the synonym file + are folded to lower case, as are input tokens. When it is + <literal>true</>, words and tokens are not folded to lower case, + but are compared as-is. + </para> </sect2> <sect2 id="textsearch-thesaurus"> diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index 16eec98d8b2ba0847460996bfad5dfc7e7e3c7a6..6f263603d7a3900fb02acf3474ba58496fbbeaef 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ typedef struct { int len; /* length of syn array */ Syn *syn; + bool case_sensitive; } DictSyn; /* @@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS) DictSyn *d; ListCell *l; char *filename = NULL; + bool case_sensitive = false; FILE *fin; char *starti, *starto, @@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS) if (pg_strcasecmp("Synonyms", defel->defname) == 0) filename = defGetString(defel); + else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0) + case_sensitive = defGetBoolean(defel); else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS) } } - d->syn[cur].in = lowerstr(starti); - d->syn[cur].out = lowerstr(starto); + if (case_sensitive) + { + d->syn[cur].in = pstrdup(starti); + d->syn[cur].out = pstrdup(starto); + } + else + { + d->syn[cur].in = lowerstr(starti); + d->syn[cur].out = lowerstr(starto); + } cur++; @@ -168,6 +180,8 @@ skipline: d->len = cur; qsort(d->syn, d->len, sizeof(Syn), compareSyn); + d->case_sensitive = case_sensitive; + PG_RETURN_POINTER(d); } @@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS) if (len <= 0 || d->len <= 0) PG_RETURN_POINTER(NULL); - key.in = lowerstr_with_len(in, len); + if (d->case_sensitive) + key.in = pnstrdup(in, len); + else + key.in = lowerstr_with_len(in, len); + key.out = NULL; found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);