diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 0ee4be87f099a5f1efd550cbdb020e934c5c3ea2..cf4495e3b8925c15fccd9d2224fe40f7c1cc0ed8 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.4 2007/11/14 02:36:43 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.5 2007/12/02 21:13:34 tgl Exp $ --> <appendix id="contrib"> <title>Additional Supplied Modules</title> @@ -82,6 +82,8 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql &chkpass; &cube; &dblink; + &dict-int; + &dict-xsyn; &earthdistance; &fuzzystrmatch; &hstore; diff --git a/doc/src/sgml/dict-int.sgml b/doc/src/sgml/dict-int.sgml new file mode 100644 index 0000000000000000000000000000000000000000..4da62710fa3e1edd613c8491ee9376847902fa8c --- /dev/null +++ b/doc/src/sgml/dict-int.sgml @@ -0,0 +1,78 @@ +<sect1 id="dict-int"> + <title>dict_int</title> + + <indexterm zone="dict-int"> + <primary>dict_int</primary> + </indexterm> + + <para> + The motivation for this example dictionary is to control the indexing of + integers (signed and unsigned), and, consequently, to minimize the number of + unique words, which greatly affects the performance of searching. + </para> + + <sect2> + <title>Configuration</title> + <para> + The dictionary accepts two options: + </para> + + <itemizedlist> + <listitem> + <para> + The MAXLEN parameter specifies the maximum length (number of digits) + allowed in an integer word. The default value is 6. + </para> + </listitem> + <listitem> + <para> + The REJECTLONG parameter specifies whether an overlength integer should be + truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns + the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the + dictionary treats an overlength integer as a stop word, so that it will + not be indexed. 
+ </para> + </listitem> + </itemizedlist> + </sect2> + + <sect2> + <title>Usage</title> + + <para> + Running the installation script creates a text search template + <literal>intdict_template</> and a dictionary <literal>intdict</> + based on it, with the default parameters. You can alter the + parameters, for example + +<programlisting> +mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true); +ALTER TEXT SEARCH DICTIONARY +</programlisting> + + or create new dictionaries based on the template. + </para> + + <para> + To test the dictionary, you can try + +<programlisting> +mydb# select ts_lexize('intdict', '12345678'); + ts_lexize +----------- + {123456} +</programlisting> + + but real-world usage will involve including it in a text search + configuration as described in <xref linkend="textsearch">. + That might look like this: + +<programlisting> +ALTER TEXT SEARCH CONFIGURATION english + ALTER MAPPING FOR int, uint WITH intdict; +</programlisting> + + </para> + </sect2> + +</sect1> diff --git a/doc/src/sgml/dict-xsyn.sgml b/doc/src/sgml/dict-xsyn.sgml new file mode 100644 index 0000000000000000000000000000000000000000..8126075ed65a7647695709f2cb782681c6efb041 --- /dev/null +++ b/doc/src/sgml/dict-xsyn.sgml @@ -0,0 +1,78 @@ +<sect1 id="dict-xsyn"> + <title>dict_xsyn</title> + + <indexterm zone="dict-xsyn"> + <primary>dict_xsyn</primary> + </indexterm> + + <para> + The Extended Synonym Dictionary module replaces words with groups of their + synonyms, and so makes it possible to search for a word using any of its + synonyms. + </para> + + <sect2> + <title>Configuration</title> + <para> + A <literal>dict_xsyn</> dictionary accepts the following options: + </para> + <itemizedlist> + <listitem> + <para> + KEEPORIG controls whether the original word is included, or only its + synonyms. Default is 'true'. + </para> + </listitem> + <listitem> + <para> + RULES is the base name of the file containing the list of synonyms. 
+ This file must be in $(prefix)/share/tsearch_data/, and its name must + end in ".rules" (which is not included in the RULES parameter). + </para> + </listitem> + </itemizedlist> + <para> + The rules file has the following format: + </para> + <itemizedlist> + <listitem> + <para> + Each line represents a group of synonyms for a single word, which is + given first on the line. Synonyms are separated by whitespace: + </para> + <programlisting> +word syn1 syn2 syn3 + </programlisting> + </listitem> + <listitem> + <para> + The sharp ('#') sign is a comment delimiter. It may appear at any position + inside the line. The rest of the line will be skipped. + </para> + </listitem> + </itemizedlist> + + <para> + Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/, + for an example. + </para> + </sect2> + + <sect2> + <title>Usage</title> + <programlisting> +mydb=# SELECT ts_lexize('xsyn','word'); +ts_lexize +---------------- +{word,syn1,syn2,syn3} + </programlisting> + <para> + Change dictionary options: + </para> + <programlisting> +mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false); +ALTER TEXT SEARCH DICTIONARY + </programlisting> + </sect2> + +</sect1> diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 6857e8dda7d1122c556373d9fb9a30e835bb0737..1a5064660bcf6da5d6c5c43d9f408e5d9410b3c0 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.53 2007/11/14 01:09:50 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.54 2007/12/02 21:13:34 tgl Exp $ --> <!entity history SYSTEM "history.sgml"> <!entity info SYSTEM "info.sgml"> @@ -96,6 +96,8 @@ <!entity chkpass SYSTEM "chkpass.sgml"> <!entity cube SYSTEM "cube.sgml"> <!entity dblink SYSTEM "dblink.sgml"> +<!entity dict-int SYSTEM "dict-int.sgml"> +<!entity dict-xsyn SYSTEM "dict-xsyn.sgml"> <!entity earthdistance SYSTEM "earthdistance.sgml"> <!entity fuzzystrmatch SYSTEM 
"fuzzystrmatch.sgml"> <!entity hstore SYSTEM "hstore.sgml">