From d015d08b43cb123edc528012673d8fb7eeb5f5f5 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sat, 27 Oct 2007 16:01:09 +0000 Subject: [PATCH] Rename default text search parser's "uri" token type to "url_path", per recommendation from Alvaro. This doesn't force initdb since the numeric token type in the catalogs doesn't change; but note that the expected regression test output changed. --- doc/src/sgml/textsearch.sgml | 16 ++++---- src/backend/snowball/snowball.sql.in | 7 ++-- src/backend/tsearch/wparser_def.c | 54 +++++++++++++-------------- src/test/regress/expected/tsearch.out | 2 +- 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 71e29dbfcbf..9cab9a4b46c 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.27 2007/10/27 00:19:45 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.28 2007/10/27 16:01:08 tgl Exp $ --> <chapter id="textsearch"> <title id="textsearch-title">Full Text Search</title> @@ -1815,8 +1815,8 @@ LIMIT 10; <entry><literal>example.com</literal></entry> </row> <row> - <entry><literal>uri</></entry> - <entry>URI</entry> + <entry><literal>url_path</></entry> + <entry>URL path</entry> <entry><literal>/stuff/index.html</literal>, in the context of a URL</entry> </row> <row> @@ -1907,7 +1907,7 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h protocol | Protocol head | http:// url | URL | example.com/stuff/index.html host | Host | example.com - uri | URI | /stuff/index.html + url_path | URL path | /stuff/index.html </programlisting> </para> @@ -2632,7 +2632,7 @@ ALTER TEXT SEARCH CONFIGURATION pg <programlisting> ALTER TEXT SEARCH CONFIGURATION pg - DROP MAPPING FOR email, url, sfloat, uri, float; + DROP MAPPING FOR email, url, url_path, sfloat, float; </programlisting> </para> @@ -2939,7 +2939,7 @@ SELECT * FROM ts_token_type('default'); 15 | numhword | Hyphenated word, letters and digits 16 | asciihword | Hyphenated word, all ASCII 17 | hword | Hyphenated word, all letters - 18 | uri | URI + 18 | url_path | URL path 19 | file | File or path name 20 | float | Decimal notation 21 | int | Signed integer @@ -3308,8 +3308,8 @@ Parser: "pg_catalog.default" numword | simple sfloat | simple uint | simple - uri | simple url | simple + url_path | simple version | simple word | russian_stem </programlisting> @@ -3398,8 +3398,8 @@ Parser: "pg_catalog.default" sfloat | Scientific notation tag | HTML tag uint | Unsigned integer - uri | URI url | URL + url_path | URL path version | Version number word | Word, all letters (23 rows) diff --git a/src/backend/snowball/snowball.sql.in b/src/backend/snowball/snowball.sql.in index 7a32c85edb2..197d94faf1f 100644 --- a/src/backend/snowball/snowball.sql.in +++ b/src/backend/snowball/snowball.sql.in @@ -1,4 +1,4 @@ --- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.5 2007/10/23 20:46:12 tgl Exp $$ +-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.6 2007/10/27 16:01:08 tgl Exp $$ -- text search configuration for _LANGNAME_ language CREATE TEXT SEARCH DICTIONARY _DICTNAME_ @@ -12,8 +12,9 @@ CREATE TEXT SEARCH CONFIGURATION _CFGNAME_ COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'configuration for _LANGNAME_ language'; ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING - FOR email, url, host, sfloat, version, uri, file, float, int, uint, - numword, hword_numpart, numhword + FOR email, url, url_path, host, file, version, + sfloat, float, int, uint, + numword, hword_numpart, numhword WITH simple; ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index e6df88d9c76..5f8643a61b6 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.4 2007/10/23 20:46:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.5 2007/10/27 16:01:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,7 +41,7 @@ #define NUMHWORD 15 #define ASCIIHWORD 16 #define HWORD 17 -#define URI 18 +#define URLPATH 18 #define FILEPATH 19 #define DECIMAL 20 #define SIGNEDINT 21 @@ -69,7 +69,7 @@ static const char * const tok_alias[] = { "numhword", "asciihword", "hword", - "uri", + "url_path", "file", "float", "int", @@ -96,7 +96,7 @@ static const char * const lex_descr[] = { "Hyphenated word, letters and digits", "Hyphenated word, all ASCII", "Hyphenated word, all letters", - "URI", + "URL path", "File or path name", "Decimal notation", "Signed integer", @@ -164,9 +164,9 @@ typedef enum TPS_InPathSecond, TPS_InFile, TPS_InFileNext, - TPS_InURIFirst, - TPS_InURIStart, - TPS_InURI, + TPS_InURLPathFirst, + TPS_InURLPathStart, + TPS_InURLPath, TPS_InFURL, TPS_InProtocolFirst, TPS_InProtocolSecond, @@ -624,7 +624,7 @@ p_ishost(TParser * prs) } static int -p_isURI(TParser * prs) +p_isURLPath(TParser * prs) { TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); int res = 0; @@ -632,7 +632,7 @@ p_isURI(TParser * prs) tmpprs->state = newTParserPosition(tmpprs->state); tmpprs->state->state = TPS_InFileFirst; - if (TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH)) + if (TParserGet(tmpprs) && (tmpprs->type == URLPATH || tmpprs->type == FILEPATH)) { prs->state->posbyte += tmpprs->lenbytelexeme; prs->state->poschar += tmpprs->lencharlexeme; @@ -995,7 +995,7 @@ static TParserStateActionItem actionTPS_InHostDomain[] = { {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, - {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, + {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL}, {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} }; @@ -1009,7 +1009,7 @@ static TParserStateActionItem actionTPS_InPortFirst[] = { static TParserStateActionItem actionTPS_InPort[] = { {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}, {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, - {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, + {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL}, {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} }; @@ -1042,7 +1042,7 @@ static TParserStateActionItem actionTPS_InFileFirst[] = { {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, {p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL}, {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, - {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL}, {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; @@ -1089,7 +1089,7 @@ static TParserStateActionItem actionTPS_InFile[] = { {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL}, {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL}, {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL} }; @@ -1101,29 +1101,29 @@ static TParserStateActionItem actionTPS_InFileNext[] = { {NULL, 0, A_POP, TPS_Null, 0, NULL} }; -static TParserStateActionItem actionTPS_InURIFirst[] = { +static TParserStateActionItem actionTPS_InURLPathFirst[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_iseqC, '"', A_POP, TPS_Null, 0, NULL}, {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL}, - {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL}, + {p_isnotspace, 0, A_CLEAR, TPS_InURLPath, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL}, }; -static TParserStateActionItem actionTPS_InURIStart[] = { - {NULL, 0, A_NEXT, TPS_InURI, 0, NULL} +static TParserStateActionItem actionTPS_InURLPathStart[] = { + {NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL} }; -static TParserStateActionItem actionTPS_InURI[] = { - {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL}, - {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL}, - {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL}, - {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL}, - {NULL, 0, A_BINGO, TPS_Base, URI, NULL} +static TParserStateActionItem actionTPS_InURLPath[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL}, + {p_iseqC, '"', A_BINGO, TPS_Base, URLPATH, NULL}, + {p_iseqC, '\'', A_BINGO, TPS_Base, URLPATH, NULL}, + {p_isnotspace, 0, A_NEXT, TPS_InURLPath, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL} }; static TParserStateActionItem actionTPS_InFURL[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, - {p_isURI, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL}, + {p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; @@ -1344,9 +1344,9 @@ static const TParserStateAction Actions[] = { {TPS_InPathSecond, actionTPS_InPathSecond}, {TPS_InFile, actionTPS_InFile}, {TPS_InFileNext, actionTPS_InFileNext}, - {TPS_InURIFirst, actionTPS_InURIFirst}, - {TPS_InURIStart, actionTPS_InURIStart}, - {TPS_InURI, actionTPS_InURI}, + {TPS_InURLPathFirst, actionTPS_InURLPathFirst}, + {TPS_InURLPathStart, actionTPS_InURLPathStart}, + {TPS_InURLPath, actionTPS_InURLPath}, {TPS_InFURL, actionTPS_InFURL}, {TPS_InProtocolFirst, actionTPS_InProtocolFirst}, {TPS_InProtocolSecond, actionTPS_InProtocolSecond}, diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 57a4b39700c..9de79591341 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -227,7 +227,7 @@ SELECT * FROM ts_token_type('default'); 15 | numhword | Hyphenated word, letters and digits 16 | asciihword | Hyphenated word, all ASCII 17 | hword | Hyphenated word, all letters - 18 | uri | URI + 18 | url_path | URL path 19 | file | File or path name 20 | float | Decimal notation 21 | int | Signed integer -- GitLab