From d015d08b43cb123edc528012673d8fb7eeb5f5f5 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 27 Oct 2007 16:01:09 +0000
Subject: [PATCH] Rename default text search parser's "uri" token type to
 "url_path", per recommendation from Alvaro.  This doesn't force initdb since
 the catalogs store only the numeric token type, which doesn't change; but
 note that the expected regression test output did change.

---
 doc/src/sgml/textsearch.sgml          | 16 ++++----
 src/backend/snowball/snowball.sql.in  |  7 ++--
 src/backend/tsearch/wparser_def.c     | 54 +++++++++++++--------------
 src/test/regress/expected/tsearch.out |  2 +-
 4 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 71e29dbfcbf..9cab9a4b46c 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.27 2007/10/27 00:19:45 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.28 2007/10/27 16:01:08 tgl Exp $ -->
 
 <chapter id="textsearch">
  <title id="textsearch-title">Full Text Search</title>
@@ -1815,8 +1815,8 @@ LIMIT 10;
       <entry><literal>example.com</literal></entry>
      </row>
      <row>
-      <entry><literal>uri</></entry>
-      <entry>URI</entry>
+      <entry><literal>url_path</></entry>
+      <entry>URL path</entry>
       <entry><literal>/stuff/index.html</literal>, in the context of a URL</entry>
      </row>
      <row>
@@ -1907,7 +1907,7 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h
  protocol | Protocol head | http://
  url      | URL           | example.com/stuff/index.html
  host     | Host          | example.com
- uri      | URI           | /stuff/index.html
+ url_path | URL path      | /stuff/index.html
 </programlisting>
   </para>
 
@@ -2632,7 +2632,7 @@ ALTER TEXT SEARCH CONFIGURATION pg
 
 <programlisting>
 ALTER TEXT SEARCH CONFIGURATION pg
-    DROP MAPPING FOR email, url, sfloat, uri, float;
+    DROP MAPPING FOR email, url, url_path, sfloat, float;
 </programlisting>
    </para>
 
@@ -2939,7 +2939,7 @@ SELECT * FROM ts_token_type('default');
     15 | numhword        | Hyphenated word, letters and digits
     16 | asciihword      | Hyphenated word, all ASCII
     17 | hword           | Hyphenated word, all letters
-    18 | uri             | URI
+    18 | url_path        | URL path
     19 | file            | File or path name
     20 | float           | Decimal notation
     21 | int             | Signed integer
@@ -3308,8 +3308,8 @@ Parser: "pg_catalog.default"
  numword         | simple
  sfloat          | simple
  uint            | simple
- uri             | simple
  url             | simple
+ url_path        | simple
  version         | simple
  word            | russian_stem
 </programlisting>
@@ -3398,8 +3398,8 @@ Parser: "pg_catalog.default"
  sfloat          | Scientific notation
  tag             | HTML tag
  uint            | Unsigned integer
- uri             | URI
  url             | URL
+ url_path        | URL path
  version         | Version number
  word            | Word, all letters
 (23 rows)
diff --git a/src/backend/snowball/snowball.sql.in b/src/backend/snowball/snowball.sql.in
index 7a32c85edb2..197d94faf1f 100644
--- a/src/backend/snowball/snowball.sql.in
+++ b/src/backend/snowball/snowball.sql.in
@@ -1,4 +1,4 @@
--- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.5 2007/10/23 20:46:12 tgl Exp $$
+-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.6 2007/10/27 16:01:08 tgl Exp $$
 
 -- text search configuration for _LANGNAME_ language
 CREATE TEXT SEARCH DICTIONARY _DICTNAME_
@@ -12,8 +12,9 @@ CREATE TEXT SEARCH CONFIGURATION _CFGNAME_
 COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'configuration for _LANGNAME_ language';
 
 ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
-	FOR email, url, host, sfloat, version, uri, file, float, int, uint,
-            numword, hword_numpart, numhword
+	FOR email, url, url_path, host, file, version,
+	    sfloat, float, int, uint,
+	    numword, hword_numpart, numhword
 	WITH simple;
 
 ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index e6df88d9c76..5f8643a61b6 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.4 2007/10/23 20:46:12 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.5 2007/10/27 16:01:08 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -41,7 +41,7 @@
 #define NUMHWORD		15
 #define ASCIIHWORD		16
 #define HWORD			17
-#define URI				18
+#define URLPATH			18
 #define FILEPATH		19
 #define DECIMAL			20
 #define SIGNEDINT		21
@@ -69,7 +69,7 @@ static const char * const tok_alias[] = {
 	"numhword",
 	"asciihword",
 	"hword",
-	"uri",
+	"url_path",
 	"file",
 	"float",
 	"int",
@@ -96,7 +96,7 @@ static const char * const lex_descr[] = {
 	"Hyphenated word, letters and digits",
 	"Hyphenated word, all ASCII",
 	"Hyphenated word, all letters",
-	"URI",
+	"URL path",
 	"File or path name",
 	"Decimal notation",
 	"Signed integer",
@@ -164,9 +164,9 @@ typedef enum
 	TPS_InPathSecond,
 	TPS_InFile,
 	TPS_InFileNext,
-	TPS_InURIFirst,
-	TPS_InURIStart,
-	TPS_InURI,
+	TPS_InURLPathFirst,
+	TPS_InURLPathStart,
+	TPS_InURLPath,
 	TPS_InFURL,
 	TPS_InProtocolFirst,
 	TPS_InProtocolSecond,
@@ -624,7 +624,7 @@ p_ishost(TParser * prs)
 }
 
 static int
-p_isURI(TParser * prs)
+p_isURLPath(TParser * prs)
 {
 	TParser    *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
 	int			res = 0;
@@ -632,7 +632,7 @@ p_isURI(TParser * prs)
 	tmpprs->state = newTParserPosition(tmpprs->state);
 	tmpprs->state->state = TPS_InFileFirst;
 
-	if (TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH))
+	if (TParserGet(tmpprs) && (tmpprs->type == URLPATH || tmpprs->type == FILEPATH))
 	{
 		prs->state->posbyte += tmpprs->lenbytelexeme;
 		prs->state->poschar += tmpprs->lencharlexeme;
@@ -995,7 +995,7 @@ static TParserStateActionItem actionTPS_InHostDomain[] = {
 	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
 	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
 	{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
-	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL},
+	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
 	{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
 };
@@ -1009,7 +1009,7 @@ static TParserStateActionItem actionTPS_InPortFirst[] = {
 static TParserStateActionItem actionTPS_InPort[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
-	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL},
+	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
 	{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
 };
@@ -1042,7 +1042,7 @@ static TParserStateActionItem actionTPS_InFileFirst[] = {
 	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
 	{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
 	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
-	{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+	{p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL},
 	{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
@@ -1089,7 +1089,7 @@ static TParserStateActionItem actionTPS_InFile[] = {
 	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
 	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
-	{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+	{p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
 };
 
@@ -1101,29 +1101,29 @@ static TParserStateActionItem actionTPS_InFileNext[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InURIFirst[] = {
+static TParserStateActionItem actionTPS_InURLPathFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
-	{p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL},
+	{p_isnotspace, 0, A_CLEAR, TPS_InURLPath, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL},
 };
 
-static TParserStateActionItem actionTPS_InURIStart[] = {
-	{NULL, 0, A_NEXT, TPS_InURI, 0, NULL}
+static TParserStateActionItem actionTPS_InURLPathStart[] = {
+	{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InURI[] = {
-	{p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL},
-	{p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL},
-	{p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL},
-	{p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL},
-	{NULL, 0, A_BINGO, TPS_Base, URI, NULL}
+static TParserStateActionItem actionTPS_InURLPath[] = {
+	{p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
+	{p_iseqC, '"', A_BINGO, TPS_Base, URLPATH, NULL},
+	{p_iseqC, '\'', A_BINGO, TPS_Base, URLPATH, NULL},
+	{p_isnotspace, 0, A_NEXT, TPS_InURLPath, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
 };
 
 static TParserStateActionItem actionTPS_InFURL[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
-	{p_isURI, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
+	{p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
@@ -1344,9 +1344,9 @@ static const TParserStateAction Actions[] = {
 	{TPS_InPathSecond, actionTPS_InPathSecond},
 	{TPS_InFile, actionTPS_InFile},
 	{TPS_InFileNext, actionTPS_InFileNext},
-	{TPS_InURIFirst, actionTPS_InURIFirst},
-	{TPS_InURIStart, actionTPS_InURIStart},
-	{TPS_InURI, actionTPS_InURI},
+	{TPS_InURLPathFirst, actionTPS_InURLPathFirst},
+	{TPS_InURLPathStart, actionTPS_InURLPathStart},
+	{TPS_InURLPath, actionTPS_InURLPath},
 	{TPS_InFURL, actionTPS_InFURL},
 	{TPS_InProtocolFirst, actionTPS_InProtocolFirst},
 	{TPS_InProtocolSecond, actionTPS_InProtocolSecond},
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 57a4b39700c..9de79591341 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -227,7 +227,7 @@ SELECT * FROM ts_token_type('default');
     15 | numhword        | Hyphenated word, letters and digits
     16 | asciihword      | Hyphenated word, all ASCII
     17 | hword           | Hyphenated word, all letters
-    18 | uri             | URI
+    18 | url_path        | URL path
     19 | file            | File or path name
     20 | float           | Decimal notation
     21 | int             | Signed integer
-- 
GitLab