From 41d17e042b58cfcf57bf6ecbfe4b2bea3b178aa0 Mon Sep 17 00:00:00 2001
From: Teodor Sigaev <teodor@sigaev.ru>
Date: Thu, 15 Jan 2009 16:33:59 +0000
Subject: [PATCH] Fix URL generation in headline. Only tag lexemes will be
 replaced by a space. Per
 http://archives.postgresql.org/pgsql-bugs/2008-12/msg00013.php

---
 src/backend/tsearch/ts_parse.c    |  4 ++--
 src/backend/tsearch/wparser_def.c | 27 ++++++++++++++++-----------
 src/include/tsearch/ts_public.h   |  5 +++--
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index f5239e8a7be..21b723385db 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.10 2009/01/01 17:23:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.11 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -625,7 +625,7 @@ generateHeadline(HeadlineParsedText *prs)
 				*ptr = ' ';
 				ptr++;
 			}
-			else
+			else if (!wrd->skip)
 			{
 				if (wrd->selected)
 				{
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 1943c1143ce..a4143549756 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.19 2009/01/15 16:33:28 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1587,10 +1587,11 @@ prsd_end(PG_FUNCTION_ARGS)
 #define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 #define ENDPUNCTOKEN(x) ( (x)==SPACE )
 
-#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
-#define HLIDIGNORE(x) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define XMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDIGNORE(x) )
+#define TS_IDIGNORE(x)  ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
+#define HLIDREPLACE(x)  ( (x)==TAG_T )
+#define HLIDSKIP(x)     ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define XMLHLIDSKIP(x)  ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
 #define NOENDTOKEN(x)	( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
 
 typedef struct
@@ -1695,13 +1696,15 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
 			prs->words[i].selected = 1;
 		if (highlight == 0)
 		{
-			if (HLIDIGNORE(prs->words[i].type))
+			if (HLIDREPLACE(prs->words[i].type))
 				prs->words[i].replace = 1;
+			else if ( HLIDSKIP(prs->words[i].type) )
+				prs->words[i].skip = 1;
 		}
 		else
 		{
-			if (XMLHLIDIGNORE(prs->words[i].type))
-				prs->words[i].replace = 1;
+			if (XMLHLIDSKIP(prs->words[i].type))
+				prs->words[i].skip = 1;
 		}
 
 		prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
@@ -2050,13 +2053,15 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
 			prs->words[i].selected = 1;
 		if (highlight == 0)
 		{
-			if (HLIDIGNORE(prs->words[i].type))
+			if (HLIDREPLACE(prs->words[i].type))
 				prs->words[i].replace = 1;
+			else if ( HLIDSKIP(prs->words[i].type) )
+				prs->words[i].skip = 1;
 		}
 		else
 		{
-			if (XMLHLIDIGNORE(prs->words[i].type))
-				prs->words[i].replace = 1;
+			if (XMLHLIDSKIP(prs->words[i].type))
+				prs->words[i].skip = 1;
 		}
 
 		prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 1ae9a67f5ee..0bcc2be00e1 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1998-2009, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.12 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.13 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,8 @@ typedef struct
 				in:1,
 				replace:1,
 				repeated:1,
-				unused:4,
+				skip:1,
+				unused:3,
 				type:8,
 				len:16;
 	char	   *word;
-- 
GitLab