From 1aaf39bd20122984bafa30bf4ec8ba357b59e955 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 27 Oct 2007 17:53:15 +0000
Subject: [PATCH] Add some rudimentary tracing code to the default text search
 parser, to help in debugging its state-machine rules.  Const-ify all the
 constant tables. Minor other code cleanup, including using "token" rather
 than "lexeme" to describe the output strings.

---
 src/backend/tsearch/wparser_def.c | 496 ++++++++++++++++--------------
 1 file changed, 272 insertions(+), 224 deletions(-)

diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 5f8643a61b6..7fa0f435b20 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.5 2007/10/27 16:01:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.6 2007/10/27 17:53:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,10 @@
 #include "utils/builtins.h"
 
 
+/* Define WPARSER_TRACE to enable tracing of parser behavior */
+/* #define WPARSER_TRACE */
+
+
 /* Output token categories */
 
 #define ASCIIWORD		1
@@ -221,23 +225,16 @@ typedef struct
 #define A_MERGE		0x0020
 #define A_CLRALL	0x0040
 
-typedef struct
-{
-	TParserState state;
-	TParserStateActionItem *action;
-} TParserStateAction;
-
 typedef struct TParserPosition
 {
 	int			posbyte;		/* position of parser in bytes */
-	int			poschar;		/* osition of parser in characters */
+	int			poschar;		/* position of parser in characters */
 	int			charlen;		/* length of current char */
-	int			lenbytelexeme;
-	int			lencharlexeme;
+	int			lenbytetoken;	/* length of token-so-far in bytes */
+	int			lenchartoken;	/* length of token-so-far in chars */
 	TParserState state;
 	struct TParserPosition *prev;
-	int			flags;
-	TParserStateActionItem *pushedAtAction;
+	const TParserStateActionItem *pushedAtAction;
 } TParserPosition;
 
 typedef struct TParser
@@ -261,11 +258,10 @@ typedef struct TParser
 	char		c;
 
 	/* out */
-	char	   *lexeme;
-	int			lenbytelexeme;
-	int			lencharlexeme;
+	char	   *token;
+	int			lenbytetoken;
+	int			lenchartoken;
 	int			type;
-
 } TParser;
 
 
@@ -318,6 +314,10 @@ TParserInit(char *str, int len)
 	prs->state = newTParserPosition(NULL);
 	prs->state->state = TPS_Base;
 
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "parsing \"%.*s\"\n", len, str);
+#endif
+
 	return prs;
 }
 
@@ -541,20 +541,20 @@ _make_compiler_happy(void)
 static void
 SpecialTags(TParser * prs)
 {
-	switch (prs->state->lencharlexeme)
+	switch (prs->state->lenchartoken)
 	{
 		case 8:			/* </script */
-			if (pg_strncasecmp(prs->lexeme, "</script", 8) == 0)
+			if (pg_strncasecmp(prs->token, "</script", 8) == 0)
 				prs->ignore = false;
 			break;
 		case 7:			/* <script || </style */
-			if (pg_strncasecmp(prs->lexeme, "</style", 7) == 0)
+			if (pg_strncasecmp(prs->token, "</style", 7) == 0)
 				prs->ignore = false;
-			else if (pg_strncasecmp(prs->lexeme, "<script", 7) == 0)
+			else if (pg_strncasecmp(prs->token, "<script", 7) == 0)
 				prs->ignore = true;
 			break;
 		case 6:			/* <style */
-			if (pg_strncasecmp(prs->lexeme, "<style", 6) == 0)
+			if (pg_strncasecmp(prs->token, "<style", 6) == 0)
 				prs->ignore = true;
 			break;
 		default:
@@ -566,24 +566,24 @@ static void
 SpecialFURL(TParser * prs)
 {
 	prs->wanthost = true;
-	prs->state->posbyte -= prs->state->lenbytelexeme;
-	prs->state->poschar -= prs->state->lencharlexeme;
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
 }
 
 static void
 SpecialHyphen(TParser * prs)
 {
-	prs->state->posbyte -= prs->state->lenbytelexeme;
-	prs->state->poschar -= prs->state->lencharlexeme;
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
 }
 
 static void
 SpecialVerVersion(TParser * prs)
 {
-	prs->state->posbyte -= prs->state->lenbytelexeme;
-	prs->state->poschar -= prs->state->lencharlexeme;
-	prs->state->lenbytelexeme = 0;
-	prs->state->lencharlexeme = 0;
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
+	prs->state->lenbytetoken = 0;
+	prs->state->lenchartoken = 0;
 }
 
 static int
@@ -611,10 +611,10 @@ p_ishost(TParser * prs)
 
 	if (TParserGet(tmpprs) && tmpprs->type == HOST)
 	{
-		prs->state->posbyte += tmpprs->lenbytelexeme;
-		prs->state->poschar += tmpprs->lencharlexeme;
-		prs->state->lenbytelexeme += tmpprs->lenbytelexeme;
-		prs->state->lencharlexeme += tmpprs->lencharlexeme;
+		prs->state->posbyte += tmpprs->lenbytetoken;
+		prs->state->poschar += tmpprs->lenchartoken;
+		prs->state->lenbytetoken += tmpprs->lenbytetoken;
+		prs->state->lenchartoken += tmpprs->lenchartoken;
 		prs->state->charlen = tmpprs->state->charlen;
 		res = 1;
 	}
@@ -634,10 +634,10 @@ p_isURLPath(TParser * prs)
 
 	if (TParserGet(tmpprs) && (tmpprs->type == URLPATH || tmpprs->type == FILEPATH))
 	{
-		prs->state->posbyte += tmpprs->lenbytelexeme;
-		prs->state->poschar += tmpprs->lencharlexeme;
-		prs->state->lenbytelexeme += tmpprs->lenbytelexeme;
-		prs->state->lencharlexeme += tmpprs->lencharlexeme;
+		prs->state->posbyte += tmpprs->lenbytetoken;
+		prs->state->poschar += tmpprs->lenchartoken;
+		prs->state->lenbytetoken += tmpprs->lenbytetoken;
+		prs->state->lenchartoken += tmpprs->lenchartoken;
 		prs->state->charlen = tmpprs->state->charlen;
 		res = 1;
 	}
@@ -650,7 +650,7 @@ p_isURLPath(TParser * prs)
  * Table of state/action of parser
  */
 
-static TParserStateActionItem actionTPS_Base[] = {
+static const TParserStateActionItem actionTPS_Base[] = {
 	{p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
 	{p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
 	{p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
@@ -667,7 +667,7 @@ static TParserStateActionItem actionTPS_Base[] = {
 };
 
 
-static TParserStateActionItem actionTPS_InNumWord[] = {
+static const TParserStateActionItem actionTPS_InNumWord[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
 	{p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
 	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
@@ -677,7 +677,7 @@ static TParserStateActionItem actionTPS_InNumWord[] = {
 	{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
 };
 
-static TParserStateActionItem actionTPS_InAsciiWord[] = {
+static const TParserStateActionItem actionTPS_InAsciiWord[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
@@ -693,7 +693,7 @@ static TParserStateActionItem actionTPS_InAsciiWord[] = {
 	{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
 };
 
-static TParserStateActionItem actionTPS_InWord[] = {
+static const TParserStateActionItem actionTPS_InWord[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
@@ -701,7 +701,7 @@ static TParserStateActionItem actionTPS_InWord[] = {
 	{NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
 };
 
-static TParserStateActionItem actionTPS_InUnsignedInt[] = {
+static const TParserStateActionItem actionTPS_InUnsignedInt[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
@@ -714,13 +714,13 @@ static TParserStateActionItem actionTPS_InUnsignedInt[] = {
 	{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
 };
 
-static TParserStateActionItem actionTPS_InSignedIntFirst[] = {
+static const TParserStateActionItem actionTPS_InSignedIntFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InSignedInt[] = {
+static const TParserStateActionItem actionTPS_InSignedInt[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
@@ -729,7 +729,7 @@ static TParserStateActionItem actionTPS_InSignedInt[] = {
 	{NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
 };
 
-static TParserStateActionItem actionTPS_InSpace[] = {
+static const TParserStateActionItem actionTPS_InSpace[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
 	{p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},
 	{p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
@@ -741,13 +741,13 @@ static TParserStateActionItem actionTPS_InSpace[] = {
 	{NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
 };
 
-static TParserStateActionItem actionTPS_InUDecimalFirst[] = {
+static const TParserStateActionItem actionTPS_InUDecimalFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InUDecimal[] = {
+static const TParserStateActionItem actionTPS_InUDecimal[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
@@ -756,13 +756,13 @@ static TParserStateActionItem actionTPS_InUDecimal[] = {
 	{NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
 };
 
-static TParserStateActionItem actionTPS_InDecimalFirst[] = {
+static const TParserStateActionItem actionTPS_InDecimalFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InDecimal[] = {
+static const TParserStateActionItem actionTPS_InDecimal[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},
@@ -771,33 +771,33 @@ static TParserStateActionItem actionTPS_InDecimal[] = {
 	{NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
 };
 
-static TParserStateActionItem actionTPS_InVerVersion[] = {
+static const TParserStateActionItem actionTPS_InVerVersion[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InSVerVersion[] = {
+static const TParserStateActionItem actionTPS_InSVerVersion[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},
 	{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
 };
 
 
-static TParserStateActionItem actionTPS_InVersionFirst[] = {
+static const TParserStateActionItem actionTPS_InVersionFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InVersion[] = {
+static const TParserStateActionItem actionTPS_InVersion[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
 };
 
-static TParserStateActionItem actionTPS_InMantissaFirst[] = {
+static const TParserStateActionItem actionTPS_InMantissaFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
 	{p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
@@ -805,50 +805,50 @@ static TParserStateActionItem actionTPS_InMantissaFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InMantissaSign[] = {
+static const TParserStateActionItem actionTPS_InMantissaSign[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InMantissa[] = {
+static const TParserStateActionItem actionTPS_InMantissa[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = {
+static const TParserStateActionItem actionTPS_InHTMLEntityFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHTMLEntity[] = {
+static const TParserStateActionItem actionTPS_InHTMLEntity[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
 	{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
+static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
+static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
 	{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
+static const TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
 	{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagFirst[] = {
+static const TParserStateActionItem actionTPS_InTagFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
 	{p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
@@ -857,7 +857,7 @@ static TParserStateActionItem actionTPS_InTagFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InXMLBegin[] = {
+static const TParserStateActionItem actionTPS_InXMLBegin[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	/* <?xml ... */
 	{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},
@@ -865,13 +865,13 @@ static TParserStateActionItem actionTPS_InXMLBegin[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagCloseFirst[] = {
+static const TParserStateActionItem actionTPS_InTagCloseFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InTagName, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagName[] = {
+static const TParserStateActionItem actionTPS_InTagName[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	/* <br/> case */
 	{p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL},
@@ -881,13 +881,13 @@ static TParserStateActionItem actionTPS_InTagName[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagBeginEnd[] = {
+static const TParserStateActionItem actionTPS_InTagBeginEnd[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTag[] = {
+static const TParserStateActionItem actionTPS_InTag[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
 	{p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
@@ -908,30 +908,30 @@ static TParserStateActionItem actionTPS_InTag[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagEscapeK[] = {
+static const TParserStateActionItem actionTPS_InTagEscapeK[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
 	{p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
 	{NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagEscapeKK[] = {
+static const TParserStateActionItem actionTPS_InTagEscapeKK[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
 	{p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
 	{NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagBackSleshed[] = {
+static const TParserStateActionItem actionTPS_InTagBackSleshed[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{NULL, 0, A_MERGE, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InTagEnd[] = {
+static const TParserStateActionItem actionTPS_InTagEnd[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
 };
 
-static TParserStateActionItem actionTPS_InCommentFirst[] = {
+static const TParserStateActionItem actionTPS_InCommentFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
 	/* <!DOCTYPE ...> */
@@ -940,43 +940,43 @@ static TParserStateActionItem actionTPS_InCommentFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InCommentLast[] = {
+static const TParserStateActionItem actionTPS_InCommentLast[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InComment[] = {
+static const TParserStateActionItem actionTPS_InComment[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst, 0, NULL},
 	{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InCloseCommentFirst[] = {
+static const TParserStateActionItem actionTPS_InCloseCommentFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},
 	{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InCloseCommentLast[] = {
+static const TParserStateActionItem actionTPS_InCloseCommentLast[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
 	{p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
 	{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InCommentEnd[] = {
+static const TParserStateActionItem actionTPS_InCommentEnd[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHostFirstDomain[] = {
+static const TParserStateActionItem actionTPS_InHostFirstDomain[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHostDomainSecond, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHostDomainSecond[] = {
+static const TParserStateActionItem actionTPS_InHostDomainSecond[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
 	{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
@@ -986,7 +986,7 @@ static TParserStateActionItem actionTPS_InHostDomainSecond[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHostDomain[] = {
+static const TParserStateActionItem actionTPS_InHostDomain[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
 	{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
@@ -1000,13 +1000,13 @@ static TParserStateActionItem actionTPS_InHostDomain[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
 };
 
-static TParserStateActionItem actionTPS_InPortFirst[] = {
+static const TParserStateActionItem actionTPS_InPortFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InPort[] = {
+static const TParserStateActionItem actionTPS_InPort[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
 	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
@@ -1014,14 +1014,14 @@ static TParserStateActionItem actionTPS_InPort[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHostFirstAN[] = {
+static const TParserStateActionItem actionTPS_InHostFirstAN[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHost[] = {
+static const TParserStateActionItem actionTPS_InHost[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
@@ -1031,12 +1031,12 @@ static TParserStateActionItem actionTPS_InHost[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InEmail[] = {
+static const TParserStateActionItem actionTPS_InEmail[] = {
 	{p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InFileFirst[] = {
+static const TParserStateActionItem actionTPS_InFileFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
@@ -1047,7 +1047,7 @@ static TParserStateActionItem actionTPS_InFileFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InFileTwiddle[] = {
+static const TParserStateActionItem actionTPS_InFileTwiddle[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
@@ -1056,7 +1056,7 @@ static TParserStateActionItem actionTPS_InFileTwiddle[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InPathFirst[] = {
+static const TParserStateActionItem actionTPS_InPathFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
@@ -1066,14 +1066,14 @@ static TParserStateActionItem actionTPS_InPathFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InPathFirstFirst[] = {
+static const TParserStateActionItem actionTPS_InPathFirstFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
 	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InPathSecond[] = {
+static const TParserStateActionItem actionTPS_InPathSecond[] = {
 	{p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
 	{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
 	{p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
@@ -1081,7 +1081,7 @@ static TParserStateActionItem actionTPS_InPathSecond[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InFile[] = {
+static const TParserStateActionItem actionTPS_InFile[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
@@ -1093,7 +1093,7 @@ static TParserStateActionItem actionTPS_InFile[] = {
 	{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
 };
 
-static TParserStateActionItem actionTPS_InFileNext[] = {
+static const TParserStateActionItem actionTPS_InFileNext[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_CLEAR, TPS_InFile, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
@@ -1101,7 +1101,7 @@ static TParserStateActionItem actionTPS_InFileNext[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InURLPathFirst[] = {
+static const TParserStateActionItem actionTPS_InURLPathFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
@@ -1109,11 +1109,11 @@ static TParserStateActionItem actionTPS_InURLPathFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL},
 };
 
-static TParserStateActionItem actionTPS_InURLPathStart[] = {
+static const TParserStateActionItem actionTPS_InURLPathStart[] = {
 	{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InURLPath[] = {
+static const TParserStateActionItem actionTPS_InURLPath[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
 	{p_iseqC, '"', A_BINGO, TPS_Base, URLPATH, NULL},
 	{p_iseqC, '\'', A_BINGO, TPS_Base, URLPATH, NULL},
@@ -1121,29 +1121,29 @@ static TParserStateActionItem actionTPS_InURLPath[] = {
 	{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
 };
 
-static TParserStateActionItem actionTPS_InFURL[] = {
+static const TParserStateActionItem actionTPS_InFURL[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InProtocolFirst[] = {
+static const TParserStateActionItem actionTPS_InProtocolFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InProtocolSecond[] = {
+static const TParserStateActionItem actionTPS_InProtocolSecond[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InProtocolEnd[] = {
+static const TParserStateActionItem actionTPS_InProtocolEnd[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[] = {
+static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
@@ -1152,7 +1152,7 @@ static TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {
+static const TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen},
 	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
@@ -1161,7 +1161,7 @@ static TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen}
 };
 
-static TParserStateActionItem actionTPS_InHyphenWordFirst[] = {
+static const TParserStateActionItem actionTPS_InHyphenWordFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
@@ -1169,7 +1169,7 @@ static TParserStateActionItem actionTPS_InHyphenWordFirst[] = {
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenWord[] = {
+static const TParserStateActionItem actionTPS_InHyphenWord[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
@@ -1177,27 +1177,27 @@ static TParserStateActionItem actionTPS_InHyphenWord[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen}
 };
 
-static TParserStateActionItem actionTPS_InHyphenNumWordFirst[] = {
+static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenNumWord[] = {
+static const TParserStateActionItem actionTPS_InHyphenNumWord[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},
 	{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
 	{p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
 };
 
-static TParserStateActionItem actionTPS_InHyphenValueFirst[] = {
+static const TParserStateActionItem actionTPS_InHyphenValueFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenValue[] = {
+static const TParserStateActionItem actionTPS_InHyphenValue[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
@@ -1206,7 +1206,7 @@ static TParserStateActionItem actionTPS_InHyphenValue[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
 };
 
-static TParserStateActionItem actionTPS_InHyphenValueExact[] = {
+static const TParserStateActionItem actionTPS_InHyphenValueExact[] = {
 	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
@@ -1214,7 +1214,7 @@ static TParserStateActionItem actionTPS_InHyphenValueExact[] = {
 	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
 };
 
-static TParserStateActionItem actionTPS_InParseHyphen[] = {
+static const TParserStateActionItem actionTPS_InParseHyphen[] = {
 	{p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
@@ -1223,20 +1223,20 @@ static TParserStateActionItem actionTPS_InParseHyphen[] = {
 	{NULL, 0, A_RERUN, TPS_Base, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
+static const TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenWordPart[] = {
+static const TParserStateActionItem actionTPS_InHyphenWordPart[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, PARTHWORD, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
+static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIPARTHWORD, NULL},
 	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
@@ -1244,13 +1244,13 @@ static TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, ASCIIPARTHWORD, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenNumWordPart[] = {
+static const TParserStateActionItem actionTPS_InHyphenNumWordPart[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, NUMPARTHWORD, NULL},
 	{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, NUMPARTHWORD, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
+static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt, 0, NULL},
 	{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
@@ -1258,133 +1258,153 @@ static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = {
+static const TParserStateActionItem actionTPS_InHDecimalPartFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHDecimalPart[] = {
+static const TParserStateActionItem actionTPS_InHDecimalPart[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHVersionPartFirst[] = {
+static const TParserStateActionItem actionTPS_InHVersionPartFirst[] = {
 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
 	{p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL},
 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
 };
 
-static TParserStateActionItem actionTPS_InHVersionPart[] = {
+static const TParserStateActionItem actionTPS_InHVersionPart[] = {
 	{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
 	{p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL},
 	{p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL},
 	{NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL}
 };
 
+
+/*
+ * Element type of Actions[], the main table of per-state parser actions.
+ */
+typedef struct
+{
+	const TParserStateActionItem *action;	/* rule list scanned for this state */
+	TParserState state;			/* only for Assert crosscheck of table order */
+#ifdef WPARSER_TRACE
+	const char *state_name;		/* state's name, only for debug printout */
+#endif   /* WPARSER_TRACE */
+} TParserStateAction;
+
+#ifdef WPARSER_TRACE			/* Actions[] entry initializer; adds the state's name when tracing */
+#define TPARSERSTATEACTION(state) \
+	{ CppConcat(action,state), state, CppAsString(state) }
+#else							/* !WPARSER_TRACE */
+#define TPARSERSTATEACTION(state) \
+	{ CppConcat(action,state), state }
+#endif   /* WPARSER_TRACE */
+
 /*
  * order must be the same as in typedef enum {} TParserState!!
  */
 
 static const TParserStateAction Actions[] = {
-	{TPS_Base, actionTPS_Base},
-	{TPS_InNumWord, actionTPS_InNumWord},
-	{TPS_InAsciiWord, actionTPS_InAsciiWord},
-	{TPS_InWord, actionTPS_InWord},
-	{TPS_InUnsignedInt, actionTPS_InUnsignedInt},
-	{TPS_InSignedIntFirst, actionTPS_InSignedIntFirst},
-	{TPS_InSignedInt, actionTPS_InSignedInt},
-	{TPS_InSpace, actionTPS_InSpace},
-	{TPS_InUDecimalFirst, actionTPS_InUDecimalFirst},
-	{TPS_InUDecimal, actionTPS_InUDecimal},
-	{TPS_InDecimalFirst, actionTPS_InDecimalFirst},
-	{TPS_InDecimal, actionTPS_InDecimal},
-	{TPS_InVerVersion, actionTPS_InVerVersion},
-	{TPS_InSVerVersion, actionTPS_InSVerVersion},
-	{TPS_InVersionFirst, actionTPS_InVersionFirst},
-	{TPS_InVersion, actionTPS_InVersion},
-	{TPS_InMantissaFirst, actionTPS_InMantissaFirst},
-	{TPS_InMantissaSign, actionTPS_InMantissaSign},
-	{TPS_InMantissa, actionTPS_InMantissa},
-	{TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst},
-	{TPS_InHTMLEntity, actionTPS_InHTMLEntity},
-	{TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst},
-	{TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum},
-	{TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd},
-	{TPS_InTagFirst, actionTPS_InTagFirst},
-	{TPS_InXMLBegin, actionTPS_InXMLBegin},
-	{TPS_InTagCloseFirst, actionTPS_InTagCloseFirst},
-	{TPS_InTagName, actionTPS_InTagName},
-	{TPS_InTagBeginEnd, actionTPS_InTagBeginEnd},
-	{TPS_InTag, actionTPS_InTag},
-	{TPS_InTagEscapeK, actionTPS_InTagEscapeK},
-	{TPS_InTagEscapeKK, actionTPS_InTagEscapeKK},
-	{TPS_InTagBackSleshed, actionTPS_InTagBackSleshed},
-	{TPS_InTagEnd, actionTPS_InTagEnd},
-	{TPS_InCommentFirst, actionTPS_InCommentFirst},
-	{TPS_InCommentLast, actionTPS_InCommentLast},
-	{TPS_InComment, actionTPS_InComment},
-	{TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst},
-	{TPS_InCloseCommentLast, actionTPS_InCloseCommentLast},
-	{TPS_InCommentEnd, actionTPS_InCommentEnd},
-	{TPS_InHostFirstDomain, actionTPS_InHostFirstDomain},
-	{TPS_InHostDomainSecond, actionTPS_InHostDomainSecond},
-	{TPS_InHostDomain, actionTPS_InHostDomain},
-	{TPS_InPortFirst, actionTPS_InPortFirst},
-	{TPS_InPort, actionTPS_InPort},
-	{TPS_InHostFirstAN, actionTPS_InHostFirstAN},
-	{TPS_InHost, actionTPS_InHost},
-	{TPS_InEmail, actionTPS_InEmail},
-	{TPS_InFileFirst, actionTPS_InFileFirst},
-	{TPS_InFileTwiddle, actionTPS_InFileTwiddle},
-	{TPS_InPathFirst, actionTPS_InPathFirst},
-	{TPS_InPathFirstFirst, actionTPS_InPathFirstFirst},
-	{TPS_InPathSecond, actionTPS_InPathSecond},
-	{TPS_InFile, actionTPS_InFile},
-	{TPS_InFileNext, actionTPS_InFileNext},
-	{TPS_InURLPathFirst, actionTPS_InURLPathFirst},
-	{TPS_InURLPathStart, actionTPS_InURLPathStart},
-	{TPS_InURLPath, actionTPS_InURLPath},
-	{TPS_InFURL, actionTPS_InFURL},
-	{TPS_InProtocolFirst, actionTPS_InProtocolFirst},
-	{TPS_InProtocolSecond, actionTPS_InProtocolSecond},
-	{TPS_InProtocolEnd, actionTPS_InProtocolEnd},
-	{TPS_InHyphenAsciiWordFirst, actionTPS_InHyphenAsciiWordFirst},
-	{TPS_InHyphenAsciiWord, actionTPS_InHyphenAsciiWord},
-	{TPS_InHyphenWordFirst, actionTPS_InHyphenWordFirst},
-	{TPS_InHyphenWord, actionTPS_InHyphenWord},
-	{TPS_InHyphenNumWordFirst, actionTPS_InHyphenNumWordFirst},
-	{TPS_InHyphenNumWord, actionTPS_InHyphenNumWord},
-	{TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst},
-	{TPS_InHyphenValue, actionTPS_InHyphenValue},
-	{TPS_InHyphenValueExact, actionTPS_InHyphenValueExact},
-	{TPS_InParseHyphen, actionTPS_InParseHyphen},
-	{TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen},
-	{TPS_InHyphenWordPart, actionTPS_InHyphenWordPart},
-	{TPS_InHyphenAsciiWordPart, actionTPS_InHyphenAsciiWordPart},
-	{TPS_InHyphenNumWordPart, actionTPS_InHyphenNumWordPart},
-	{TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt},
-	{TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst},
-	{TPS_InHDecimalPart, actionTPS_InHDecimalPart},
-	{TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst},
-	{TPS_InHVersionPart, actionTPS_InHVersionPart},
-	{TPS_Null, NULL}
+	TPARSERSTATEACTION(TPS_Base),
+	TPARSERSTATEACTION(TPS_InNumWord),
+	TPARSERSTATEACTION(TPS_InAsciiWord),
+	TPARSERSTATEACTION(TPS_InWord),
+	TPARSERSTATEACTION(TPS_InUnsignedInt),
+	TPARSERSTATEACTION(TPS_InSignedIntFirst),
+	TPARSERSTATEACTION(TPS_InSignedInt),
+	TPARSERSTATEACTION(TPS_InSpace),
+	TPARSERSTATEACTION(TPS_InUDecimalFirst),
+	TPARSERSTATEACTION(TPS_InUDecimal),
+	TPARSERSTATEACTION(TPS_InDecimalFirst),
+	TPARSERSTATEACTION(TPS_InDecimal),
+	TPARSERSTATEACTION(TPS_InVerVersion),
+	TPARSERSTATEACTION(TPS_InSVerVersion),
+	TPARSERSTATEACTION(TPS_InVersionFirst),
+	TPARSERSTATEACTION(TPS_InVersion),
+	TPARSERSTATEACTION(TPS_InMantissaFirst),
+	TPARSERSTATEACTION(TPS_InMantissaSign),
+	TPARSERSTATEACTION(TPS_InMantissa),
+	TPARSERSTATEACTION(TPS_InHTMLEntityFirst),
+	TPARSERSTATEACTION(TPS_InHTMLEntity),
+	TPARSERSTATEACTION(TPS_InHTMLEntityNumFirst),
+	TPARSERSTATEACTION(TPS_InHTMLEntityNum),
+	TPARSERSTATEACTION(TPS_InHTMLEntityEnd),
+	TPARSERSTATEACTION(TPS_InTagFirst),
+	TPARSERSTATEACTION(TPS_InXMLBegin),
+	TPARSERSTATEACTION(TPS_InTagCloseFirst),
+	TPARSERSTATEACTION(TPS_InTagName),
+	TPARSERSTATEACTION(TPS_InTagBeginEnd),
+	TPARSERSTATEACTION(TPS_InTag),
+	TPARSERSTATEACTION(TPS_InTagEscapeK),
+	TPARSERSTATEACTION(TPS_InTagEscapeKK),
+	TPARSERSTATEACTION(TPS_InTagBackSleshed),
+	TPARSERSTATEACTION(TPS_InTagEnd),
+	TPARSERSTATEACTION(TPS_InCommentFirst),
+	TPARSERSTATEACTION(TPS_InCommentLast),
+	TPARSERSTATEACTION(TPS_InComment),
+	TPARSERSTATEACTION(TPS_InCloseCommentFirst),
+	TPARSERSTATEACTION(TPS_InCloseCommentLast),
+	TPARSERSTATEACTION(TPS_InCommentEnd),
+	TPARSERSTATEACTION(TPS_InHostFirstDomain),
+	TPARSERSTATEACTION(TPS_InHostDomainSecond),
+	TPARSERSTATEACTION(TPS_InHostDomain),
+	TPARSERSTATEACTION(TPS_InPortFirst),
+	TPARSERSTATEACTION(TPS_InPort),
+	TPARSERSTATEACTION(TPS_InHostFirstAN),
+	TPARSERSTATEACTION(TPS_InHost),
+	TPARSERSTATEACTION(TPS_InEmail),
+	TPARSERSTATEACTION(TPS_InFileFirst),
+	TPARSERSTATEACTION(TPS_InFileTwiddle),
+	TPARSERSTATEACTION(TPS_InPathFirst),
+	TPARSERSTATEACTION(TPS_InPathFirstFirst),
+	TPARSERSTATEACTION(TPS_InPathSecond),
+	TPARSERSTATEACTION(TPS_InFile),
+	TPARSERSTATEACTION(TPS_InFileNext),
+	TPARSERSTATEACTION(TPS_InURLPathFirst),
+	TPARSERSTATEACTION(TPS_InURLPathStart),
+	TPARSERSTATEACTION(TPS_InURLPath),
+	TPARSERSTATEACTION(TPS_InFURL),
+	TPARSERSTATEACTION(TPS_InProtocolFirst),
+	TPARSERSTATEACTION(TPS_InProtocolSecond),
+	TPARSERSTATEACTION(TPS_InProtocolEnd),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWord),
+	TPARSERSTATEACTION(TPS_InHyphenWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenWord),
+	TPARSERSTATEACTION(TPS_InHyphenNumWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenNumWord),
+	TPARSERSTATEACTION(TPS_InHyphenValueFirst),
+	TPARSERSTATEACTION(TPS_InHyphenValue),
+	TPARSERSTATEACTION(TPS_InHyphenValueExact),
+	TPARSERSTATEACTION(TPS_InParseHyphen),
+	TPARSERSTATEACTION(TPS_InParseHyphenHyphen),
+	TPARSERSTATEACTION(TPS_InHyphenWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenNumWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenUnsignedInt),
+	TPARSERSTATEACTION(TPS_InHDecimalPartFirst),
+	TPARSERSTATEACTION(TPS_InHDecimalPart),
+	TPARSERSTATEACTION(TPS_InHVersionPartFirst),
+	TPARSERSTATEACTION(TPS_InHVersionPart)
 };
 
 
 static bool
 TParserGet(TParser * prs)
 {
-	TParserStateActionItem *item = NULL;
+	const TParserStateActionItem *item = NULL;
 
 	Assert(prs->state);
 
 	if (prs->state->posbyte >= prs->lenstr)
 		return false;
 
-	prs->lexeme = prs->str + prs->state->posbyte;
+	prs->token = prs->str + prs->state->posbyte;
 	prs->state->pushedAtAction = NULL;
 
 	/* look at string */
@@ -1400,39 +1420,67 @@ TParserGet(TParser * prs)
 		Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
 		Assert(Actions[prs->state->state].state == prs->state->state);
 
-		item = Actions[prs->state->state].action;
-		Assert(item != NULL);
-
-		if (item < prs->state->pushedAtAction)
-			item = prs->state->pushedAtAction;
+		if (prs->state->pushedAtAction)
+		{
+			/* After a POP, resume with the rule just past pushedAtAction */
+			item = prs->state->pushedAtAction + 1;
+			prs->state->pushedAtAction = NULL;
+		}
+		else
+		{
+			item = Actions[prs->state->state].action;
+			Assert(item != NULL);
+		}
 
 		/* find action by character class */
 		while (item->isclass)
 		{
 			prs->c = item->c;
 			if (item->isclass(prs) != 0)
-			{
-				if (item > prs->state->pushedAtAction)	/* remember: after
-														 * pushing we were by
-														 * false way */
-					break;
-			}
+				break;
 			item++;
 		}
 
-		prs->state->pushedAtAction = NULL;
+#ifdef WPARSER_TRACE			/* one stderr line per matched rule */
+		{
+			TParserPosition *ptr;
+
+			fprintf(stderr, "state ");
+			/* indent two spaces per entry on the prev chain (stack depth) */
+			for (ptr = prs->state->prev; ptr; ptr = ptr->prev)
+				fprintf(stderr, "  ");
+			fprintf(stderr, "%s ", Actions[prs->state->state].state_name);
+			if (prs->state->posbyte < prs->lenstr)
+				fprintf(stderr, "at %c", *(prs->str + prs->state->posbyte));	/* prints the single byte at posbyte */
+			else
+				fprintf(stderr, "at EOF");
+			fprintf(stderr, " matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
+					(int) (item - Actions[prs->state->state].action),	/* 0-based index in this state's rule list */
+					(item->flags & A_BINGO) ? " BINGO" : "",
+					(item->flags & A_POP) ? " POP" : "",
+					(item->flags & A_PUSH) ? " PUSH" : "",
+					(item->flags & A_RERUN) ? " RERUN" : "",
+					(item->flags & A_CLEAR) ? " CLEAR" : "",
+					(item->flags & A_MERGE) ? " MERGE" : "",
+					(item->flags & A_CLRALL) ? " CLRALL" : "",
+					(item->tostate != TPS_Null) ? " tostate " : "",
+					(item->tostate != TPS_Null) ? Actions[item->tostate].state_name : "",
+					(item->type > 0) ? " type " : "",
+					tok_alias[item->type]);	/* NOTE(review): printed even when type is 0; presumably tok_alias[0] is "" -- confirm */
+		}
+#endif   /* WPARSER_TRACE */
 
 		/* call special handler if exists */
 		if (item->special)
 			item->special(prs);
 
-		/* BINGO, lexeme is found */
+		/* BINGO, token is found */
 		if (item->flags & A_BINGO)
 		{
 			Assert(item->type > 0);
-			prs->lenbytelexeme = prs->state->lenbytelexeme;
-			prs->lencharlexeme = prs->state->lencharlexeme;
-			prs->state->lenbytelexeme = prs->state->lencharlexeme = 0;
+			prs->lenbytetoken = prs->state->lenbytetoken;
+			prs->lenchartoken = prs->state->lenchartoken;
+			prs->state->lenbytetoken = prs->state->lenchartoken = 0;
 			prs->type = item->type;
 		}
 
@@ -1480,8 +1528,8 @@ TParserGet(TParser * prs)
 			prs->state->posbyte = ptr->posbyte;
 			prs->state->poschar = ptr->poschar;
 			prs->state->charlen = ptr->charlen;
-			prs->state->lenbytelexeme = ptr->lenbytelexeme;
-			prs->state->lencharlexeme = ptr->lencharlexeme;
+			prs->state->lenbytetoken = ptr->lenbytetoken;
+			prs->state->lenchartoken = ptr->lenchartoken;
 			pfree(ptr);
 		}
 
@@ -1503,9 +1551,9 @@ TParserGet(TParser * prs)
 		if (prs->state->charlen)
 		{
 			prs->state->posbyte += prs->state->charlen;
-			prs->state->lenbytelexeme += prs->state->charlen;
+			prs->state->lenbytetoken += prs->state->charlen;
 			prs->state->poschar++;
-			prs->state->lencharlexeme++;
+			prs->state->lenchartoken++;
 		}
 	}
 
@@ -1546,8 +1594,8 @@ prsd_nexttoken(PG_FUNCTION_ARGS)
 	if (!TParserGet(p))
 		PG_RETURN_INT32(0);
 
-	*t = p->lexeme;
-	*tlen = p->lenbytelexeme;
+	*t = p->token;
+	*tlen = p->lenbytetoken;
 
 	PG_RETURN_INT32(p->type);
 }
-- 
GitLab