diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile index 4901b611ee1e0648f914d8190213b5a533a89492..2ef904ddb4e01629b80c70b8f0a67caf7bce5dcd 100644 --- a/contrib/tsearch2/Makefile +++ b/contrib/tsearch2/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $ MODULE_big = tsearch2 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ @@ -6,7 +6,8 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ wparser.o wparser_def.o \ ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \ tsvector_op.o rank.o ts_stat.o \ - query_util.o query_support.o query_rewrite.o query_gist.o + query_util.o query_support.o query_rewrite.o query_gist.o \ + ts_locale.o SUBDIRS := snowball ispell wordparser SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index 296c0ac676f874dd6e2ed3ed7996df6a8eb8ff0f..a98c2216a8da55d0074aa1c35e3a4190532fa376 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -13,12 +13,12 @@ psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined DETAIL: Creating a shell type definition. psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell -psql:tsearch2.sql:544: NOTICE: type "gtsvector" is not yet defined +psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined DETAIL: Creating a shell type definition. -psql:tsearch2.sql:549: NOTICE: argument type gtsvector is only a shell -psql:tsearch2.sql:998: NOTICE: type "gtsq" is not yet defined +psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell +psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined DETAIL: Creating a shell type definition. -psql:tsearch2.sql:1003: NOTICE: argument type gtsq is only a shell +psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell --tsvector SELECT '1'::tsvector; tsvector @@ -653,7 +653,7 @@ select * from token_type('default'); 11 | lpart_hword | Latin part of hyphenated word 12 | blank | Space symbols 13 | tag | HTML Tag - 14 | http | HTTP head + 14 | protocol | Protocol head 15 | hword | Hyphenated word 16 | lhword | Latin hyphenated word 17 | nlhword | Non-latin hyphenated word @@ -672,14 +672,13 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc -------+-------------------------------------- 22 | 345 12 | - 4 | qwe@efd.r - 12 | - 12 | ' - 12 | + 1 | qwe + 12 | @ + 19 | efd.r + 12 | ' 14 | http:// 6 | www.com - 12 | / - 12 | + 12 | / 14 | http:// 5 | aew.werc.ewr/?ad=qwe&dw 6 | aew.werc.ewr @@ -700,10 +699,8 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 6 | 4aew.werc.ewr 12 | 14 | http:// - 5 | 5aew.werc.ewr:8100/? - 6 | 5aew.werc.ewr - 18 | :8100/? - 12 | + 6 | 5aew.werc.ewr:8100 + 12 | /? 1 | ad 12 | = 1 | qwe @@ -711,12 +708,12 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 1 | dw 12 | 5 | 6aew.werc.ewr:8100/?ad=qwe&dw - 6 | 6aew.werc.ewr - 18 | :8100/?ad=qwe&dw + 6 | 6aew.werc.ewr:8100 + 18 | /?ad=qwe&dw 12 | 5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 - 6 | 7aew.werc.ewr - 18 | :8100/?ad=qwe&dw=%20%32 + 6 | 7aew.werc.ewr:8100 + 18 | /?ad=qwe&dw=%20%32 12 | 7 | +4.0e-10 12 | @@ -747,11 +744,15 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 1 | jf 12 | 1 | sdjk - 13 | <we hjwer <werrwe> + 12 | < + 1 | we 12 | - 3 | ewr1 - 12 | > + 1 | hjwer + 12 | + 13 | <werrwe> 12 | + 3 | ewr1 + 12 | > 3 | ewri2 12 | 13 | <a href="qwe<qwe>"> @@ -767,57 +768,53 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc 12 | 19 | /wqe-324/ewr 12 | - 6 | gist.h - 12 | - 6 | gist.h.c + 19 | gist.h 12 | - 6 | gist.c - 12 | . + 19 | gist.h.c 12 | + 19 | gist.c + 12 | . 1 | readline 12 | 20 | 4.2 12 | 20 | 4.2 - 12 | . - 12 | + 12 | . 20 | 4.2 - 12 | , - 12 | - 15 | readline-4 + 12 | , + 15 | readline-4.2 11 | readline 12 | - 20 | 4.2 12 | - 15 | readline-4 + 15 | readline-4.2 11 | readline 12 | - 20 | 4.2 - 12 | . - 12 | + 12 | . 22 | 234 12 | - 13 | <i <b> + 12 | < + 1 | i + 12 | + 13 | <b> 12 | 1 | wow 12 | - 12 | < - 12 | + 12 | < 1 | jqw 12 | - 12 | < - 12 | > - 12 | + 12 | <> 1 | qwerty -(138 rows) +(135 rows) SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 <i <b> wow < jqw <> qwerty'); - to_tsvector ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24 + to_tsvector +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 'ad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 (1 row) SELECT length(to_tsvector('default', '345 qw')); @@ -831,7 +828,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae <i <b> wow < jqw <> qwerty')); length -------- - 53 + 51 (1 row) select to_tsquery('default', 'qwe & sKies '); @@ -876,6 +873,36 @@ select to_tsquery('default', '(the|and&(i&1))&fghj'); '1' & 'fghj' (1 row) +select plainto_tsquery('default', 'the and z 1))& fghj'); + plainto_tsquery +-------------------- + 'z' & '1' & 'fghj' +(1 row) + +select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd'); + ?column? +----------------------- + 'foo' & 'bar' & 'asd' +(1 row) + +select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg'); + ?column? +------------------------------ + 'foo' & 'bar' | 'asd' & 'fg' +(1 row) + +select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg'); + ?column? +----------------------------------- + 'foo' & 'bar' | !( 'asd' & 'fg' ) +(1 row) + +select plainto_tsquery('default', 'foo bar') && 'asd | fg'; + ?column? +---------------------------------- + 'foo' & 'bar' & ( 'asd' | 'fg' ) +(1 row) + select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca'; ?column? ---------- diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c index e6f1ae3a8984fb36086edd3bb6da04fce160cfbe..e312cf6af7166f00ffde947583200552dd5cc35b 100644 --- a/contrib/tsearch2/query.c +++ b/contrib/tsearch2/query.c @@ -51,10 +51,20 @@ Datum to_tsquery_name(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(to_tsquery_current); Datum to_tsquery_current(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(plainto_tsquery); +Datum plainto_tsquery(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(plainto_tsquery_name); +Datum plainto_tsquery_name(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(plainto_tsquery_current); +Datum plainto_tsquery_current(PG_FUNCTION_ARGS); + /* parser's states */ #define WAITOPERAND 1 #define WAITOPERATOR 2 #define WAITFIRSTOPERAND 3 +#define WAITSINGLEOPERAND 4 /* * node of query tree, also used @@ -195,6 +205,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 else if (*(state->buf) != ' ') return ERR; break; + case WAITSINGLEOPERAND: + if ( *(state->buf) == '\0' ) + return END; + *strval = state->buf; + *lenval = strlen( state->buf ); + state->buf += strlen( state->buf ); + state->count++; + return VAL; default: return ERR; break; @@ -582,7 +600,7 @@ findoprnd(ITEM * ptr, int4 *pos) * input */ static QUERYTYPE * - queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id) +queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain) { QPRS_STATE state; int4 i; @@ -599,7 +617,7 @@ static QUERYTYPE * /* init state */ state.buf = buf; - state.state = WAITFIRSTOPERAND; + state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.count = 0; state.num = 0; state.str = NULL; @@ -679,7 +697,7 @@ Datum tsquery_in(PG_FUNCTION_ARGS) { SET_FUNCOID(); - PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0)); + PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0, false)); } /* @@ -910,7 +928,7 @@ to_tsquery(PG_FUNCTION_ARGS) str = text2char(in); PG_FREE_IF_COPY(in, 1); - query = queryin(str, pushval_morph, PG_GETARG_INT32(0)); + query = queryin(str, pushval_morph, PG_GETARG_INT32(0),false); if ( query->size == 0 ) PG_RETURN_POINTER(query); @@ -950,3 +968,59 @@ to_tsquery_current(PG_FUNCTION_ARGS) Int32GetDatum(get_currcfg()), PG_GETARG_DATUM(0))); } + +Datum +plainto_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(1); + char *str; + QUERYTYPE *query; + ITEM *res; + int4 len; + + SET_FUNCOID(); + + str = text2char(in); + PG_FREE_IF_COPY(in, 1); + + query = queryin(str, pushval_morph, PG_GETARG_INT32(0), true); + + if ( query->size == 0 ) + PG_RETURN_POINTER(query); + + res = clean_fakeval_v2(GETQUERY(query), &len); + if (!res) + { + query->len = HDRSIZEQT; + query->size = 0; + PG_RETURN_POINTER(query); + } + memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM)); + pfree(res); + PG_RETURN_POINTER(query); +} + +Datum +plainto_tsquery_name(PG_FUNCTION_ARGS) +{ + text *name = PG_GETARG_TEXT_P(0); + Datum res; + + SET_FUNCOID(); + res = DirectFunctionCall2(plainto_tsquery, + Int32GetDatum(name2id_cfg(name)), + PG_GETARG_DATUM(1)); + + PG_FREE_IF_COPY(name, 0); + PG_RETURN_DATUM(res); +} + +Datum +plainto_tsquery_current(PG_FUNCTION_ARGS) +{ + SET_FUNCOID(); + PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery, + Int32GetDatum(get_currcfg()), + PG_GETARG_DATUM(0))); +} + diff --git a/contrib/tsearch2/query_support.c b/contrib/tsearch2/query_support.c index c973def7d4df67472e42f3a1b3db395edd473707..edc2d48fcfbe7f31c54d1b660238bcb1fef29a04 100644 --- a/contrib/tsearch2/query_support.c +++ b/contrib/tsearch2/query_support.c @@ -14,6 +14,117 @@ tsquery_numnode(PG_FUNCTION_ARGS) { PG_RETURN_INT32(nnode); } +static QTNode* +join_tsqueries(QUERYTYPE *a, QUERYTYPE *b) { + QTNode *res=(QTNode*)palloc0( sizeof(QTNode) ); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (ITEM*)palloc0( sizeof(ITEM) ); + res->valnode->type = OPR; + + res->child = (QTNode**)palloc0( sizeof(QTNode*)*2 ); + res->child[0] = QT2QTN( GETQUERY(b), GETOPERAND(b) ); + res->child[1] = QT2QTN( GETQUERY(a), GETOPERAND(a) ); + res->nchild = 2; + + return res; +} + +PG_FUNCTION_INFO_V1(tsquery_and); +Datum tsquery_and(PG_FUNCTION_ARGS); + +Datum +tsquery_and(PG_FUNCTION_ARGS) { + QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); + QTNode *res; + QUERYTYPE *query; + + if ( a->size == 0 ) { + PG_FREE_IF_COPY(a,1); + PG_RETURN_POINTER(b); + } else if ( b->size == 0 ) { + PG_FREE_IF_COPY(b,1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b); + + res->valnode->val = '&'; + + query = QTN2QT( res, PlainMemory ); + + QTNFree(res); + PG_FREE_IF_COPY(a,0); + PG_FREE_IF_COPY(b,1); + + PG_RETURN_POINTER(query); +} + +PG_FUNCTION_INFO_V1(tsquery_or); +Datum tsquery_or(PG_FUNCTION_ARGS); + +Datum +tsquery_or(PG_FUNCTION_ARGS) { + QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); + QTNode *res; + QUERYTYPE *query; + + if ( a->size == 0 ) { + PG_FREE_IF_COPY(a,1); + PG_RETURN_POINTER(b); + } else if ( b->size == 0 ) { + PG_FREE_IF_COPY(b,1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b); + + res->valnode->val = '|'; + + query = QTN2QT( res, PlainMemory ); + + QTNFree(res); + PG_FREE_IF_COPY(a,0); + PG_FREE_IF_COPY(b,1); + + PG_RETURN_POINTER(query); +} + +PG_FUNCTION_INFO_V1(tsquery_not); +Datum tsquery_not(PG_FUNCTION_ARGS); + +Datum +tsquery_not(PG_FUNCTION_ARGS) { + QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + QTNode *res; + QUERYTYPE *query; + + if ( a->size == 0 ) + PG_RETURN_POINTER(a); + + res=(QTNode*)palloc0( sizeof(QTNode) ); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (ITEM*)palloc0( sizeof(ITEM) ); + res->valnode->type = OPR; + res->valnode->val = '!'; + + res->child = (QTNode**)palloc0( sizeof(QTNode*) ); + res->child[0] = QT2QTN( GETQUERY(a), GETOPERAND(a) ); + res->nchild = 1; + + query = QTN2QT( res, PlainMemory ); + + QTNFree(res); + PG_FREE_IF_COPY(a,0); + + PG_RETURN_POINTER(query); +} + static int CompareTSQ( QUERYTYPE *a, QUERYTYPE *b ) { if ( a->size != b->size ) { diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql index 0923ce7a19755022e181fcaea6d567152dfa3c0a..bd0baa3b41d4d88603a28194634119f75fab56ce 100644 --- a/contrib/tsearch2/sql/tsearch2.sql +++ b/contrib/tsearch2/sql/tsearch2.sql @@ -173,6 +173,13 @@ select to_tsquery('default', 'asd&(and|fghj)'); select to_tsquery('default', '(asd&and)|fghj'); select to_tsquery('default', '(asd&!and)|fghj'); select to_tsquery('default', '(the|and&(i&1))&fghj'); + +select plainto_tsquery('default', 'the and z 1))& fghj'); +select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd'); +select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg'); +select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg'); +select plainto_tsquery('default', 'foo bar') && 'asd | fg'; + select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca'; select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B'; select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A'; diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c new file mode 100644 index 0000000000000000000000000000000000000000..b84681f1b072fbdd6bbb22e4261252b14d53f398 --- /dev/null +++ b/contrib/tsearch2/ts_locale.c @@ -0,0 +1,61 @@ +#include "ts_locale.h" + +#include "utils/builtins.h" +#include "utils/pg_locale.h" +#include "mb/pg_wchar.h" + + +#if defined(TS_USE_WIDE) && defined(WIN32) + +size_t +wchar2char( const char *to, const wchar_t *from, size_t len ) { + if (GetDatabaseEncoding() == PG_UTF8) { + int r; + + if (len==0) + return 0; + + r = WideCharToMultiByte(CP_UTF8, 0, from, len, to, nbytes, + NULL, NULL); + + + if ( r==0 ) + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("UTF-16 to UTF-8 translation failed: %lu", + GetLastError()))); + + return r; + } + + return wcstombs(to, from, len); +} + +size_t +char2wchar( const wchar_t *to, const char *from, size_t len ) { + if (GetDatabaseEncoding() == PG_UTF8) { + int r; + + if (len==0) + return 0; + + r = MultiByteToWideChar(CP_UTF8, 0, from, len, + to, len); + + if (!r) { + pg_verifymbstr(from, len, false); + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + + Assert(r <= nbytes); + + return r; + } + + return mbstowcs(to, from, len); +} + +#endif diff --git a/contrib/tsearch2/ts_locale.h b/contrib/tsearch2/ts_locale.h new file mode 100644 index 0000000000000000000000000000000000000000..a7ce6f1bbc5393db4f675d071c428d220ee52f2c --- /dev/null +++ b/contrib/tsearch2/ts_locale.h @@ -0,0 +1,38 @@ +#ifndef __TSLOCALE_H__ +#define __TSLOCALE_H__ + +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> + +/* + * towlower() and friends should be in <wctype.h>, but some pre-C99 systems + * declare them in <wchar.h>. + */ +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif + +#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) +#define TS_USE_WIDE + +#ifdef WIN32 + +size_t wchar2char( const char *to, const wchar_t *from, size_t len ); +size_t char2wchar( const wchar_t *to, const char *from, size_t len ); + +#else /* WIN32 */ + +/* correct mbstowcs */ +#define char2wchar mbstowcs +#define wchar2char wcstombs + +#endif /* WIN32 */ + +#endif /* defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) */ + +#endif /* __TSLOCALE_H__ */ diff --git a/contrib/tsearch2/tsearch.sql.in b/contrib/tsearch2/tsearch.sql.in index 9bdf641e121d239c00ec6cd5eecd6faef3895436..4fdf974d0d1defe50f07fa97d905dfb3a558e119 100644 --- a/contrib/tsearch2/tsearch.sql.in +++ b/contrib/tsearch2/tsearch.sql.in @@ -427,6 +427,21 @@ RETURNS tsquery AS 'MODULE_PATHNAME','to_tsquery_current' LANGUAGE 'c' with (isstrict,iscachable); +CREATE FUNCTION plainto_tsquery(oid, text) +RETURNS tsquery +AS 'MODULE_PATHNAME' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION plainto_tsquery(text, text) +RETURNS tsquery +AS 'MODULE_PATHNAME','plainto_tsquery_name' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION plainto_tsquery(text) +RETURNS tsquery +AS 'MODULE_PATHNAME','plainto_tsquery_current' +LANGUAGE 'c' with (isstrict,iscachable); + --operations CREATE FUNCTION exectsq(tsvector, tsquery) RETURNS bool @@ -929,6 +944,47 @@ CREATE OR REPLACE FUNCTION numnode(tsquery) language 'C' with (isstrict,iscachable); +CREATE OR REPLACE FUNCTION tsquery_and(tsquery,tsquery) + returns tsquery + as 'MODULE_PATHNAME', 'tsquery_and' + language 'C' + with (isstrict,iscachable); + +CREATE OPERATOR && ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_and, + COMMUTATOR = '&&', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OR REPLACE FUNCTION tsquery_or(tsquery,tsquery) + returns tsquery + as 'MODULE_PATHNAME', 'tsquery_or' + language 'C' + with (isstrict,iscachable); + +CREATE OPERATOR || ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_or, + COMMUTATOR = '||', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OR REPLACE FUNCTION tsquery_not(tsquery) + returns tsquery + as 'MODULE_PATHNAME', 'tsquery_not' + language 'C' + with (isstrict,iscachable); + +CREATE OPERATOR !! ( + RIGHTARG = tsquery, + PROCEDURE = tsquery_not +); + --------------rewrite subsystem CREATE OR REPLACE FUNCTION rewrite(tsquery, text) diff --git a/contrib/tsearch2/wordparser/Makefile b/contrib/tsearch2/wordparser/Makefile index 0070970e2165e054eea0d1afe88c528c02ee2e6d..c4eceba60bb22b2e515908786b8b2c43a7608fe3 100644 --- a/contrib/tsearch2/wordparser/Makefile +++ b/contrib/tsearch2/wordparser/Makefile @@ -1,8 +1,8 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.8 2005/10/18 01:30:49 tgl Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.9 2005/11/21 12:27:57 teodor Exp $ SUBOBJS = parser.o deflex.o -EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) parser.c +EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) PG_CPPFLAGS = -I$(srcdir)/.. @@ -20,13 +20,6 @@ override CFLAGS += $(CFLAGS_SL) all: SUBSYS.o -parser.c: parser.l -ifdef FLEX - $(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $< -else - @$(missing) flex $< $@ -endif - SUBSYS.o: $(SUBOBJS) $(LD) $(LDREL) $(LDOUT) $@ $^ diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c index bbf3271b666f4682143cc69f200e8759b084f15a..8f93d277a1e21a0942a2068d8ffaeff3362f4c85 100644 --- a/contrib/tsearch2/wordparser/deflex.c +++ b/contrib/tsearch2/wordparser/deflex.c @@ -15,7 +15,7 @@ const char *lex_descr[] = { "Latin part of hyphenated word", "Space symbols", "HTML Tag", - "HTTP head", + "Protocol head", "Hyphenated word", "Latin hyphenated word", "Non-latin hyphenated word", @@ -42,7 +42,7 @@ const char *tok_alias[] = { "lpart_hword", "blank", "tag", - "http", + "protocol", "hword", "lhword", "nlhword", diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h index 651d1f9e77301352fac86b64faa7c2eff87c9141..893f8430515ea4990f41e466e3c91cfea55e6a88 100644 --- a/contrib/tsearch2/wordparser/deflex.h +++ b/contrib/tsearch2/wordparser/deflex.h @@ -17,7 +17,7 @@ #define LATPARTHYPHENWORD 11 #define SPACE 12 #define TAG 13 -#define HTTP 14 +#define PROTOCOL 14 #define HYPHENWORD 15 #define LATHYPHENWORD 16 #define CYRHYPHENWORD 17 diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..e414a865ffdbc43882c160663bf603ba87fd0b0d --- /dev/null +++ b/contrib/tsearch2/wordparser/parser.c @@ -0,0 +1,1028 @@ +#include "postgres.h" + +#include "utils/builtins.h" +#include "utils/pg_locale.h" +#include "mb/pg_wchar.h" + +#include "deflex.h" +#include "parser.h" +#include "ts_locale.h" + + +static TParserPosition* +newTParserPosition(TParserPosition *prev) { + TParserPosition *res = (TParserPosition*)palloc(sizeof(TParserPosition)); + + if ( prev ) + memcpy(res, prev, sizeof(TParserPosition)); + else + memset(res, 0, sizeof(TParserPosition)); + + res->prev = prev; + + res->pushedAtAction = NULL; + + return res; +} + +TParser* +TParserInit( char *str, int len ) { + TParser *prs = (TParser*)palloc0( sizeof(TParser) ); + + prs->charmaxlen = pg_database_encoding_max_length(); + prs->str = str; + prs->lenstr = len; + +#ifdef TS_USE_WIDE + /* + * Use wide char code only when max encoding length > 1 and ctype != C. + * Some operating systems fail with multi-byte encodings and a C locale. + * Also, for a C locale there is no need to process as multibyte. + * From backend/utils/adt/oracle_compat.c Teodor + */ + + if ( prs->charmaxlen > 1 && !lc_ctype_is_c() ) { + prs->usewide=true; + prs->wstr = (wchar_t*)palloc( sizeof(wchar_t) * prs->lenstr ); + prs->lenwstr = char2wchar( prs->wstr, prs->str, prs->lenstr ); + } else +#endif + prs->usewide=false; + + prs->state = newTParserPosition(NULL); + prs->state->state = TPS_Base; + + return prs; +} + +void +TParserClose( TParser* prs ) { + while( prs->state ) { + TParserPosition *ptr = prs->state->prev; + pfree( prs->state ); + prs->state = ptr; + } + + if ( prs->wstr ) + pfree( prs->wstr ); + pfree( prs ); +} + +/* + * defining support function, equvalent is* macroses, but + * working with any possible encodings and locales + */ + +#ifdef TS_USE_WIDE + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \ + is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + + + +/* p_iseq should be used only for ascii symbols */ + +static int +p_iseq(TParser *prs, char c) { + Assert( prs->state ); + return ( ( prs->state->charlen==1 && *( prs->str + prs->state->posbyte ) == c ) ) ? 1 : 0; +} + +#else /* TS_USE_WIDE */ + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + + +static int +p_iseq(TParser *prs, char c) { + Assert( prs->state ); + return ( *( prs->str + prs->state->posbyte ) == c ) ) ? 1 : 0; +} + +#endif /* TS_USE_WIDE */ + +p_iswhat(alnum) +p_iswhat(alpha) +p_iswhat(digit) +p_iswhat(lower) +p_iswhat(print) +p_iswhat(punct) +p_iswhat(space) +p_iswhat(upper) +p_iswhat(xdigit) + +static int +p_isEOF(TParser *prs) { + Assert( prs->state ); + return (prs->state->posbyte == prs->lenstr || prs->state->charlen==0) ? 1 : 0; +} + +static int +p_iseqC(TParser *prs) { + return p_iseq(prs, prs->c); +} + +static int +p_isneC(TParser *prs) { + return !p_iseq(prs, prs->c); +} + +static int +p_isascii(TParser *prs) { + return ( prs->state->charlen==1 && isascii( (unsigned char) *( prs->str + prs->state->posbyte ) ) ) ? 1 : 0; +} + +static int +p_islatin(TParser *prs) { + return ( p_isalpha(prs) && p_isascii(prs) ) ? 1 : 0; +} + +static int +p_isnonlatin(TParser *prs) { + return ( p_isalpha(prs) && !p_isascii(prs) ) ? 1 : 0; +} + +void _make_compiler_happy(void); +void +_make_compiler_happy(void) { + p_isalnum(NULL); p_isnotalnum(NULL); + p_isalpha(NULL); p_isnotalpha(NULL); + p_isdigit(NULL); p_isnotdigit(NULL); + p_islower(NULL); p_isnotlower(NULL); + p_isprint(NULL); p_isnotprint(NULL); + p_ispunct(NULL); p_isnotpunct(NULL); + p_isspace(NULL); p_isnotspace(NULL); + p_isupper(NULL); p_isnotupper(NULL); + p_isxdigit(NULL); p_isnotxdigit(NULL); + p_isEOF(NULL); + p_iseqC(NULL); p_isneC(NULL); +} + + +static void +SpecialTags(TParser *prs) { + switch( prs->state->lencharlexeme ) { + case 8: /* </script */ + if ( pg_strncasecmp( prs->lexeme, "</script", 8 ) == 0 ) + prs->ignore = false; + break; + case 7: /* <script || </style */ + if ( pg_strncasecmp( prs->lexeme, "</style", 7 ) == 0 ) + prs->ignore = false; + else if ( pg_strncasecmp( prs->lexeme, "<script", 7 ) == 0 ) + prs->ignore = true; + break; + case 6: /* <style */ + if ( pg_strncasecmp( prs->lexeme, "<style", 6 ) == 0 ) + prs->ignore = true; + break; + default: break; + } +} + +static void +SpecialFURL(TParser *prs) { + prs->wanthost = true; + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static void +SpecialHyphen(TParser *prs) { + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static int +p_isstophost(TParser *prs) { + if ( prs->wanthost ) { + prs->wanthost = false; + return 1; + } + return 0; +} + +static int +p_isignore(TParser *prs) { + return (prs->ignore) ? 1 : 0; +} + +static int +p_ishost(TParser *prs) { + TParser *tmpprs = TParserInit( prs->str+prs->state->posbyte, prs->lenstr - prs->state->posbyte ); + int res = 0; + + if ( TParserGet(tmpprs) && tmpprs->type == HOST ) { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +static int +p_isURI(TParser *prs) { + TParser *tmpprs = TParserInit( prs->str+prs->state->posbyte, prs->lenstr - prs->state->posbyte ); + int res = 0; + + tmpprs->state = newTParserPosition( tmpprs->state ); + tmpprs->state->state = TPS_InFileFirst; + + if ( TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH) ) { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +/* + * Table of state/action of parser + */ + +#define A_NEXT 0x0000 +#define A_BINGO 0x0001 +#define A_POP 0x0002 +#define A_PUSH 0x0004 +#define A_RERUN 0x0008 +#define A_CLEAR 0x0010 +#define A_MERGE 0x0020 +#define A_CLRALL 0x0040 + +static TParserStateActionItem actionTPS_Base[] = { + {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL}, + {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL} +}; + + +static TParserStateActionItem actionTPS_InUWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InLatWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InCyrWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedIntFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT|A_CLEAR, TPS_InSignedInt, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSpace[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InVersionFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InVersion[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaSign[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissa[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst,0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntity[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNum[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { + {NULL, 0, A_BINGO|A_CLEAR,TPS_Base, HTMLENTITY, NULL} +}; + +static TParserStateActionItem actionTPS_InTagFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL}, + {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagCloseFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTag[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, + {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL}, + {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeKK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagBackSleshed[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {NULL, 0, A_MERGE, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InComment[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst,0, NULL}, + {NULL, 0, A_NEXT, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstDomen[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomenSecond, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + //{p_iseqC, '-', A_POP, TPS_InHostFirstAN, 0, NULL}, + //{p_iseqC, '.', A_POP, TPS_InHostFirstDomen, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomenSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomen[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, + {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InPortFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPort[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstAN[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHost[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InEmail[] = { + {p_ishost, 0, A_BINGO|A_CLRALL, TPS_Base, EMAIL, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFileFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFile[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL} +}; + +static TParserStateActionItem actionTPS_InFileNext[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURIFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '"', A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL}, + {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL}, +}; + +static TParserStateActionItem actionTPS_InURIStart[] = { + {NULL, 0, A_NEXT, TPS_InURI, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURI[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL}, + {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, URI, NULL} +}; + +static TParserStateActionItem actionTPS_InFURL[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isURI, 0, A_BINGO|A_CLRALL,TPS_Base, FURL, SpecialFURL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, PROTOCOL, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenValue[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueExact[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InParseHyphen[] = { + {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen,0, NULL}, + {NULL, 0, A_RERUN, TPS_Base, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_BINGO|A_CLEAR,TPS_InParseHyphen, SPACE, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD,NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD,NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD,NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD,NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL} +}; + +/* + * order should be the same as in typedef enum {} TParserState!! + */ + +static const TParserStateAction Actions[] = { + { TPS_Base, actionTPS_Base }, + { TPS_InUWord, actionTPS_InUWord }, + { TPS_InLatWord, actionTPS_InLatWord }, + { TPS_InCyrWord, actionTPS_InCyrWord }, + { TPS_InUnsignedInt, actionTPS_InUnsignedInt }, + { TPS_InSignedIntFirst, actionTPS_InSignedIntFirst }, + { TPS_InSignedInt, actionTPS_InSignedInt }, + { TPS_InSpace, actionTPS_InSpace }, + { TPS_InUDecimalFirst, actionTPS_InUDecimalFirst }, + { TPS_InUDecimal, actionTPS_InUDecimal }, + { TPS_InDecimalFirst, actionTPS_InDecimalFirst }, + { TPS_InDecimal, actionTPS_InDecimal }, + { TPS_InVersionFirst, actionTPS_InVersionFirst }, + { TPS_InVersion, actionTPS_InVersion }, + { TPS_InMantissaFirst, actionTPS_InMantissaFirst }, + { TPS_InMantissaSign, actionTPS_InMantissaSign }, + { TPS_InMantissa, actionTPS_InMantissa }, + { TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst }, + { TPS_InHTMLEntity, actionTPS_InHTMLEntity }, + { TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst }, + { TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum }, + { TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd }, + { TPS_InTagFirst, actionTPS_InTagFirst }, + { TPS_InTagCloseFirst, actionTPS_InTagCloseFirst }, + { TPS_InTag, actionTPS_InTag }, + { TPS_InTagEscapeK, actionTPS_InTagEscapeK }, + { TPS_InTagEscapeKK, actionTPS_InTagEscapeKK }, + { TPS_InTagBackSleshed, actionTPS_InTagBackSleshed }, + { TPS_InTagEnd, actionTPS_InTagEnd }, + { TPS_InCommentFirst, actionTPS_InCommentFirst }, + { TPS_InCommentLast, actionTPS_InCommentLast }, + { TPS_InComment, actionTPS_InComment }, + { TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst }, + { TPS_InCloseCommentLast, actionTPS_InCloseCommentLast }, + { TPS_InCommentEnd, actionTPS_InCommentEnd }, + { TPS_InHostFirstDomen, actionTPS_InHostFirstDomen }, + { TPS_InHostDomenSecond, actionTPS_InHostDomenSecond }, + { TPS_InHostDomen, actionTPS_InHostDomen }, + { TPS_InPortFirst, actionTPS_InPortFirst }, + { TPS_InPort, actionTPS_InPort }, + { TPS_InHostFirstAN, actionTPS_InHostFirstAN }, + { TPS_InHost, actionTPS_InHost }, + { TPS_InEmail, actionTPS_InEmail }, + { TPS_InFileFirst, actionTPS_InFileFirst }, + { TPS_InFile, actionTPS_InFile }, + { TPS_InFileNext, actionTPS_InFileNext }, + { TPS_InURIFirst, actionTPS_InURIFirst }, + { TPS_InURIStart, actionTPS_InURIStart }, + { TPS_InURI, actionTPS_InURI }, + { TPS_InFURL, actionTPS_InFURL }, + { TPS_InProtocolFirst, actionTPS_InProtocolFirst }, + { TPS_InProtocolSecond, actionTPS_InProtocolSecond }, + { TPS_InProtocolEnd, actionTPS_InProtocolEnd }, + { TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst }, + { TPS_InHyphenLatWord, actionTPS_InHyphenLatWord }, + { TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst }, + { TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord }, + { TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst }, + { TPS_InHyphenUWord, actionTPS_InHyphenUWord }, + { TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst }, + { TPS_InHyphenValue, actionTPS_InHyphenValue }, + { TPS_InHyphenValueExact, actionTPS_InHyphenValueExact }, + { TPS_InParseHyphen, actionTPS_InParseHyphen }, + { TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen }, + { TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart }, + { TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart }, + { TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart }, + { TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt }, + { TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst }, + { TPS_InHDecimalPart, actionTPS_InHDecimalPart }, + { TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst }, + { TPS_InHVersionPart, actionTPS_InHVersionPart }, + { TPS_Null, NULL } +}; + + +bool +TParserGet( TParser *prs ) { + TParserStateActionItem *item=NULL; + + if ( prs->state->posbyte >= prs->lenstr ) + return false; + + Assert( prs->state ); + prs->lexeme = prs->str + prs->state->posbyte; + prs->state->pushedAtAction = NULL; + + /* look at string */ + while (prs->state->posbyte <= prs->lenstr) { + if ( prs->state->posbyte == prs->lenstr ) + prs->state->charlen = 0; + else + prs->state->charlen = ( prs->charmaxlen == 1 ) ? prs->charmaxlen : + pg_mblen( prs->str + prs->state->posbyte ); + + Assert( prs->state->posbyte + prs->state->charlen <= prs->lenstr ); + Assert( prs->state->state >=TPS_Base && prs->state->state < TPS_Null ); + Assert( Actions[ prs->state->state ].state == prs->state->state ); + + item = Actions[ prs->state->state ].action; + Assert(item!=NULL); + + if ( item < prs->state->pushedAtAction ) + item = prs->state->pushedAtAction; + + /* find action by character class */ + while( item->isclass ) { + prs->c = item->c; + if ( item->isclass(prs)!=0 ) { + if ( item > prs->state->pushedAtAction ) /* remember: after pushing we were by false way */ + break; + } + item++; + } + + prs->state->pushedAtAction = NULL; + + /* call special handler if exists */ + if ( item->special ) + item->special(prs); + + /* BINGO, lexeme is found */ + if ( item->flags & A_BINGO ) { + Assert( item->type>0 ); + prs->lenbytelexeme = prs->state->lenbytelexeme; + prs->lencharlexeme = prs->state->lencharlexeme; + prs->state->lenbytelexeme = prs->state->lencharlexeme = 0; + prs->type = item->type; + } + + /* do various actions by flags */ + if ( item->flags & A_POP ) { /* pop stored state in stack */ + TParserPosition *ptr = prs->state->prev; + pfree( prs->state ); + prs->state = ptr; + Assert( prs->state ); + } else if ( item->flags & A_PUSH ) { /* push (store) state in stack */ + prs->state->pushedAtAction = item; /* remember where we push */ + prs->state = newTParserPosition( prs->state ); + } else if ( item->flags & A_CLEAR ) { /* clear previous pushed state */ + TParserPosition *ptr; + Assert( prs->state->prev ); + ptr = prs->state->prev->prev; + pfree( prs->state->prev ); + prs->state->prev = ptr; + } else if ( item->flags & A_CLRALL ) { /* clear all previous pushed state */ + TParserPosition *ptr; + while( prs->state->prev ) { + ptr = prs->state->prev->prev; + pfree( prs->state->prev ); + prs->state->prev = ptr; + } + } else if ( item->flags & A_MERGE ) { /* merge posinfo with current and pushed state */ + TParserPosition *ptr = prs->state; + Assert( prs->state->prev ); + prs->state = prs->state->prev; + + prs->state->posbyte = ptr->posbyte; + prs->state->poschar = ptr->poschar; + prs->state->charlen = ptr->charlen; + prs->state->lenbytelexeme = ptr->lenbytelexeme; + prs->state->lencharlexeme = ptr->lencharlexeme; + pfree(ptr); + } + + /* set new state if pointed */ + if ( item->tostate != TPS_Null ) + prs->state->state = item->tostate; + + /* check for go away */ + if ( (item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN)==0 ) ) + break; + + /* go to begining of loop if we should rerun or we just restore state */ + if ( item->flags & ( A_RERUN | A_POP ) ) + continue; + + /* move forward */ + if ( prs->state->charlen ) { + prs->state->posbyte += prs->state->charlen; + prs->state->lenbytelexeme += prs->state->charlen; + prs->state->poschar ++; + prs->state->lencharlexeme ++; + } + } + + return (item && (item->flags & A_BINGO)) ? true : false; +} + + diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h index 3f0e0cd6359ff66f4f91050a215703d4319c6ab9..ee5b3b7ab5471e6dc194647db98bcd3542977464 100644 --- a/contrib/tsearch2/wordparser/parser.h +++ b/contrib/tsearch2/wordparser/parser.h @@ -1,10 +1,147 @@ #ifndef __PARSER_H__ #define __PARSER_H__ -extern char *token; -extern int tokenlen; -int tsearch2_yylex(void); -void tsearch2_start_parse_str(char *, int); -void tsearch2_end_parse(void); +#include <ctype.h> +#include <limits.h> +#include "ts_locale.h" + +typedef enum { + TPS_Base = 0, + TPS_InUWord, + TPS_InLatWord, + TPS_InCyrWord, + TPS_InUnsignedInt, + TPS_InSignedIntFirst, + TPS_InSignedInt, + TPS_InSpace, + TPS_InUDecimalFirst, + TPS_InUDecimal, + TPS_InDecimalFirst, + TPS_InDecimal, + TPS_InVersionFirst, + TPS_InVersion, + TPS_InMantissaFirst, + TPS_InMantissaSign, + TPS_InMantissa, + TPS_InHTMLEntityFirst, + TPS_InHTMLEntity, + TPS_InHTMLEntityNumFirst, + TPS_InHTMLEntityNum, + TPS_InHTMLEntityEnd, + TPS_InTagFirst, + TPS_InTagCloseFirst, + TPS_InTag, + TPS_InTagEscapeK, + TPS_InTagEscapeKK, + TPS_InTagBackSleshed, + TPS_InTagEnd, + TPS_InCommentFirst, + TPS_InCommentLast, + TPS_InComment, + TPS_InCloseCommentFirst, + TPS_InCloseCommentLast, + TPS_InCommentEnd, + TPS_InHostFirstDomen, + TPS_InHostDomenSecond, + TPS_InHostDomen, + TPS_InPortFirst, + TPS_InPort, + TPS_InHostFirstAN, + TPS_InHost, + TPS_InEmail, + TPS_InFileFirst, + TPS_InFile, + TPS_InFileNext, + TPS_InURIFirst, + TPS_InURIStart, + TPS_InURI, + TPS_InFURL, + TPS_InProtocolFirst, + TPS_InProtocolSecond, + TPS_InProtocolEnd, + TPS_InHyphenLatWordFirst, + TPS_InHyphenLatWord, + TPS_InHyphenCyrWordFirst, + TPS_InHyphenCyrWord, + TPS_InHyphenUWordFirst, + TPS_InHyphenUWord, + TPS_InHyphenValueFirst, + TPS_InHyphenValue, + TPS_InHyphenValueExact, + TPS_InParseHyphen, + TPS_InParseHyphenHyphen, + TPS_InHyphenCyrWordPart, + TPS_InHyphenLatWordPart, + TPS_InHyphenUWordPart, + TPS_InHyphenUnsignedInt, + TPS_InHDecimalPartFirst, + TPS_InHDecimalPart, + TPS_InHVersionPartFirst, + TPS_InHVersionPart, + TPS_Null /* last state (fake value) */ +} TParserState; + +/* forward declaration */ +struct TParser; + + +typedef int (*TParserCharTest)(struct TParser*); /* any p_is* functions except p_iseq */ +typedef void (*TParserSpecial)(struct TParser*); /* special handler for special cases... */ + +typedef struct { + TParserCharTest isclass; + char c; + uint16 flags; + TParserState tostate; + int type; + TParserSpecial special; +} TParserStateActionItem; + +typedef struct { + TParserState state; + TParserStateActionItem *action; +} TParserStateAction; + +typedef struct TParserPosition { + int posbyte; /* position of parser in bytes */ + int poschar; /* osition of parser in characters */ + int charlen; /* length of current char */ + int lenbytelexeme; + int lencharlexeme; + TParserState state; + struct TParserPosition *prev; + int flags; + TParserStateActionItem *pushedAtAction; +} TParserPosition; + +typedef struct TParser { + /* string and position information */ + char *str; /* multibyte string */ + int lenstr; /* length of mbstring */ + wchar_t *wstr; /* wide character string */ + int lenwstr; /* length of wsting */ + + /* State of parse */ + int charmaxlen; + bool usewide; + TParserPosition *state; + bool ignore; + bool wanthost; + + /* silly char */ + char c; + + /* out */ + char *lexeme; + int lenbytelexeme; + int lencharlexeme; + int type; + +} TParser; + + +TParser* TParserInit( char *, int ); +bool TParserGet( TParser* ); +void TParserClose( TParser* ); #endif diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l deleted file mode 100644 index a7cb4684c32868b4c432b1b76ad4477afafcb9ad..0000000000000000000000000000000000000000 --- a/contrib/tsearch2/wordparser/parser.l +++ /dev/null @@ -1,346 +0,0 @@ -%{ -#include "postgres.h" - -#include "deflex.h" -#include "parser.h" -#include "common.h" - -/* Avoid exit() on fatal scanner errors */ -#undef fprintf -#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg) - -char *token = NULL; /* pointer to token */ -int tokenlen; -static char *s = NULL; /* to return WHOLE hyphenated-word */ - -YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */ - -typedef struct { - int tlen; - int clen; - char *str; -} TagStorage; - -static TagStorage ts={0,0,NULL}; - -static void -addTag(void) -{ - while( ts.clen+tsearch2_yyleng+1 > ts.tlen ) { - ts.tlen*=2; - ts.str=realloc(ts.str,ts.tlen); - if (!ts.str) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - memcpy(ts.str+ts.clen,tsearch2_yytext,tsearch2_yyleng); - ts.clen+=tsearch2_yyleng; - ts.str[ts.clen]='\0'; -} - -static void -startTag(void) -{ - if ( ts.str==NULL ) { - ts.tlen=tsearch2_yyleng+1; - ts.str=malloc(ts.tlen); - if (!ts.str) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - ts.clen=0; - ts.str[0]='\0'; - addTag(); -} - -%} - -%option 8bit -%option never-interactive -%option nodefault -%option nounput -%option noyywrap - -/* parser's state for parsing hyphenated-word */ -%x DELIM -/* parser's state for parsing URL*/ -%x URL -%x SERVER - -/* parser's state for parsing TAGS */ -%x INTAG -%x QINTAG -%x INCOMMENT -%x INSCRIPT - -/* cyrillic koi8 char */ -CYRALNUM [0-9\200-\377] -CYRALPHA [\200-\377] -ALPHA [a-zA-Z\200-\377] -ALNUM [0-9a-zA-Z\200-\377] - - -HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+ -URI [-_[:alnum:]/%,\.;=&?#]+ - -%% - -"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); } - -<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" { - BEGIN INITIAL; - addTag(); - token = ts.str; - tokenlen = ts.clen; - return TAG; -} - -"<!--" { BEGIN INCOMMENT; startTag(); } - -<INCOMMENT>"-->" { - BEGIN INITIAL; - addTag(); - token = ts.str; - tokenlen = ts.clen; - return TAG; -} - - -"<"[\![:alpha:]] { BEGIN INTAG; startTag(); } - -"</"[[:alpha:]] { BEGIN INTAG; startTag(); } - -<INTAG>"\"" { BEGIN QINTAG; addTag(); } - -<QINTAG>"\\\"" { addTag(); } - -<QINTAG>"\"" { BEGIN INTAG; addTag(); } - -<INTAG>">" { - BEGIN INITIAL; - addTag(); - token = ts.str; - tokenlen = ts.clen; - return TAG; -} - -<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); } - -\&(quot|amp|nbsp|lt|gt)\; { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return HTMLENTITY; -} - -\&\#[0-9][0-9]?[0-9]?\; { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return HTMLENTITY; -} - -[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return EMAIL; -} - -[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return SCIENTIFIC; -} - -[0-9]+\.[0-9]+\.[0-9\.]*[0-9] { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return VERSIONNUMBER; -} - -[+-]?[0-9]+\.[0-9]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return DECIMAL; -} - -[+-][0-9]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return SIGNEDINT; -} - -<DELIM,INITIAL>[0-9]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return UNSIGNEDINT; -} - -http"://" { - BEGIN URL; - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return HTTP; -} - -ftp"://" { - BEGIN URL; - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return HTTP; -} - -<URL,INITIAL>{HOSTNAME}[/:]{URI} { - BEGIN SERVER; - if (s) { free(s); s=NULL; } - s = strdup( tsearch2_yytext ); - tokenlen = tsearch2_yyleng; - yyless( 0 ); - token = s; - return FURL; -} - -<SERVER,URL,INITIAL>{HOSTNAME} { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return HOST; -} - -<SERVER>[/:]{URI} { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return URI; -} - -[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return FILEPATH; -} - -({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ { - BEGIN DELIM; - if (s) { free(s); s=NULL; } - s = strdup( tsearch2_yytext ); - tokenlen = tsearch2_yyleng; - yyless( 0 ); - token = s; - return CYRHYPHENWORD; -} - -([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ { - BEGIN DELIM; - if (s) { free(s); s=NULL; } - s = strdup( tsearch2_yytext ); - tokenlen = tsearch2_yyleng; - yyless( 0 ); - token = s; - return LATHYPHENWORD; -} - -({ALNUM}+-)+{ALNUM}+ /* composite-word */ { - BEGIN DELIM; - if (s) { free(s); s=NULL; } - s = strdup( tsearch2_yytext ); - tokenlen = tsearch2_yyleng; - yyless( 0 ); - token = s; - return HYPHENWORD; -} - -<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return VERSIONNUMBER; -} - -<DELIM>\+?[0-9]+\.[0-9]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return DECIMAL; -} - -<DELIM>{CYRALPHA}+ /* one word in composite-word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return CYRPARTHYPHENWORD; -} - -<DELIM>[[:alpha:]]+ /* one word in composite-word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return LATPARTHYPHENWORD; -} - -<DELIM>{ALNUM}+ /* one word in composite-word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return PARTHYPHENWORD; -} - -<DELIM>- { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return SPACE; -} - -<DELIM,SERVER,URL>.|\n /* return in basic state */ { - BEGIN INITIAL; - yyless( 0 ); -} - -{CYRALPHA}+ /* normal word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return CYRWORD; -} - -[[:alpha:]]+ /* normal word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return LATWORD; -} - -{ALNUM}+ /* normal word */ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return UWORD; -} - -[ \r\n\t]+ { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return SPACE; -} - -. { - token = tsearch2_yytext; - tokenlen = tsearch2_yyleng; - return SPACE; -} - -%% - -/* clearing after parsing from string */ -void -tsearch2_end_parse(void) -{ - if (s) - { - free(s); - s = NULL; - } - tsearch2_yy_delete_buffer( buf ); - buf = NULL; -} - -/* start parse from string */ -void -tsearch2_start_parse_str(char* str, int limit) -{ - if (buf) - tsearch2_end_parse(); - buf = tsearch2_yy_scan_bytes( str, limit ); - tsearch2_yy_switch_to_buffer( buf ); - BEGIN INITIAL; -} diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c index 6686257887222aa2face214f4d3dfd24cb7a8190..897ff2795e27690f7f0885eb16d82840718fe826 100644 --- a/contrib/tsearch2/wparser_def.c +++ b/contrib/tsearch2/wparser_def.c @@ -39,8 +39,7 @@ Datum prsd_start(PG_FUNCTION_ARGS); Datum prsd_start(PG_FUNCTION_ARGS) { - tsearch2_start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)); - PG_RETURN_POINTER(NULL); + PG_RETURN_POINTER(TParserInit( (char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1))); } PG_FUNCTION_INFO_V1(prsd_getlexeme); @@ -48,14 +47,17 @@ Datum prsd_getlexeme(PG_FUNCTION_ARGS); Datum prsd_getlexeme(PG_FUNCTION_ARGS) { - /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */ + TParser *p=(TParser*)PG_GETARG_POINTER(0); char **t = (char **) PG_GETARG_POINTER(1); int *tlen = (int *) PG_GETARG_POINTER(2); - int type = tsearch2_yylex(); - *t = token; - *tlen = tokenlen; - PG_RETURN_INT32(type); + if ( !TParserGet(p) ) + PG_RETURN_INT32(0); + + *t = p->lexeme; + *tlen = p->lenbytelexeme; + + PG_RETURN_INT32(p->type); } PG_FUNCTION_INFO_V1(prsd_end); @@ -63,8 +65,8 @@ Datum prsd_end(PG_FUNCTION_ARGS); Datum prsd_end(PG_FUNCTION_ARGS) { - /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */ - tsearch2_end_parse(); + TParser *p=(TParser*)PG_GETARG_POINTER(0); + TParserClose(p); PG_RETURN_VOID(); }