From e5be89981fc70648eedb325781cf2fbd4da05ba8 Mon Sep 17 00:00:00 2001
From: Teodor Sigaev <teodor@sigaev.ru>
Date: Fri, 7 Sep 2007 15:09:56 +0000
Subject: [PATCH] Refactoring by Heikki Linnakangas <heikki@enterprisedb.com>
 with small edits by me

- Break the QueryItem struct into QueryOperator and QueryOperand.
  Type was really the only common field between them. QueryItem still
  exists, and is used in the TSQuery struct as before, but it's now a
  union of the two. Many other changes fell from that, like separation
  of pushval_asis function into pushValue, pushOperator and pushStop.

- Moved some structs that were for internal use only from header files
  to the right .c-files.

- Moved tsvector parser to a new tsvector_parser.c file. Parser code was
  about half of the size of tsvector.c, it's also used from tsquery.c, and
  it has some data structures of its own, so it seems better to separate
  it. Cleaned up the API so that TSVectorParserState is not accessed from
  outside tsvector_parser.c.

- Separated enumerations (#defines, really) used for QueryItem.type
  field and as return codes from gettoken_query. It was just accidental
  code sharing.

- Removed ParseQueryNode struct used internally by makepol and friends.
  push*-functions now construct QueryItems directly.

- Changed int4 variables to just ints for variables like "i" or "array
  size", where the storage-size was not significant.
---
 src/backend/tsearch/to_tsany.c          |  30 +-
 src/backend/tsearch/ts_parse.c          |  20 +-
 src/backend/tsearch/wparser_def.c       |  12 +-
 src/backend/utils/adt/Makefile          |   4 +-
 src/backend/utils/adt/tsginidx.c        |  21 +-
 src/backend/utils/adt/tsgistidx.c       |  12 +-
 src/backend/utils/adt/tsquery.c         | 582 +++++++++++++++---------
 src/backend/utils/adt/tsquery_cleanup.c |  63 +--
 src/backend/utils/adt/tsquery_op.c      |  27 +-
 src/backend/utils/adt/tsquery_rewrite.c |  60 ++-
 src/backend/utils/adt/tsquery_util.c    | 134 ++++--
 src/backend/utils/adt/tsrank.c          | 116 +++--
 src/backend/utils/adt/tsvector.c        | 414 +++++------------
 src/backend/utils/adt/tsvector_op.c     |  75 +--
 src/backend/utils/adt/tsvector_parser.c | 357 +++++++++++++++
 src/include/tsearch/ts_public.h         |   4 +-
 src/include/tsearch/ts_type.h           |  79 +++-
 src/include/tsearch/ts_utils.h          |  90 ++--
 18 files changed, 1278 insertions(+), 822 deletions(-)
 create mode 100644 src/backend/utils/adt/tsvector_parser.c

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ee4b61d44bf..d0b1bcc19de 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
 
 
 /*
- * This function is used for morph parsing
+ * This function is used for morph parsing.
+ *
+ * The value is passed to parsetext which will call the right dictionary to
+ * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
+ * to the stack.
+ *
+ * All words belonging to the same variant are pushed as an ANDed list,
+ * and different variants are ORed together.
  */
 static void
-pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
+pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
 {
 	int4		count = 0;
 	ParsedText	prs;
@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 				cntvar = 0,
 				cntpos = 0,
 				cnt = 0;
+	Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
 
 	prs.lenwords = 4;
 	prs.curwords = 0;
 	prs.pos = 0;
 	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
-	parsetext(state->cfg_id, &prs, strval, lenval);
+	parsetext(cfg_id, &prs, strval, lenval);
 
 	if (prs.curwords > 0)
 	{
@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 				while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
 				{
 
-					pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+					pushValue(state, prs.words[count].word, prs.words[count].len, weight);
 					pfree(prs.words[count].word);
 					if (cnt)
-						pushquery(state, OPR, (int4) '&', 0, 0, 0);
+						pushOperator(state, OP_AND);
 					cnt++;
 					count++;
 				}
 
 				if (cntvar)
-					pushquery(state, OPR, (int4) '|', 0, 0, 0);
+					pushOperator(state, OP_OR);
 				cntvar++;
 			}
 
 			if (cntpos)
-				pushquery(state, OPR, (int4) '&', 0, 0, 0);
+				pushOperator(state, OP_AND);
 
 			cntpos++;
 		}
@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 
 	}
 	else
-		pushval_asis(state, VALSTOP, NULL, 0, 0);
+		pushStop(state);
 }
 
 Datum
@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
 	QueryItem  *res;
 	int4		len;
 
-	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
 
 	if (query->size == 0)
 		PG_RETURN_TSQUERY(query);
@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
 	QueryItem  *res;
 	int4		len;
 
-	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
 
 	if (query->size == 0)
 		PG_RETURN_TSQUERY(query);
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 47e18fc1ac5..22c5f2b86ea 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
 }
 
 /*
- * Parse string and lexize words
+ * Parse string and lexize words.
+ *
+ * prs will be filled in.
  */
 void
-parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
+parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
 {
 	int			type,
 				lenlemm;
@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
  * Headline framework
  */
 static void
-hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
+hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
 {
 	while (prs->curwords >= prs->lenwords)
 	{
@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
 	word = &(prs->words[prs->curwords - 1]);
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
+		if (item->type == QI_VAL &&
+			item->operand.length == buflen &&
+			strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
 		{
 			if (word->item)
 			{
 				memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
-				prs->words[prs->curwords].item = item;
+				prs->words[prs->curwords].item = &item->operand;
 				prs->words[prs->curwords].repeated = 1;
 				prs->curwords++;
 			}
 			else
-				word->item = item;
+				word->item = &item->operand;
 		}
 		item++;
 	}
@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
 }
 
 void
-hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
+hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
 {
 	int			type,
 				lenlemm;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 5b47f66d07f..5f65cbc9fb2 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1575,7 +1575,7 @@ typedef struct
 } hlCheck;
 
 static bool
-checkcondition_HL(void *checkval, QueryItem * val)
+checkcondition_HL(void *checkval, QueryOperand * val)
 {
 	int			i;
 
@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
 
 	for (j = 0; j < query->size; j++)
 	{
-		if (item->type != VAL)
+		if (item->type != QI_VAL)
 		{
 			item++;
 			continue;
 		}
 		for (i = pos; i < prs->curwords; i++)
 		{
-			if (prs->words[i].item == item)
+			if (prs->words[i].item == &item->operand)
 			{
 				if (i > *q)
 					*q = i;
@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
 	item = GETQUERY(query);
 	for (j = 0; j < query->size; j++)
 	{
-		if (item->type != VAL)
+		if (item->type != QI_VAL)
 		{
 			item++;
 			continue;
 		}
 		for (i = *q; i >= pos; i--)
 		{
-			if (prs->words[i].item == item)
+			if (prs->words[i].item == &item->operand)
 			{
 				if (i < *p)
 					*p = i;
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index a1f233dca82..9a75c736df6 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for utils/adt
 #
-# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
 #
 
 subdir = src/backend/utils/adt
@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
 	ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
 	tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
 	tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
-	tsvector.o tsvector_op.o \
+	tsvector.o tsvector_op.o tsvector_parser.o\
 	uuid.o xml.o
 
 like.o: like.c like_match.c
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 491dd21aa81..10b80dc9566 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
 		item = GETQUERY(query);
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 				(*nentries)++;
 
 		entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 			{
 				text	   *txt;
+				QueryOperand *val = &item[i].operand;
 
-				txt = (text *) palloc(VARHDRSZ + item[i].length);
+				txt = (text *) palloc(VARHDRSZ + val->length);
 
-				SET_VARSIZE(txt, VARHDRSZ + item[i].length);
-				memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
+				SET_VARSIZE(txt, VARHDRSZ + val->length);
+				memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
 
 				entries[j++] = PointerGetDatum(txt);
 
-				if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
+				if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							 errmsg("@@ operator does not support lexeme class restrictions"),
@@ -116,11 +117,11 @@ typedef struct
 } GinChkVal;
 
 static bool
-checkcondition_gin(void *checkval, QueryItem * val)
+checkcondition_gin(void *checkval, QueryOperand * val)
 {
 	GinChkVal  *gcv = (GinChkVal *) checkval;
 
-	return gcv->mapped_check[val - gcv->frst];
+	return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
 }
 
 Datum
@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
 		gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 				gcv.mapped_check[i] = check[j++];
 
 		res = TS_execute(
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 6c262521ef4..4fc51378b4b 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -293,7 +293,7 @@ typedef struct
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_arr(void *checkval, QueryItem * val)
+checkcondition_arr(void *checkval, QueryOperand * val)
 {
 	int4	   *StopLow = ((CHKVAL *) checkval)->arrb;
 	int4	   *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		if (*StopMiddle == val->val)
+		if (*StopMiddle == val->valcrc)
 			return (true);
-		else if (*StopMiddle < val->val)
+		else if (*StopMiddle < val->valcrc)
 			StopLow = StopMiddle + 1;
 		else
 			StopHigh = StopMiddle;
@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
 }
 
 static bool
-checkcondition_bit(void *checkval, QueryItem * val)
+checkcondition_bit(void *checkval, QueryOperand * val)
 {
-	return GETBIT(checkval, HASHVAL(val->val));
+	return GETBIT(checkval, HASHVAL(val->valcrc));
 }
 
 Datum
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 83759728ff9..27b93eb64d7 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,6 +23,29 @@
 #include "utils/pg_crc.h"
 
 
+struct TSQueryParserStateData
+{
+	/* State for gettoken_query */
+	char	   *buffer;			/* entire string we are scanning */
+	char	   *buf;			/* current scan point */
+	int			state;
+	int			count;			/* nesting count, incremented by (, 
+								   decremented by ) */
+
+	/* polish (prefix) notation in list, filled in by push* functions */
+	List	   *polstr;
+
+	/* Strings from operands are collected in op. curop is a pointer to
+	 * the end of used space of op. */
+	char	   *op;
+	char	   *curop;
+	int			lenop; /* allocated size of op */
+	int			sumlen; /* used size of op */
+
+	/* state for value's parser */
+	TSVectorParseState valstate;
+};
+
 /* parser's states */
 #define WAITOPERAND 1
 #define WAITOPERATOR	2
@@ -30,21 +53,10 @@
 #define WAITSINGLEOPERAND 4
 
 /*
- * node of query tree, also used
- * for storing polish notation in parser
+ * subroutine to parse the weight part, like ':1AB' of a query.
  */
-typedef struct ParseQueryNode
-{
-	int2		weight;
-	int2		type;
-	int4		val;
-	int2		distance;
-	int2		length;
-	struct ParseQueryNode *next;
-} ParseQueryNode;
-
 static char *
-get_weight(char *buf, int2 *weight)
+get_weight(char *buf, int16 *weight)
 {
 	*weight = 0;
 
@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight)
 	return buf;
 }
 
+/*
+ * token types for parsing
+ */
+typedef enum {
+	PT_END = 0,
+	PT_ERR = 1,
+	PT_VAL = 2,
+	PT_OPR = 3,
+	PT_OPEN = 4,
+	PT_CLOSE = 5,
+} ts_tokentype;
+
 /*
  * get token from query string
+ *
+ * *operator is filled in with OP_* when return value is PT_OPR
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
  */
-static int4
-gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+static ts_tokentype
+gettoken_query(TSQueryParserState state, 
+			   int8 *operator,
+			   int *lenval, char **strval, int16 *weight)
 {
 	while (1)
 	{
@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				{
 					(state->buf)++;		/* can safely ++, t_iseq guarantee
 										 * that pg_mblen()==1 */
-					*val = (int4) '!';
+					*operator = OP_NOT;
 					state->state = WAITOPERAND;
-					return OPR;
+					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, '('))
 				{
 					state->count++;
 					(state->buf)++;
 					state->state = WAITOPERAND;
-					return OPEN;
+					return PT_OPEN;
 				}
 				else if (t_iseq(state->buf, ':'))
 				{
@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				}
 				else if (!t_isspace(state->buf))
 				{
-					state->valstate.prsbuf = state->buf;
-					if (gettoken_tsvector(&(state->valstate)))
+					/* We rely on the tsvector parser to parse the value for us */
+					reset_tsvector_parser(state->valstate, state->buf);
+					if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
 					{
-						*strval = state->valstate.word;
-						*lenval = state->valstate.curpos - state->valstate.word;
-						state->buf = get_weight(state->valstate.prsbuf, weight);
+						state->buf = get_weight(state->buf, weight);
 						state->state = WAITOPERATOR;
-						return VAL;
+						return PT_VAL;
 					}
 					else if (state->state == WAITFIRSTOPERAND)
-						return END;
+						return PT_END;
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				}
 				break;
 			case WAITOPERATOR:
-				if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+				if (t_iseq(state->buf, '&'))
+				{
+					state->state = WAITOPERAND;
+					*operator = OP_AND;
+					(state->buf)++;
+					return PT_OPR;
+				}
+				if (t_iseq(state->buf, '|'))
 				{
 					state->state = WAITOPERAND;
-					*val = (int4) *(state->buf);
+					*operator = OP_OR;
 					(state->buf)++;
-					return OPR;
+					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, ')'))
 				{
 					(state->buf)++;
 					state->count--;
-					return (state->count < 0) ? ERR : CLOSE;
+					return (state->count < 0) ? PT_ERR : PT_CLOSE;
 				}
 				else if (*(state->buf) == '\0')
-					return (state->count) ? ERR : END;
+					return (state->count) ? PT_ERR : PT_END;
 				else if (!t_isspace(state->buf))
-					return ERR;
+					return PT_ERR;
 				break;
 			case WAITSINGLEOPERAND:
 				if (*(state->buf) == '\0')
-					return END;
+					return PT_END;
 				*strval = state->buf;
 				*lenval = strlen(state->buf);
 				state->buf += strlen(state->buf);
 				state->count++;
-				return VAL;
+				return PT_VAL;
 			default:
-				return ERR;
+				return PT_ERR;
 				break;
 		}
 		state->buf += pg_mblen(state->buf);
 	}
-	return END;
+	return PT_END;
 }
 
 /*
- * push new one in polish notation reverse view
+ * Push an operator to state->polstr
  */
 void
-pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+pushOperator(TSQueryParserState state, int8 oper)
 {
-	ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+	QueryOperator *tmp;
+
+	Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+	
+	tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
+	tmp->type = QI_OPR;
+	tmp->oper = oper;
+	/* left is filled in later with findoprnd */
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+{
+	QueryOperand *tmp;
 
-	tmp->weight = weight;
-	tmp->type = type;
-	tmp->val = val;
 	if (distance >= MAXSTRPOS)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("operand is too long in tsearch query: \"%s\"",
 						state->buffer)));
-	tmp->distance = distance;
+
+	tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+	tmp->type = QI_VAL;
+	tmp->weight = weight;
+	tmp->valcrc = (int32) valcrc;
 	tmp->length = lenval;
-	tmp->next = state->str;
-	state->str = tmp;
-	state->num++;
+	tmp->distance = distance;
+
+	state->polstr = lcons(tmp, state->polstr);
 }
 
 /*
- * This function is used for tsquery parsing
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
  */
 void
-pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
 {
-	pg_crc32	c;
+	pg_crc32	valcrc;
 
 	if (lenval >= MAXSTRLEN)
 		ereport(ERROR,
@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
 				 errmsg("word is too long in tsearch query: \"%s\"",
 						state->buffer)));
 
-	INIT_CRC32(c);
-	COMP_CRC32(c, strval, lenval);
-	FIN_CRC32(c);
-	pushquery(state, type, *(int4 *) &c,
-			  state->curop - state->op, lenval, weight);
+	INIT_CRC32(valcrc);
+	COMP_CRC32(valcrc, strval, lenval);
+	FIN_CRC32(valcrc);
+	pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
 
+	/* append the value string to state.op, enlarging buffer if needed first */
 	while (state->curop - state->op + lenval + 1 >= state->lenop)
 	{
-		int4		tmp = state->curop - state->op;
+		int	used = state->curop - state->op;
 
 		state->lenop *= 2;
 		state->op = (char *) repalloc((void *) state->op, state->lenop);
-		state->curop = state->op + tmp;
+		state->curop = state->op + used;
 	}
 	memcpy((void *) state->curop, (void *) strval, lenval);
 	state->curop += lenval;
 	*(state->curop) = '\0';
 	state->curop++;
 	state->sumlen += lenval + 1 /* \0 */ ;
-	return;
 }
 
+
+/*
+ * Push a stopword placeholder to state->polstr
+ */
+void
+pushStop(TSQueryParserState state)
+{
+	QueryOperand *tmp;
+
+	tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+	tmp->type = QI_VALSTOP;
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+
 #define STACKDEPTH	32
 
 /*
- * make polish notation of query
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
  */
-static int4
-makepol(TSQueryParserState * state,
-		void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+static void
+makepol(TSQueryParserState state, 
+		PushFunction pushval,
+		void *opaque)
 {
-	int4		val = 0,
-				type;
-	int4		lenval = 0;
+	int8		operator = 0;
+	ts_tokentype type;
+	int			lenval = 0;
 	char	   *strval = NULL;
-	int4		stack[STACKDEPTH];
-	int4		lenstack = 0;
-	int2		weight = 0;
+	int8		opstack[STACKDEPTH];
+	int			lenstack = 0;
+	int16		weight = 0;
 
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
 	{
 		switch (type)
 		{
-			case VAL:
-				pushval(state, VAL, strval, lenval, weight);
-				while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-									stack[lenstack - 1] == (int4) '!'))
+			case PT_VAL:
+				pushval(opaque, state, strval, lenval, weight);
+				while (lenstack && (opstack[lenstack - 1] == OP_AND ||
+									opstack[lenstack - 1] == OP_NOT))
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				}
 				break;
-			case OPR:
-				if (lenstack && val == (int4) '|')
-					pushquery(state, OPR, val, 0, 0, 0);
+			case PT_OPR:
+				if (lenstack && operator == OP_OR)
+					pushOperator(state, OP_OR);
 				else
 				{
 					if (lenstack == STACKDEPTH)			/* internal error */
 						elog(ERROR, "tsquery stack too small");
-					stack[lenstack] = val;
+					opstack[lenstack] = operator;
 					lenstack++;
 				}
 				break;
-			case OPEN:
-				if (makepol(state, pushval) == ERR)
-					return ERR;
-				if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-								 stack[lenstack - 1] == (int4) '!'))
+			case PT_OPEN:
+				makepol(state, pushval, opaque);
+
+				if (lenstack && (opstack[lenstack - 1] == OP_AND ||
+								 opstack[lenstack - 1] == OP_NOT))
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				}
 				break;
-			case CLOSE:
+			case PT_CLOSE:
 				while (lenstack)
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				};
-				return END;
-				break;
-			case ERR:
+				return;
+			case PT_ERR:
 			default:
 				ereport(ERROR,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("syntax error in tsearch query: \"%s\"",
 								state->buffer)));
-				return ERR;
-
 		}
 	}
 	while (lenstack)
 	{
 		lenstack--;
-		pushquery(state, OPR, stack[lenstack], 0, 0, 0);
-	};
-	return END;
+		pushOperator(state, opstack[lenstack]);
+	}
 }
 
+/*
+ * Fills in the left-fields previously left unfilled. The input
+ * QueryItems must be in polish (prefix) notation. 
+ */
 static void
-findoprnd(QueryItem * ptr, int4 *pos)
+findoprnd(QueryItem *ptr, int *pos)
 {
-	if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
-	{
-		ptr[*pos].left = 0;
-		(*pos)++;
-	}
-	else if (ptr[*pos].val == (int4) '!')
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (ptr[*pos].type == QI_VAL ||
+		ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
+									   * they haven't been cleansed
+									   * away yet.
+									   */
 	{
-		ptr[*pos].left = 1;
 		(*pos)++;
-		findoprnd(ptr, pos);
 	}
-	else
+	else 
 	{
-		QueryItem  *curitem = &ptr[*pos];
-		int4		tmp = *pos;
+		Assert(ptr[*pos].type == QI_OPR);
 
-		(*pos)++;
-		findoprnd(ptr, pos);
-		curitem->left = *pos - tmp;
-		findoprnd(ptr, pos);
+		if (ptr[*pos].operator.oper == OP_NOT)
+		{
+			ptr[*pos].operator.left = 1;
+			(*pos)++;
+			findoprnd(ptr, pos);
+		}
+		else
+		{
+			QueryOperator  *curitem = &ptr[*pos].operator;
+			int	tmp = *pos;
+
+			Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+
+			(*pos)++;
+			findoprnd(ptr, pos);
+			curitem->left = *pos - tmp;
+			findoprnd(ptr, pos);
+		}
 	}
 }
 
-
 /*
- * input
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its 
+ * private state.
+ *
+ * The returned query might contain QI_VALSTOP nodes. The caller is responsible
+ * for cleaning them up (with clean_fakeval)
  */
 TSQuery
-parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+parse_tsquery(char *buf, 
+			  PushFunction pushval,
+			  void *opaque,
+			  bool isplain)
 {
-	TSQueryParserState state;
-	int4		i;
+	struct TSQueryParserStateData state;
+	int			i;
 	TSQuery		query;
-	int4		commonlen;
+	int			commonlen;
 	QueryItem  *ptr;
-	ParseQueryNode *tmp;
-	int4		pos = 0;
+	int			pos = 0;
+	ListCell   *cell;
 
 	/* init state */
 	state.buffer = buf;
 	state.buf = buf;
 	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 	state.count = 0;
-	state.num = 0;
-	state.str = NULL;
-	state.cfg_id = cfg_id;
+	state.polstr = NIL;
 
 	/* init value parser's state */
-	state.valstate.oprisdelim = true;
-	state.valstate.len = 32;
-	state.valstate.word = (char *) palloc(state.valstate.len);
+	state.valstate = init_tsvector_parser(NULL, true);
 
 	/* init list of operand */
 	state.sumlen = 0;
@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
 	*(state.curop) = '\0';
 
 	/* parse query & make polish notation (postfix, but in reverse order) */
-	makepol(&state, pushval);
-	pfree(state.valstate.word);
-	if (!state.num)
+	makepol(&state, pushval, opaque);
+
+	close_tsvector_parser(state.valstate);
+
+	if (list_length(state.polstr) == 0)
 	{
 		ereport(NOTICE,
 				(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
 		return query;
 	}
 
-	/* make finish struct */
-	commonlen = COMPUTESIZE(state.num, state.sumlen);
-	query = (TSQuery) palloc(commonlen);
+	/* Pack the QueryItems in the final TSQuery struct to return to caller */
+	commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+	query = (TSQuery) palloc0(commonlen);
 	SET_VARSIZE(query, commonlen);
-	query->size = state.num;
+	query->size = list_length(state.polstr);
 	ptr = GETQUERY(query);
 
-	/* set item in polish notation */
-	for (i = 0; i < state.num; i++)
+	/* Copy QueryItems to TSQuery */
+	i = 0;
+	foreach(cell, state.polstr)
 	{
-		ptr[i].weight = state.str->weight;
-		ptr[i].type = state.str->type;
-		ptr[i].val = state.str->val;
-		ptr[i].distance = state.str->distance;
-		ptr[i].length = state.str->length;
-		tmp = state.str->next;
-		pfree(state.str);
-		state.str = tmp;
+		QueryItem *item = (QueryItem *) lfirst(cell);
+
+		switch(item->type)
+		{
+			case QI_VAL:
+				memcpy(&ptr[i], item, sizeof(QueryOperand));
+				break;
+			case QI_VALSTOP:
+				ptr[i].type = QI_VALSTOP;
+				break;
+			case QI_OPR:
+				memcpy(&ptr[i], item, sizeof(QueryOperator));
+				break;
+			default:
+				elog(ERROR, "unknown QueryItem type %d", item->type);
+		}
+		i++;
 	}
 
-	/* set user friendly-operand view */
+	/* Copy all the operand strings to TSQuery */
 	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
 	pfree(state.op);
 
-	/* set left operand's position for every operator */
+	/* Set left operand pointers for every operator. */
 	pos = 0;
 	findoprnd(ptr, &pos);
 
 	return query;
 }
 
+static void
+pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
+			 int16 weight)
+{
+	pushValue(state, strval, lenval, weight);
+}
+
 /*
  * in without morphology
  */
@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
 
 	pg_verifymbstr(in, strlen(in), false);
 
-	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
 }
 
 /*
@@ -443,13 +556,14 @@ typedef struct
 	char	   *buf;
 	char	   *cur;
 	char	   *op;
-	int4		buflen;
+	int			buflen;
 } INFIX;
 
-#define RESIZEBUF(inf,addsize) \
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
 { \
-	int4 len = (inf)->cur - (inf)->buf; \
+	int len = (inf)->cur - (inf)->buf; \
 	(inf)->buflen *= 2; \
 	(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
 	(inf)->cur = (inf)->buf + len; \
@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
 static void
 infix(INFIX * in, bool first)
 {
-	if (in->curpol->type == VAL)
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (in->curpol->type == QI_VAL)
 	{
-		char	   *op = in->op + in->curpol->distance;
+		QueryOperand *curpol = &in->curpol->operand;
+		char	   *op = in->op + curpol->distance;
 		int			clen;
 
-		RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+		RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
 		*(in->cur) = '\'';
 		in->cur++;
 		while (*op)
@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
 		}
 		*(in->cur) = '\'';
 		in->cur++;
-		if (in->curpol->weight)
+		if (curpol->weight)
 		{
 			*(in->cur) = ':';
 			in->cur++;
-			if (in->curpol->weight & (1 << 3))
+			if (curpol->weight & (1 << 3))
 			{
 				*(in->cur) = 'A';
 				in->cur++;
 			}
-			if (in->curpol->weight & (1 << 2))
+			if (curpol->weight & (1 << 2))
 			{
 				*(in->cur) = 'B';
 				in->cur++;
 			}
-			if (in->curpol->weight & (1 << 1))
+			if (curpol->weight & (1 << 1))
 			{
 				*(in->cur) = 'C';
 				in->cur++;
 			}
-			if (in->curpol->weight & 1)
+			if (curpol->weight & 1)
 			{
 				*(in->cur) = 'D';
 				in->cur++;
@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
 		*(in->cur) = '\0';
 		in->curpol++;
 	}
-	else if (in->curpol->val == (int4) '!')
+	else if (in->curpol->operator.oper == OP_NOT)
 	{
 		bool		isopr = false;
 
@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
 		in->cur++;
 		*(in->cur) = '\0';
 		in->curpol++;
-		if (in->curpol->type == OPR)
+
+		if (in->curpol->type == QI_OPR)
 		{
 			isopr = true;
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, "( ");
 			in->cur = strchr(in->cur, '\0');
 		}
+
 		infix(in, isopr);
 		if (isopr)
 		{
@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
 	}
 	else
 	{
-		int4		op = in->curpol->val;
+		int8		op = in->curpol->operator.oper;
 		INFIX		nrm;
 
 		in->curpol++;
-		if (op == (int4) '|' && !first)
+		if (op == OP_OR && !first)
 		{
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, "( ");
@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
 
 		/* print operator & right operand */
 		RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
-		sprintf(in->cur, " %c %s", op, nrm.buf);
+		switch(op)
+		{
+			case OP_OR:
+				sprintf(in->cur, " | %s", nrm.buf);
+				break;
+			case OP_AND:
+				sprintf(in->cur, " & %s", nrm.buf);
+				break;
+			default:
+				/* OP_NOT is handled in the if-branch above */
+				elog(ERROR, "unexpected operator type %d", op);
+		}
 		in->cur = strchr(in->cur, '\0');
 		pfree(nrm.buf);
 
-		if (op == (int4) '|' && !first)
+		if (op == OP_OR && !first)
 		{
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, " )");
@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
 	pq_sendint(&buf, query->size, sizeof(int32));
 	for (i = 0; i < query->size; i++)
 	{
-		int			tmp;
-
 		pq_sendint(&buf, item->type, sizeof(item->type));
-		pq_sendint(&buf, item->weight, sizeof(item->weight));
-		pq_sendint(&buf, item->left, sizeof(item->left));
-		pq_sendint(&buf, item->val, sizeof(item->val));
-
-		/*
-		 * We are sure that sizeof(WordEntry) == sizeof(int32), and about
-		 * layout of QueryItem
-		 */
-		tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
-		pq_sendint(&buf, tmp, sizeof(tmp));
 
+		switch(item->type)
+		{
+			case QI_VAL:
+				pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
+				pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
+				pq_sendint(&buf, item->operand.length, sizeof(int16));
+				/* istrue flag is just for temporary use in tsrank.c/Cover,
+				 * so we don't need to transfer that */
+				break;
+			case QI_OPR:
+				pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
+				if (item->operator.oper != OP_NOT)
+					pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
+				break;
+			default:
+				elog(ERROR, "unknown tsquery node type %d", item->type);
+		}
 		item++;
 	}
 
 	item = GETQUERY(query);
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL)
-			pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+		if (item->type == QI_VAL)
+			pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
 		item++;
 	}
 
@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	TSQuery		query;
 	int			i,
 				size,
-				tmp,
-				len = HDRSIZETQ;
+				len;
 	QueryItem  *item;
 	int			datalen = 0;
 	char	   *ptr;
@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	size = pq_getmsgint(buf, sizeof(uint32));
 	if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
 		elog(ERROR, "invalid size of tsquery");
-	len += sizeof(QueryItem) * size;
+
+	len = HDRSIZETQ + sizeof(QueryItem) * size;
 
 	query = (TSQuery) palloc(len);
 	query->size = size;
@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	for (i = 0; i < size; i++)
 	{
 		item->type = (int8) pq_getmsgint(buf, sizeof(int8));
-		item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
-		item->left = (int16) pq_getmsgint(buf, sizeof(int16));
-		item->val = (int32) pq_getmsgint(buf, sizeof(int32));
-		tmp = pq_getmsgint(buf, sizeof(int32));
-		memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
-
-		/*
-		 * Sanity checks
-		 */
-		if (item->type == VAL)
-		{
-			datalen += item->length + 1;		/* \0 */
-		}
-		else if (item->type == OPR)
+
+		switch(item->type)
 		{
-			if (item->val == '|' || item->val == '&')
-			{
-				if (item->left <= 0 || i + item->left >= size)
-					elog(ERROR, "invalid pointer to left operand");
-			}
+			case QI_VAL:
+				item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
+				item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
+				item->operand.length = pq_getmsgint(buf, sizeof(int16));
+
+				/*
+				 * Check that datalen doesn't grow too large. Without the
+				 * check, a malicious client could induce a buffer overflow
+				 * by sending a tsquery whose size exceeds 2GB. datalen
+				 * would overflow, we would allocate a too small buffer below,
+				 * and overflow the buffer. Because operand.length is a 20-bit
+				 * field, adding one such value to datalen must exceed
+				 * MaxAllocSize before wrapping over the 32-bit datalen field,
+				 * so this check will protect from it.
+				 */
+				if (datalen > MAXSTRLEN)
+					elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+				/* We can calculate distance from datalen, no need to send it
+				 * through the wire. If we did, we would have to check that
+				 * it's valid anyway.
+				 */
+				item->operand.distance = datalen;
+
+				datalen += item->operand.length + 1;		/* \0 */
 
-			if (i == size - 1)
-				elog(ERROR, "invalid pointer to right operand");
+				break;
+			case QI_OPR:
+				item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
+				if (item->operator.oper != OP_NOT &&
+					item->operator.oper != OP_OR &&
+					item->operator.oper != OP_AND)
+					elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+				if(item->operator.oper != OP_NOT)
+				{
+					item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+					/*
+					 * Sanity checks
+					 */
+					if (item->operator.left <= 0 || i + item->operator.left >= size)
+						elog(ERROR, "invalid pointer to left operand");
+
+					/* XXX: Though there's no way to construct a TSQuery that's
+					 * not in polish notation, we don't enforce that for
+					 * queries received from client in binary mode. Is there
+					 * anything that relies on it?
+					 *
+					 * XXX: The tree could be malformed in other ways too,
+					 * a node could have two parents, for example.
+					 */
+				}
+
+				if (i == size - 1)
+					elog(ERROR, "invalid pointer to right operand");
+				break;
+			default:
+				elog(ERROR, "unknown tsquery node type %d", item->type);
 		}
-		else
-			elog(ERROR, "unknown tsquery node type");
 
 		item++;
 	}
@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	ptr = GETOPERAND(query);
 	for (i = 0; i < size; i++)
 	{
-		if (item->type == VAL)
+		if (item->type == QI_VAL)
 		{
-			item->distance = ptr - GETOPERAND(query);
 			memcpy(ptr,
-				   pq_getmsgbytes(buf, item->length),
-				   item->length);
-			ptr += item->length;
+				   pq_getmsgbytes(buf, item->operand.length),
+				   item->operand.length);
+			ptr += item->operand.length;
 			*ptr++ = '\0';
 		}
 		item++;
@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
 	INFIX		nrm;
 	text	   *res;
 	QueryItem  *q;
-	int4		len;
+	int			len;
 
 	if (query->size == 0)
 	{
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
index 7991a4ad198..22e6f7c8198 100644
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -35,20 +35,23 @@ maketree(QueryItem * in)
 
 	node->valnode = in;
 	node->right = node->left = NULL;
-	if (in->type == OPR)
+	if (in->type == QI_OPR)
 	{
 		node->right = maketree(in + 1);
-		if (in->val != (int4) '!')
-			node->left = maketree(in + in->left);
+		if (in->operator.oper != OP_NOT)
+			node->left = maketree(in + in->operator.left);
 	}
 	return node;
 }
 
+/*
+ * Internal state for plaintree and plainnode
+ */
 typedef struct
 {
 	QueryItem  *ptr;
-	int4		len;
-	int4		cur;
+	int		len; /* allocated size of ptr */
+	int		cur; /* number of elements in ptr */
 } PLAINTREE;
 
 static void
@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
 		state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
 	}
 	memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		state->cur++;
-	else if (node->valnode->val == (int4) '!')
+	else if (node->valnode->operator.oper == OP_NOT)
 	{
-		state->ptr[state->cur].left = 1;
+		state->ptr[state->cur].operator.left = 1;
 		state->cur++;
 		plainnode(state, node->right);
 	}
 	else
 	{
-		int4		cur = state->cur;
+		int	cur = state->cur;
 
 		state->cur++;
 		plainnode(state, node->right);
-		state->ptr[cur].left = state->cur - cur;
+		state->ptr[cur].operator.left = state->cur - cur;
 		plainnode(state, node->left);
 	}
 	pfree(node);
 }
 
 /*
- * make plain view of tree from 'normal' view of tree
+ * make plain view of tree from a NODE-tree representation
  */
 static QueryItem *
-plaintree(NODE * root, int4 *len)
+plaintree(NODE * root, int *len)
 {
 	PLAINTREE	pl;
 
 	pl.cur = 0;
 	pl.len = 16;
-	if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+	if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
 	{
 		pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
 		plainnode(&pl, root);
@@ -122,17 +125,17 @@ freetree(NODE * node)
 static NODE *
 clean_NOT_intree(NODE * node)
 {
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		return node;
 
-	if (node->valnode->val == (int4) '!')
+	if (node->valnode->operator.oper == OP_NOT)
 	{
 		freetree(node);
 		return NULL;
 	}
 
 	/* operator & or | */
-	if (node->valnode->val == (int4) '|')
+	if (node->valnode->operator.oper == OP_OR)
 	{
 		if ((node->left = clean_NOT_intree(node->left)) == NULL ||
 			(node->right = clean_NOT_intree(node->right)) == NULL)
@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
 	else
 	{
 		NODE	   *res = node;
+		
+		Assert(node->valnode->operator.oper == OP_AND);
 
 		node->left = clean_NOT_intree(node->left);
 		node->right = clean_NOT_intree(node->right);
@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
 }
 
 QueryItem *
-clean_NOT(QueryItem * ptr, int4 *len)
+clean_NOT(QueryItem * ptr, int *len)
 {
 	NODE	   *root = maketree(ptr);
 
@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
 #undef V_UNKNOWN
 #endif
 
-#define V_UNKNOWN	0
-#define V_TRUE		1
-#define V_FALSE		2
-#define V_STOP		3
+/*
+ * Values returned through the 'result' output parameter of clean_fakeval_intree
+ */
+#define V_UNKNOWN	0 /* the expression can't be evaluated statically */
+#define V_TRUE		1 /* the expression is always true (not implemented) */
+#define V_FALSE		2 /* the expression is always false (not implemented) */
+#define V_STOP		3 /* the expression is a stop word */
 
 /*
  * Clean query tree from values which is always in
@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
 	char		lresult = V_UNKNOWN,
 				rresult = V_UNKNOWN;
 
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		return node;
-	else if (node->valnode->type == VALSTOP)
+	else 
+	if (node->valnode->type == QI_VALSTOP)
 	{
 		pfree(node);
 		*result = V_STOP;
 		return NULL;
 	}
 
+	Assert(node->valnode->type == QI_OPR);
 
-	if (node->valnode->val == (int4) '!')
+	if (node->valnode->operator.oper == OP_NOT)
 	{
 		node->right = clean_fakeval_intree(node->right, &rresult);
 		if (!node->right)
@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
 
 		node->left = clean_fakeval_intree(node->left, &lresult);
 		node->right = clean_fakeval_intree(node->right, &rresult);
+
 		if (lresult == V_STOP && rresult == V_STOP)
 		{
 			freetree(node);
@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
 }
 
 QueryItem *
-clean_fakeval(QueryItem * ptr, int4 *len)
+clean_fakeval(QueryItem * ptr, int *len)
 {
 	NODE	   *root = maketree(ptr);
 	char		result = V_UNKNOWN;
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index fd97c2796df..cbf06f7adeb 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
 }
 
 static QTNode *
-join_tsqueries(TSQuery a, TSQuery b)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator)
 {
 	QTNode	   *res = (QTNode *) palloc0(sizeof(QTNode));
 
 	res->flags |= QTN_NEEDFREE;
 
 	res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-	res->valnode->type = OPR;
+	res->valnode->type = QI_OPR;
+	res->valnode->operator.oper = operator;
 
 	res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
 	res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
 		PG_RETURN_POINTER(a);
 	}
 
-	res = join_tsqueries(a, b);
-
-	res->valnode->val = '&';
+	res = join_tsqueries(a, b, OP_AND);
 
 	query = QTN2QT(res);
 
@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
 		PG_RETURN_POINTER(a);
 	}
 
-	res = join_tsqueries(a, b);
-
-	res->valnode->val = '|';
+	res = join_tsqueries(a, b, OP_OR);
 
 	query = QTN2QT(res);
 
@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
 	res->flags |= QTN_NEEDFREE;
 
 	res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-	res->valnode->type = OPR;
-	res->valnode->val = '!';
+	res->valnode->type = QI_OPR;
+	res->valnode->operator.oper = OP_NOT;
 
 	res->child = (QTNode **) palloc0(sizeof(QTNode *));
 	res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
 
 	for (i = 0; i < a->size; i++)
 	{
-		if (ptr->type == VAL)
-			sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
+		if (ptr->type == QI_VAL)
+			sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
 		ptr++;
 	}
 
@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
 	for (i = 0; i < ex->size; i++)
 	{
 		iq = GETQUERY(query);
-		if (ie[i].type != VAL)
+		if (ie[i].type != QI_VAL)
 			continue;
 		for (j = 0; j < query->size; j++)
-			if (iq[j].type == VAL && ie[i].val == iq[j].val)
+			if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
 			{
 				j = query->size + 1;
 				break;
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c
index f0d22c644ae..db2fe6c53ef 100644
--- a/src/backend/utils/adt/tsquery_rewrite.c
+++ b/src/backend/utils/adt/tsquery_rewrite.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
 	return 1;
 }
 
+/*
+ * If node is equal to ex, replace it with subs and set *isfind. Replacement
+ * is done by returning node, a copy of subs, or NULL when subs is NULL.
+ */
 static QTNode *
 findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 {
 
-	if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
+	if ((node->sign & ex->sign) != ex->sign || 
+		node->valnode->type != ex->valnode->type)
 		return node;
 
 	if (node->flags & QTN_NOCHANGE)
 		return node;
-
-	if (node->valnode->type == OPR)
+	
+	if (node->valnode->type == QI_OPR)
 	{
+		if (node->valnode->operator.oper != ex->valnode->operator.oper)
+			return node;
+
 		if (node->nchild == ex->nchild)
 		{
 			if (QTNEq(node, ex))
@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 		}
 		else if (node->nchild > ex->nchild)
 		{
+			/*
+			 * AND and OR are commutative and associative, so we check if a
+			 * subset of the children match. For example, if node is
+			 * A | B | C, and ex is B | C, we have a match after we regroup
+			 * node as A | (B | C).
+			 */
 			int		   *counters = (int *) palloc(sizeof(int) * node->nchild);
 			int			i;
 			QTNode	   *tnode = (QTNode *) palloc(sizeof(QTNode));
@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 			pfree(counters);
 		}
 	}
-	else if (QTNEq(node, ex))
+	else 
 	{
-		QTNFree(node);
-		if (subs)
-		{
-			node = QTNCopy(subs);
-			node->flags |= QTN_NOCHANGE;
-		}
-		else
+		Assert(node->valnode->type == QI_VAL);
+
+		if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
+			return node;
+		else if (QTNEq(node, ex))
 		{
-			node = NULL;
+			QTNFree(node);
+			if (subs)
+			{
+				node = QTNCopy(subs);
+				node->flags |= QTN_NOCHANGE;
+			}
+			else
+			{
+				node = NULL;
+			}
+			*isfind = true;
 		}
-		*isfind = true;
 	}
 
 	return node;
@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
 {
 	root = findeq(root, ex, subs, isfind);
 
-	if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
+	if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
 	{
 		int			i;
 
@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
 	if (!root)
 		return NULL;
 
-	if (root->valnode->type == OPR)
+	if (root->valnode->type == QI_OPR)
 	{
 		int			i,
 					j = 0;
@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
 
 		root->nchild = j;
 
-		if (root->valnode->val == (int4) '!' && root->nchild == 0)
+		if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
 		{
 			QTNFree(root);
 			root = NULL;
@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
 		elog(ERROR, "array must be one-dimensional, not %d dimensions",
 			 ARR_NDIM(qa));
 	if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
-		elog(ERROR, "array should have only three elements");
+		elog(ERROR, "array must have three elements");
 	if (ARR_ELEMTYPE(qa) != TSQUERYOID)
-		elog(ERROR, "array should contain tsquery type");
+		elog(ERROR, "array must contain tsquery elements");
 
 	deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
 
@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
 		subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
 
 	tree = findsubquery(tree, qex, subs, NULL);
+
 	QTNFree(qex);
 	QTNFree(subs);
 
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
index ae8cc318da9..e378661488b 100644
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,7 +17,6 @@
 #include "tsearch/ts_type.h"
 #include "tsearch/ts_utils.h"
 
-
 QTNode *
 QT2QTN(QueryItem * in, char *operand)
 {
@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
 
 	node->valnode = in;
 
-	if (in->type == OPR)
+	if (in->type == QI_OPR)
 	{
 		node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
 		node->child[0] = QT2QTN(in + 1, operand);
 		node->sign = node->child[0]->sign;
-		if (in->val == (int4) '!')
+		if (in->operator.oper == OP_NOT)
 			node->nchild = 1;
 		else
 		{
 			node->nchild = 2;
-			node->child[1] = QT2QTN(in + in->left, operand);
+			node->child[1] = QT2QTN(in + in->operator.left, operand);
 			node->sign |= node->child[1]->sign;
 		}
 	}
 	else if (operand)
 	{
-		node->word = operand + in->distance;
-		node->sign = 1 << (in->val % 32);
+		node->word = operand + in->operand.distance;
+		node->sign = 1 << (in->operand.valcrc % 32);
 	}
 
 	return node;
@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
 	if (!in)
 		return;
 
-	if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+	if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
 		pfree(in->word);
 
 	if (in->child)
 	{
 		if (in->valnode)
 		{
-			if (in->valnode->type == OPR && in->nchild > 0)
+			if (in->valnode->type == QI_OPR && in->nchild > 0)
 			{
 				int			i;
 
@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
 {
 	if (an->valnode->type != bn->valnode->type)
 		return (an->valnode->type > bn->valnode->type) ? -1 : 1;
-	else if (an->valnode->val != bn->valnode->val)
-		return (an->valnode->val > bn->valnode->val) ? -1 : 1;
-	else if (an->valnode->type == VAL)
-	{
-		if (an->valnode->length == bn->valnode->length)
-			return strncmp(an->word, bn->word, an->valnode->length);
-		else
-			return (an->valnode->length > bn->valnode->length) ? -1 : 1;
-	}
-	else if (an->nchild != bn->nchild)
+	
+	if (an->valnode->type == QI_OPR)
 	{
-		return (an->nchild > bn->nchild) ? -1 : 1;
+		QueryOperator *ao = &an->valnode->operator;
+		QueryOperator *bo = &bn->valnode->operator;
+
+		if(ao->oper != bo->oper)
+			return (ao->oper > bo->oper) ? -1 : 1;
+
+		if (an->nchild != bn->nchild)
+			return (an->nchild > bn->nchild) ? -1 : 1;
+
+		{
+			int			i,
+						res;
+
+			for (i = 0; i < an->nchild; i++)
+				if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+					return res;
+		}
+		return 0;
 	}
 	else
 	{
-		int			i,
-					res;
+		QueryOperand *ao = &an->valnode->operand;
+		QueryOperand *bo = &bn->valnode->operand;
 
-		for (i = 0; i < an->nchild; i++)
-			if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
-				return res;
-	}
+		Assert(an->valnode->type == QI_VAL);
+
+		if (ao->valcrc != bo->valcrc)
+		{
+			return (ao->valcrc > bo->valcrc) ? -1 : 1;
+		}
 
-	return 0;
+		if (ao->length == bo->length)
+			return strncmp(an->word, bn->word, ao->length);
+		else
+			return (ao->length > bo->length) ? -1 : 1;
+	}
 }
 
 static int
@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
 	return (QTNodeCompare(a, b) == 0) ? true : false;
 }
 
+/*
+ * Remove unnecessary intermediate nodes. For example:
+ *
+ *  OR          OR
+ * a  OR    -> a b c
+ *   b  c      
+ */
 void
 QTNTernary(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
 
 	for (i = 0; i < in->nchild; i++)
 	{
-		if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
+		QTNode	   *cc = in->child[i];
+
+		if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
 		{
-			QTNode	   *cc = in->child[i];
 			int			oldnchild = in->nchild;
 
 			in->nchild += cc->nchild - 1;
@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
 			memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
 			i += cc->nchild - 1;
 
+			if(cc->flags & QTN_NEEDFREE)
+				pfree(cc->valnode);
 			pfree(cc);
 		}
 	}
 }
 
+/*
+ * Convert a tree to binary tree by inserting intermediate nodes.
+ * (Opposite of QTNTernary)
+ */
 void
 QTNBinary(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
 		nn->sign = nn->child[0]->sign | nn->child[1]->sign;
 
 		nn->valnode->type = in->valnode->type;
-		nn->valnode->val = in->valnode->val;
+		nn->valnode->operator.oper = in->valnode->operator.oper;
 
 		in->child[0] = nn;
 		in->child[1] = in->child[in->nchild - 1];
@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
 	}
 }
 
+/*
+ * Count the nodes in the tree (*nnode) and the total length of all operand
+ * strings, including their '\0' terminators (*sumlen).
+ */
 static void
-cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
+cntsize(QTNode * in, int *sumlen, int *nnode)
 {
 	*nnode += 1;
-	if (in->valnode->type == OPR)
+	if (in->valnode->type == QI_OPR)
 	{
 		int			i;
 
@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
 	}
 	else
 	{
-		*sumlen += in->valnode->length + 1;
+		*sumlen += in->valnode->operand.length + 1;
 	}
 }
 
@@ -234,22 +266,26 @@ typedef struct
 } QTN2QTState;
 
 static void
-fillQT(QTN2QTState * state, QTNode * in)
+fillQT(QTN2QTState *state, QTNode *in)
 {
-	*(state->curitem) = *(in->valnode);
-
-	if (in->valnode->type == VAL)
+	if (in->valnode->type == QI_VAL)
 	{
-		memcpy(state->curoperand, in->word, in->valnode->length);
-		state->curitem->distance = state->curoperand - state->operand;
-		state->curoperand[in->valnode->length] = '\0';
-		state->curoperand += in->valnode->length + 1;
+		memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
+
+		memcpy(state->curoperand, in->word, in->valnode->operand.length);
+		state->curitem->operand.distance = state->curoperand - state->operand;
+		state->curoperand[in->valnode->operand.length] = '\0';
+		state->curoperand += in->valnode->operand.length + 1;
 		state->curitem++;
 	}
 	else
 	{
 		QueryItem  *curitem = state->curitem;
 
+		Assert(in->valnode->type == QI_OPR);
+
+		memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
+
 		Assert(in->nchild <= 2);
 		state->curitem++;
 
@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
 
 		if (in->nchild == 2)
 		{
-			curitem->left = state->curitem - curitem;
+			curitem->operator.left = state->curitem - curitem;
 			fillQT(state, in->child[1]);
 		}
 	}
@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
 	*(out->valnode) = *(in->valnode);
 	out->flags |= QTN_NEEDFREE;
 
-	if (in->valnode->type == VAL)
+	if (in->valnode->type == QI_VAL)
 	{
-		out->word = palloc(in->valnode->length + 1);
-		memcpy(out->word, in->word, in->valnode->length);
-		out->word[in->valnode->length] = '\0';
+		out->word = palloc(in->valnode->operand.length + 1);
+		memcpy(out->word, in->word, in->valnode->operand.length);
+		out->word[in->valnode->operand.length] = '\0';
 		out->flags |= QTN_WORDFREE;
 	}
 	else
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 8b2ab884c8c..d48e9b4a470 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,7 +68,7 @@ cnt_length(TSVector t)
 }
 
 static int4
-WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
+WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
 {
 	if (ptr->len == item->length)
 		return strncmp(
@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
 }
 
 static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryItem * item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
 {
 	WordEntry  *StopLow = ARRPTR(t);
 	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
 }
 
 
+/*
+ * sort QueryOperands by (length, word)
+ */
 static int
-compareQueryItem(const void *a, const void *b, void *arg)
+compareQueryOperand(const void *a, const void *b, void *arg)
 {
 	char	   *operand = (char *) arg;
+	QueryOperand *qa = (*(QueryOperand **) a);
+	QueryOperand *qb = (*(QueryOperand **) b);
 
-	if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
-		return strncmp(operand + (*(QueryItem **) a)->distance,
-					   operand + (*(QueryItem **) b)->distance,
-					   (*(QueryItem **) b)->length);
+	if (qa->length == qb->length)
+		return strncmp(operand + qa->distance,
+					   operand + qb->distance,
+					   qb->length);
 
-	return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
+	return (qa->length > qb->length) ? 1 : -1;
 }
 
-static QueryItem **
-SortAndUniqItems(char *operand, QueryItem * item, int *size)
+/*
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
+ * The returned QueryOperands are pointers to the original QueryOperands
+ * in the query.
+ *
+ * Length of the returned array is stored in *size
+ */
+static QueryOperand **
+SortAndUniqItems(TSQuery q, int *size)
 {
-	QueryItem **res,
+	char *operand = GETOPERAND(q);
+	QueryItem * item = GETQUERY(q);
+	QueryOperand **res,
 			  **ptr,
 			  **prevptr;
 
-	ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
+	ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
 
+	/* Collect all operands from the tree to res */
 	while ((*size)--)
 	{
-		if (item->type == VAL)
+		if (item->type == QI_VAL)
 		{
-			*ptr = item;
+			*ptr = (QueryOperand *) item;
 			ptr++;
 		}
 		item++;
@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
 	if (*size < 2)
 		return res;
 
-	qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
+	qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
 
 	ptr = res + 1;
 	prevptr = res;
 
+	/* remove duplicates */
 	while (ptr - res < *size)
 	{
-		if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+		if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
 		{
 			prevptr++;
 			*prevptr = *ptr;
@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
 				lenct,
 				dist;
 	float		res = -1.0;
-	QueryItem **item;
+	QueryOperand **item;
 	int			size = q->size;
 
-	item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+	item = SortAndUniqItems(q, &size);
 	if (size < 2)
 	{
 		pfree(item);
@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
 				j,
 				i;
 	float		res = 0.0;
-	QueryItem **item;
+	QueryOperand **item;
 	int			size = q->size;
 
 	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
-	item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+	item = SortAndUniqItems(q, &size);
 
 	for (i = 0; i < size; i++)
 	{
@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
 	if (!t->size || !q->size)
 		return 0.0;
 
-	res = (item->type != VAL && item->val == (int4) '&') ?
+	/* XXX: What about NOT? */
+	res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
 		calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
 
 	if (res < 0)
@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
 }
 
 static bool
-checkcondition_QueryItem(void *checkval, QueryItem * val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val)
 {
 	return (bool) (val->istrue);
 }
@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
 	/* reset istrue flag */
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL)
-			item->istrue = 0;
+		if (item->type == QI_VAL)
+			item->operand.istrue = 0;
 		item++;
 	}
 }
@@ -484,7 +501,7 @@ typedef struct
 
 
 static bool
-Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
+Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
 {
 	DocRepresentation *ptr;
 	int			lastpos = ext->pos;
@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
 	while (ptr - doc < len)
 	{
 		for (i = 0; i < ptr->nitem; i++)
-			ptr->item[i]->istrue = 1;
-		if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
+		{
+			if(ptr->item[i]->type == QI_VAL)
+				ptr->item[i]->operand.istrue = 1;
+		}
+		if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
 		{
 			if (ptr->pos > ext->q)
 			{
@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
 	while (ptr >= doc + ext->pos)
 	{
 		for (i = 0; i < ptr->nitem; i++)
-			ptr->item[i]->istrue = 1;
-		if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
+			if(ptr->item[i]->type  == QI_VAL) /* XXX */
+				ptr->item[i]->operand.istrue = 1;
+		if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
 		{
 			if (ptr->pos < ext->p)
 			{
@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 	for (i = 0; i < query->size; i++)
 	{
-		if (item[i].type != VAL || item[i].istrue)
+		QueryOperand *curoperand;
+
+		if (item[i].type != QI_VAL)
+			continue;
+		
+		curoperand = &item[i].operand;
+		
+		if(item[i].operand.istrue)
 			continue;
 
-		entry = find_wordentry(txt, query, &(item[i]));
+		entry = find_wordentry(txt, query, curoperand);
 		if (!entry)
 			continue;
 
@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 		{
 			if (j == 0)
 			{
-				QueryItem  *kptr,
-						   *iptr = item + i;
 				int			k;
 
 				doc[cur].needfree = false;
@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 				for (k = 0; k < query->size; k++)
 				{
-					kptr = item + k;
+					QueryOperand *kptr = &item[k].operand;
+					QueryOperand *iptr = &item[i].operand;
+
 					if (k == i ||
-						(item[k].type == VAL &&
-						 compareQueryItem(&kptr, &iptr, operand) == 0))
+						(item[k].type == QI_VAL &&
+						 compareQueryOperand(&kptr, &iptr, operand) == 0))
 					{
+						/* if k == i, we've already checked above that its type == QI_VAL */
 						doc[cur].item[doc[cur].nitem] = item + k;
 						doc[cur].nitem++;
-						kptr->istrue = 1;
+						item[k].operand.istrue = 1;
 					}
 				}
 			}
@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 	if (cur > 0)
 	{
-		if (cur > 1)
-			qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+		qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
 		return doc;
 	}
 
@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
 {
 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 	TSVector	txt = PG_GETARG_TSVECTOR(1);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(2);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
 	int			method = PG_GETARG_INT32(3);
 	float		res;
 
@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
 {
 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 	TSVector	txt = PG_GETARG_TSVECTOR(1);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(2);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
 	float		res;
 
 	res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
@@ -778,7 +806,7 @@ Datum
 ts_rankcd_ttf(PG_FUNCTION_ARGS)
 {
 	TSVector	txt = PG_GETARG_TSVECTOR(0);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(1);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
 	int			method = PG_GETARG_INT32(2);
 	float		res;
 
@@ -793,7 +821,7 @@ Datum
 ts_rankcd_tt(PG_FUNCTION_ARGS)
 {
 	TSVector	txt = PG_GETARG_TSVECTOR(0);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(1);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
 	float		res;
 
 	res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 8ab024650f7..2866e028da0 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,22 +20,37 @@
 #include "tsearch/ts_utils.h"
 #include "utils/memutils.h"
 
+typedef struct
+{
+	WordEntry	entry;			/* should be first ! */
+	WordEntryPos *pos;
+	int			poslen;			/* number of elements in pos */
+} WordEntryIN;
 
 static int
 comparePos(const void *a, const void *b)
 {
-	if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
+	int apos = WEP_GETPOS(*(WordEntryPos *) a);
+	int bpos = WEP_GETPOS(*(WordEntryPos *) b);
+
+	if (apos == bpos)
 		return 0;
-	return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
+	return (apos > bpos) ? 1 : -1;
 }
 
+/*
+ * Removes duplicate pos entries. If there are two entries with the same pos
+ * but different weight, the higher weight is retained.
+ *
+ * Returns new length.
+ */
 static int
-uniquePos(WordEntryPos * a, int4 l)
+uniquePos(WordEntryPos * a, int l)
 {
 	WordEntryPos *ptr,
 			   *res;
 
-	if (l == 1)
+	if (l <= 1)
 		return l;
 
 	res = a;
@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg)
 }
 
 static int
-uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
 {
 	WordEntryIN *ptr,
 			   *res;
 
-	res = a;
+	Assert(l >= 1);
+
 	if (l == 1)
 	{
 		if (a->entry.haspos)
 		{
-			*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
-			*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
+			a->poslen = uniquePos(a->pos, a->poslen);
+			*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
 		}
 		return l;
 	}
+	res = a;
 
 	ptr = a + 1;
 	qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 		{
 			if (res->entry.haspos)
 			{
-				*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-				*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+				res->poslen = uniquePos(res->pos, res->poslen);
+				*outbuflen += res->poslen * sizeof(WordEntryPos);
 			}
 			*outbuflen += SHORTALIGN(res->entry.len);
 			res++;
@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 		{
 			if (res->entry.haspos)
 			{
-				int4		len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+				int	newlen = ptr->poslen + res->poslen;
+
+				/* Append ptr's positions to the end of res's positions */
 
-				res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
-				memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
-					   &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
-				*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+				res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
+				memcpy(&res->pos[res->poslen],
+					   ptr->pos, ptr->poslen * sizeof(WordEntryPos));
+				res->poslen = newlen;
 				pfree(ptr->pos);
 			}
 			else
@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 	}
 	if (res->entry.haspos)
 	{
-		*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-		*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+		res->poslen = uniquePos(res->pos, res->poslen);
+		*outbuflen += res->poslen * sizeof(WordEntryPos);
 	}
 	*outbuflen += SHORTALIGN(res->entry.len);
 
@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
 	return compareentry(a, b, buf);
 }
 
-#define WAITWORD		1
-#define WAITENDWORD		2
-#define WAITNEXTCHAR	3
-#define WAITENDCMPLX	4
-#define WAITPOSINFO		5
-#define INPOSINFO		6
-#define WAITPOSDELIM	7
-#define WAITCHARCMPLX	8
-
-#define RESIZEPRSBUF \
-do { \
-	if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
-	{ \
-		int4 clen = state->curpos - state->word; \
-		state->len *= 2; \
-		state->word = (char*)repalloc( (void*)state->word, state->len ); \
-		state->curpos = state->word + clen; \
-	} \
-} while (0)
-
-bool
-gettoken_tsvector(TSVectorParseState *state)
-{
-	int4		oldstate = 0;
-
-	state->curpos = state->word;
-	state->state = WAITWORD;
-	state->alen = 0;
-
-	while (1)
-	{
-		if (state->state == WAITWORD)
-		{
-			if (*(state->prsbuf) == '\0')
-				return false;
-			else if (t_iseq(state->prsbuf, '\''))
-				state->state = WAITENDCMPLX;
-			else if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDWORD;
-			}
-			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-			else if (!t_isspace(state->prsbuf))
-			{
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = WAITENDWORD;
-			}
-		}
-		else if (state->state == WAITNEXTCHAR)
-		{
-			if (*(state->prsbuf) == '\0')
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("there is no escaped character")));
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = oldstate;
-			}
-		}
-		else if (state->state == WAITENDWORD)
-		{
-			if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDWORD;
-			}
-			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
-					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
-			{
-				RESIZEPRSBUF;
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				*(state->curpos) = '\0';
-				return true;
-			}
-			else if (t_iseq(state->prsbuf, ':'))
-			{
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				*(state->curpos) = '\0';
-				if (state->oprisdelim)
-					return true;
-				else
-					state->state = INPOSINFO;
-			}
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-			}
-		}
-		else if (state->state == WAITENDCMPLX)
-		{
-			if (t_iseq(state->prsbuf, '\''))
-			{
-				state->state = WAITCHARCMPLX;
-			}
-			else if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDCMPLX;
-			}
-			else if (*(state->prsbuf) == '\0')
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-			}
-		}
-		else if (state->state == WAITCHARCMPLX)
-		{
-			if (t_iseq(state->prsbuf, '\''))
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = WAITENDCMPLX;
-			}
-			else
-			{
-				RESIZEPRSBUF;
-				*(state->curpos) = '\0';
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				if (state->oprisdelim)
-				{
-					/* state->prsbuf+=pg_mblen(state->prsbuf); */
-					return true;
-				}
-				else
-					state->state = WAITPOSINFO;
-				continue;		/* recheck current character */
-			}
-		}
-		else if (state->state == WAITPOSINFO)
-		{
-			if (t_iseq(state->prsbuf, ':'))
-				state->state = INPOSINFO;
-			else
-				return true;
-		}
-		else if (state->state == INPOSINFO)
-		{
-			if (t_isdigit(state->prsbuf))
-			{
-				if (state->alen == 0)
-				{
-					state->alen = 4;
-					state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
-					*(uint16 *) (state->pos) = 0;
-				}
-				else if (*(uint16 *) (state->pos) + 1 >= state->alen)
-				{
-					state->alen *= 2;
-					state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
-				}
-				(*(uint16 *) (state->pos))++;
-				WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
-				if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("wrong position info in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-				state->state = WAITPOSDELIM;
-			}
-			else
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-		}
-		else if (state->state == WAITPOSDELIM)
-		{
-			if (t_iseq(state->prsbuf, ','))
-				state->state = INPOSINFO;
-			else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
-			}
-			else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
-			}
-			else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
-			}
-			else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-			}
-			else if (t_isspace(state->prsbuf) ||
-					 *(state->prsbuf) == '\0')
-				return true;
-			else if (!t_isdigit(state->prsbuf))
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-		}
-		else					/* internal error */
-			elog(ERROR, "internal error in gettoken_tsvector");
-
-		/* get next char */
-		state->prsbuf += pg_mblen(state->prsbuf);
-	}
-
-	return false;
-}
 
 Datum
 tsvectorin(PG_FUNCTION_ARGS)
@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS)
 	char	   *buf = PG_GETARG_CSTRING(0);
 	TSVectorParseState state;
 	WordEntryIN *arr;
+	int			totallen;
+	int			arrlen;  /* allocated size of arr */
 	WordEntry  *inarr;
-	int4		len = 0,
-				totallen = 64;
+	int			len = 0;
 	TSVector	in;
-	char	   *tmpbuf,
-			   *cur;
-	int4		i,
-				buflen = 256;
+	int			i;
+	char	   *token;
+	int			toklen;
+	WordEntryPos *pos;
+	int			poslen;
+
+	/*
+	 * Tokens are appended to tmpbuf, cur is a pointer
+	 * to the end of used space in tmpbuf.
+	 */
+	char	   *tmpbuf;
+	char	   *cur;
+	int			buflen = 256; /* allocated size of tmpbuf */
 
 	pg_verifymbstr(buf, strlen(buf), false);
-	state.prsbuf = buf;
-	state.len = 32;
-	state.word = (char *) palloc(state.len);
-	state.oprisdelim = false;
 
-	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+	state = init_tsvector_parser(buf, false);
+	
+	arrlen = 64;
+	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
 	cur = tmpbuf = (char *) palloc(buflen);
 
-	while (gettoken_tsvector(&state))
+	while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
 	{
-		/*
-		 * Realloc buffers if it's needed
-		 */
-		if (len >= totallen)
-		{
-			totallen *= 2;
-			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
-		}
-
-		while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
-		{
-			int4		dist = cur - tmpbuf;
-
-			buflen *= 2;
-			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
-			cur = tmpbuf + dist;
-		}
 
-		if (state.curpos - state.word >= MAXSTRLEN)
+		if (toklen >= MAXSTRLEN)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("word is too long (%ld bytes, max %ld bytes)",
-							(long) (state.curpos - state.word),
+							(long) toklen,
 							(long) MAXSTRLEN)));
 
-		arr[len].entry.len = state.curpos - state.word;
+
 		if (cur - tmpbuf > MAXSTRPOS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("position value too large")));
+
+		/*
+		 * Enlarge buffers if needed
+		 */
+		if (len >= arrlen)
+		{
+			arrlen *= 2;
+			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
+		}
+		while ((cur - tmpbuf) + toklen >= buflen)
+		{
+			int	dist = cur - tmpbuf;
+
+			buflen *= 2;
+			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+			cur = tmpbuf + dist;
+		}
+		arr[len].entry.len = toklen;
 		arr[len].entry.pos = cur - tmpbuf;
-		memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
-		cur += arr[len].entry.len;
+		memcpy((void *) cur, (void *) token, toklen);
+		cur += toklen;
 
-		if (state.alen)
+		if (poslen != 0)
 		{
 			arr[len].entry.haspos = 1;
-			arr[len].pos = state.pos;
+			arr[len].pos = pos;
+			arr[len].poslen = poslen;
 		}
 		else
 			arr[len].entry.haspos = 0;
 		len++;
 	}
-	pfree(state.word);
+
+	close_tsvector_parser(state);
 
 	if (len > 0)
 		len = uniqueentry(arr, len, tmpbuf, &buflen);
@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS)
 		cur += SHORTALIGN(arr[i].entry.len);
 		if (arr[i].entry.haspos)
 		{
-			memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
-			cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+			uint16 tmplen;
+
+			if(arr[i].poslen > 0xFFFF)
+				elog(ERROR, "positions array too long");
+
+			tmplen = (uint16) arr[i].poslen;
+
+			/* Copy length to output struct */
+			memcpy(cur, &tmplen, sizeof(uint16));
+			cur += sizeof(uint16);
+
+			/* Copy positions */
+			memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
+			cur += arr[i].poslen * sizeof(WordEntryPos);
+
 			pfree(arr[i].pos);
 		}
 		inarr[i] = arr[i].entry;
@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	TSVector	vec;
-	int			i,
-				size,
-				len = DATAHDRSIZE;
+	int			i;
+	uint32		size;
 	WordEntry  *weptr;
 	int			datalen = 0;
+	Size		len;
 
 	size = pq_getmsgint(buf, sizeof(uint32));
 	if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
 		elog(ERROR, "invalid size of tsvector");
 
-	len += sizeof(WordEntry) * size;
+	len = DATAHDRSIZE + sizeof(WordEntry) * size;
 
-	len *= 2;
+	len = len * 2; /* times two to make room for lexemes */
 	vec = (TSVector) palloc0(len);
 	vec->size = size;
 
 	weptr = ARRPTR(vec);
 	for (i = 0; i < size; i++)
 	{
-		int			tmp;
+		int32 tmp;
 
 		weptr = ARRPTR(vec) + i;
 
@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 						npos;
 			WordEntryPos *wepptr;
 
-			npos = (uint16) pq_getmsgint(buf, sizeof(int16));
+			npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
 			if (npos > MAXNUMPOS)
 				elog(ERROR, "unexpected number of positions");
 
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8567172c64f..d34ab1fcf0b 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,6 +66,9 @@ typedef struct
 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
 
 
+/*
+ * Order: haspos, len, word, for all positions (pos, weight)
+ */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
 {
@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
  * compare 2 string values
  */
 static int4
-ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
+ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
 {
 	if (ptr->len == item->length)
 		return strncmp(
@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
  * check weight info
  */
 static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
+checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
 {
 	WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
 	uint16		len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_str(void *checkval, QueryItem * val)
+checkcondition_str(void *checkval, QueryOperand * val)
 {
-	WordEntry  *StopLow = ((CHKVAL *) checkval)->arrb;
-	WordEntry  *StopHigh = ((CHKVAL *) checkval)->arre;
+	CHKVAL *chkval = (CHKVAL *) checkval;
+	WordEntry  *StopLow = chkval->arrb;
+	WordEntry  *StopHigh = chkval->arre;
 	WordEntry  *StopMiddle;
 	int			difference;
 
@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+		difference = ValCompare(chkval, StopMiddle, val);
 		if (difference == 0)
 			return (val->weight && StopMiddle->haspos) ?
-				checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
+				checkclass_str(chkval, StopMiddle, val) : true;
 		else if (difference < 0)
 			StopLow = StopMiddle + 1;
 		else
@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
 }
 
 /*
- * check for boolean condition
+ * check for boolean condition.
+ *
+ * if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
+ * checkval can be used to pass information to the callback. TS_execute doesn't
+ * do anything with it.
+ * chkcond is a callback function used to evaluate each VAL node in the query.
+ *
  */
 bool
 TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-		   bool (*chkcond) (void *checkval, QueryItem * val))
+		   bool (*chkcond) (void *checkval, QueryOperand * val))
 {
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	if (curitem->type == VAL)
-		return chkcond(checkval, curitem);
-	else if (curitem->val == (int4) '!')
-	{
-		return (calcnot) ?
-			!TS_execute(curitem + 1, checkval, calcnot, chkcond)
-			: true;
-	}
-	else if (curitem->val == (int4) '&')
+	if (curitem->type == QI_VAL)
+		return chkcond(checkval, (QueryOperand *) curitem);
+
+	switch(curitem->operator.oper)
 	{
-		if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-			return TS_execute(curitem + 1, checkval, calcnot, chkcond);
-		else
-			return false;
-	}
-	else
-	{							/* |-operator */
-		if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-			return true;
-		else
-			return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+		case OP_NOT:
+			if (calcnot)
+				return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			else
+				return true;
+		case OP_AND:
+			if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			else
+				return false;
+
+		case OP_OR:
+			if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+				return true;
+			else
+				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+		default:
+			elog(ERROR, "unknown operator %d", curitem->operator.oper);
 	}
+
+	/* not reachable, but keep compiler quiet */
 	return false;
 }
 
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
new file mode 100644
index 00000000000..26a271679d4
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -0,0 +1,357 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_parser.c
+ *	  Parser for tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+
+struct TSVectorParseStateData
+{
+	char   *prsbuf;
+	char   *word;		/* buffer to hold the current word */
+	int		len;		/* size in bytes allocated for 'word' */
+	bool	oprisdelim;
+};
+
+/*
+ * Initializes parser for the input string. If oprisdelim is set, the
+ * following characters are treated as delimiters in addition to whitespace:
+ * ! | & ( )
+ */
+TSVectorParseState
+init_tsvector_parser(char *input, bool oprisdelim)
+{
+	TSVectorParseState state;
+
+	state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
+	state->prsbuf = input;
+	state->len = 32;
+	state->word = (char *) palloc(state->len);
+	state->oprisdelim = oprisdelim;
+
+	return state;
+}
+
+/*
+ * Reinitializes parser for parsing 'input', instead of previous input.
+ */
+void
+reset_tsvector_parser(TSVectorParseState state, char *input)
+{
+	state->prsbuf = input;	
+}
+
+/*
+ * Shuts down a tsvector parser.
+ */
+void
+close_tsvector_parser(TSVectorParseState state)
+{
+	pfree(state->word);
+	pfree(state);
+}
+
+#define RESIZEPRSBUF \
+do { \
+	if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+	{ \
+		int clen = curpos - state->word; \
+		state->len *= 2; \
+		state->word = (char*)repalloc( (void*)state->word, state->len ); \
+		curpos = state->word + clen; \
+	} \
+} while (0)
+
+
+#define ISOPERATOR(x)	( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+
+/* Fills the output parameters, and returns true */
+#define RETURN_TOKEN \
+do { \
+	if (pos_ptr != NULL) \
+	{ \
+		*pos_ptr = pos; \
+		*poslen = npos; \
+	} \
+	else if (pos != NULL) \
+		pfree(pos); \
+	\
+	if (strval != NULL) \
+		*strval = state->word; \
+	if (lenval != NULL) \
+		*lenval = curpos - state->word; \
+	if (endptr != NULL) \
+		*endptr = state->prsbuf; \
+	return true; \
+} while(0)
+
+
+/* State codes used in gettoken_tsvector */
+#define WAITWORD		1
+#define WAITENDWORD		2
+#define WAITNEXTCHAR	3
+#define WAITENDCMPLX	4
+#define WAITPOSINFO		5
+#define INPOSINFO		6
+#define WAITPOSDELIM	7
+#define WAITCHARCMPLX	8
+
+/*
+ * Get next token from string being parsed. Returns false if
+ * end of input string is reached, otherwise strval, lenval, pos_ptr
+ * and poslen output parameters are filled in:
+ * 
+ * *strval 		token
+ * *lenval 		length of *strval
+ * *pos_ptr		pointer to a palloc'd array of positions and weights
+ * 				associated with the token. If the caller is not interested
+ *				in the information, NULL can be supplied. Otherwise
+ *				the caller is responsible for pfreeing the array.
+ * *poslen		number of elements in *pos_ptr
+ */
+bool
+gettoken_tsvector(TSVectorParseState state, 
+				  char **strval, int *lenval,
+				  WordEntryPos **pos_ptr, int *poslen,
+				  char **endptr)
+{
+	int	oldstate	= 0;
+	char *curpos	= state->word;
+	int	statecode	= WAITWORD;
+
+	/* pos collects the comma-delimited list of positions that follows
+	 * the actual token.
+	 */
+	WordEntryPos *pos = NULL;
+	int npos		= 0; /* elements of pos used */
+	int posalen		= 0; /* allocated size of pos */
+
+	while (1)
+	{
+		if (statecode == WAITWORD)
+		{
+			if (*(state->prsbuf) == '\0')
+				return false;
+			else if (t_iseq(state->prsbuf, '\''))
+				statecode = WAITENDCMPLX;
+			else if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDWORD;
+			}
+			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+			else if (!t_isspace(state->prsbuf))
+			{
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				statecode = WAITENDWORD;
+			}
+		}
+		else if (statecode == WAITNEXTCHAR)
+		{
+			if (*(state->prsbuf) == '\0')
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("there is no escaped character")));
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				Assert(oldstate != 0);
+				statecode = oldstate;
+			}
+		}
+		else if (statecode == WAITENDWORD)
+		{
+			if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDWORD;
+			}
+			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+			{
+				RESIZEPRSBUF;
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				*(curpos) = '\0';
+				RETURN_TOKEN;
+			}
+			else if (t_iseq(state->prsbuf, ':'))
+			{
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				*(curpos) = '\0';
+				if (state->oprisdelim)
+					RETURN_TOKEN;
+				else
+					statecode = INPOSINFO;
+			}
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+			}
+		}
+		else if (statecode == WAITENDCMPLX)
+		{
+			if (t_iseq(state->prsbuf, '\''))
+			{
+				statecode = WAITCHARCMPLX;
+			}
+			else if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDCMPLX;
+			}
+			else if (*(state->prsbuf) == '\0')
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+			}
+		}
+		else if (statecode == WAITCHARCMPLX)
+		{
+			if (t_iseq(state->prsbuf, '\''))
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				statecode = WAITENDCMPLX;
+			}
+			else
+			{
+				RESIZEPRSBUF;
+				*(curpos) = '\0';
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				if (state->oprisdelim)
+				{
+					/* state->prsbuf+=pg_mblen(state->prsbuf); */
+					RETURN_TOKEN;
+				}
+				else
+					statecode = WAITPOSINFO;
+				continue;		/* recheck current character */
+			}
+		}
+		else if (statecode == WAITPOSINFO)
+		{
+			if (t_iseq(state->prsbuf, ':'))
+				statecode = INPOSINFO;
+			else
+				RETURN_TOKEN;
+		}
+		else if (statecode == INPOSINFO)
+		{
+			if (t_isdigit(state->prsbuf))
+			{
+				if (posalen == 0)
+				{
+					posalen = 4;
+					pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
+					npos = 0;
+				}
+				else if (npos + 1 >= posalen)
+				{
+					posalen *= 2;
+					pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
+				}
+				npos++;
+				WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+				if (WEP_GETPOS(pos[npos - 1]) == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("wrong position info in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 0);
+				statecode = WAITPOSDELIM;
+			}
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+		}
+		else if (statecode == WAITPOSDELIM)
+		{
+			if (t_iseq(state->prsbuf, ','))
+				statecode = INPOSINFO;
+			else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 3);
+			}
+			else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 2);
+			}
+			else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 1);
+			}
+			else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 0);
+			}
+			else if (t_isspace(state->prsbuf) ||
+					 *(state->prsbuf) == '\0')
+				RETURN_TOKEN;
+			else if (!t_isdigit(state->prsbuf))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+		}
+		else					/* internal error */
+			elog(ERROR, "internal error in gettoken_tsvector");
+
+		/* get next char */
+		state->prsbuf += pg_mblen(state->prsbuf);
+	}
+
+	return false;
+}
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 148129aa8bc..ab19de7924f 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,7 +42,7 @@ typedef struct
 				type:8,
 				len:16;
 	char	   *word;
-	QueryItem  *item;
+	QueryOperand  *item;
 } HeadlineWordEntry;
 
 typedef struct
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index ec22f96f59f..91d724ef1c6 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -13,6 +13,8 @@
 #define _PG_TSTYPE_H_
 
 #include "fmgr.h"
+#include "utils/pg_crc.h"
+
 
 /*
  * TSVector type.
@@ -27,8 +29,8 @@ typedef struct
 				pos:20;			/* MAX 1Mb */
 } WordEntry;
 
-#define MAXSTRLEN ( 1<<11 )
-#define MAXSTRPOS ( 1<<20 )
+#define MAXSTRLEN ( (1<<11) - 1)
+#define MAXSTRPOS ( (1<<20) - 1)
 
 /*
  * Equivalent to
@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
 typedef struct
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int4		size;
+	uint32		size;
 	char		data[1];
 } TSVectorData;
 
@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
 
 /*
  * TSQuery
+ *
+ *
  */
 
+typedef int8 QueryItemType;
+
+/* Valid values for QueryItemType: */
+#define QI_VAL 1
+#define QI_OPR 2
+#define QI_VALSTOP 3	/* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
+
 /*
  * QueryItem is one node in tsquery - operator or operand.
  */
-
-typedef struct QueryItem
+typedef struct
 {
-	int8		type;			/* operand or kind of operator */
-	int8		weight;			/* weights of operand to search */
-	int2		left;			/* pointer to left operand Right operand is
-								 * item + 1, left operand is placed
-								 * item+item->left */
-	int4		val;			/* crc32 value of operand's value */
+	QueryItemType		type;	/* operand or kind of operator (ts_tokentype) */
+	int8		weight;			/* weights of operand to search. It's a bitmask of allowed weights.
+								 * If it is 0 then any weight is allowed */
+	int32	valcrc;				/* XXX: pg_crc32 would be a more appropriate data type, 
+								 * but we use comparisons to signed integers in the code. 
+								 * They would need to be changed as well. */
+
 	/* pointer to text value of operand, must correlate with WordEntry */
 	uint32
 				istrue:1,		/* use for ranking in Cover */
 				length:11,
 				distance:20;
-} QueryItem;
+} QueryOperand;
+
+
+/* Legal values for QueryOperator.oper */
+#define	OP_NOT	1
+#define	OP_AND	2
+#define	OP_OR	3
+
+typedef struct 
+{
+	QueryItemType	type;
+	int8		oper;		/* see above */
+	int16		left;		/* pointer to left operand. Right operand is
+							 * item + 1, left operand is placed
+							 * item+item->left */
+} QueryOperator;
 
 /*
- * It's impossible to use offsetof(QueryItem, istrue)
+ * Note: TSQuery is 4-byte aligned, so make sure there are no fields
+ * inside QueryItem requiring 8-byte alignment, like int64.
  */
-#define HDRSIZEQI	( sizeof(int8) + sizeof(int8) + sizeof(int2) +	sizeof(int4) )
+typedef union
+{
+	QueryItemType	type;
+	QueryOperator operator;
+	QueryOperand operand;
+} QueryItem;
 
 /*
  * Storage:
- *	(len)(size)(array of ITEM)(array of operand in text form)
- *	operands are always finished by '\0'
+ *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
  */
 
 typedef struct
@@ -182,13 +213,17 @@ typedef struct
 typedef TSQueryData *TSQuery;
 
 #define HDRSIZETQ	( VARHDRSZ + sizeof(int4) )
-#define COMPUTESIZE(size,lenofoperand)	( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
-#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
-#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
-#define OPERANDSSIZE(x)		( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
 
-#define ISOPERATOR(x)	( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* Computes the total size: header, all QueryItems and operands. size is the
+ * number of QueryItems, and lenofoperand is the total length of all operands
+ */
+#define COMPUTESIZE(size, lenofoperand)	( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
 
+/* Returns a pointer to the first QueryItem in a TSQuery */
+#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
+
+/* Returns a pointer to the beginning of operands in a TSQuery */
+#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
 
 /*
  * fmgr interface macros
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index d2e5c8d8e49..31a76e50b6c 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -14,65 +14,41 @@
 
 #include "tsearch/ts_type.h"
 #include "tsearch/ts_public.h"
+#include "nodes/pg_list.h"
 
 /*
  * Common parse definitions for tsvector and tsquery
  */
 
-typedef struct
-{
-	WordEntry	entry;			/* should be first ! */
-	WordEntryPos *pos;
-} WordEntryIN;
-
-typedef struct
-{
-	char	   *prsbuf;
-	char	   *word;
-	char	   *curpos;
-	int4		len;
-	int4		state;
-	int4		alen;
-	WordEntryPos *pos;
-	bool		oprisdelim;
-} TSVectorParseState;
-
-extern bool gettoken_tsvector(TSVectorParseState *state);
+/* tsvector parser support. */
 
-struct ParseQueryNode;			/* private in backend/utils/adt/tsquery.c */
+struct TSVectorParseStateData;
+typedef struct TSVectorParseStateData *TSVectorParseState;
 
-typedef struct
-{
-	char	   *buffer;			/* entire string we are scanning */
-	char	   *buf;			/* current scan point */
-	int4		state;
-	int4		count;
+extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state, 
+							  char **token, int *len,
+							  WordEntryPos **pos, int *poslen,
+							  char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
 
-	/* reverse polish notation in list (for temporary usage) */
-	struct ParseQueryNode *str;
+/* parse_tsquery */
 
-	/* number in str */
-	int4		num;
+struct TSQueryParserStateData;	/* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
 
-	/* text-form operand */
-	int4		lenop;
-	int4		sumlen;
-	char	   *op;
-	char	   *curop;
-
-	/* state for value's parser */
-	TSVectorParseState valstate;
-	/* tscfg */
-	Oid			cfg_id;
-} TSQueryParserState;
+typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
 
 extern TSQuery parse_tsquery(char *buf,
-			  void (*pushval) (TSQueryParserState *, int, char *, int, int2),
-			  Oid cfg_id, bool isplain);
-extern void pushval_asis(TSQueryParserState * state,
-			 int type, char *strval, int lenval, int2 weight);
-extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
-		  int4 distance, int4 lenval, int2 weight);
+			  PushFunction pushval,
+			  void *opaque, bool isplain);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+			 char *strval, int lenval, int2 weight);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 operator);
 
 /*
  * parse plain text and lexize words
@@ -84,6 +60,11 @@ typedef struct
 	union
 	{
 		uint16		pos;
+		/*
+		 * When apos array is used, apos[0] is the number of elements
+		 * in the array (excluding apos[0]), and alen is the allocated
+		 * size of the array.
+		 */
 		uint16	   *apos;
 	}			pos;
 	char	   *word;
@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
 			char *buf, int4 buflen);
 extern text *generateHeadline(HeadlineParsedText * prs);
 
-/*
- * token/node types for parsing
- */
-#define END				0
-#define ERR				1
-#define VAL				2
-#define OPR				3
-#define OPEN			4
-#define CLOSE			5
-#define VALSTOP			6		/* for stop words */
-
 /*
  * Common check function for tsvector @@ tsquery
  */
 
 extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-		   bool (*chkcond) (void *checkval, QueryItem * val));
+		   bool (*chkcond) (void *checkval, QueryOperand * val));
 
 /*
  * Useful conversion macros
-- 
GitLab