diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ee4b61d44bfe12fa3ab4830a0b80231337a9411a..d0b1bcc19def7f546644da8c2571c8467eb91ab1 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
 
 
 /*
- * This function is used for morph parsing
+ * This function is used for morph parsing.
+ *
+ * The value is passed to parsetext which will call the right dictionary to
+ * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
+ * to the stack.
+ *
+ * All words belonging to the same variant are pushed as an ANDed list,
+ * and different variants are ORed together.
  */
 static void
-pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
+pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
 {
 	int4		count = 0;
 	ParsedText	prs;
@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 				cntvar = 0,
 				cntpos = 0,
 				cnt = 0;
+	Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
 
 	prs.lenwords = 4;
 	prs.curwords = 0;
 	prs.pos = 0;
 	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
-	parsetext(state->cfg_id, &prs, strval, lenval);
+	parsetext(cfg_id, &prs, strval, lenval);
 
 	if (prs.curwords > 0)
 	{
@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 				while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
 				{
 
-					pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+					pushValue(state, prs.words[count].word, prs.words[count].len, weight);
 					pfree(prs.words[count].word);
 					if (cnt)
-						pushquery(state, OPR, (int4) '&', 0, 0, 0);
+						pushOperator(state, OP_AND);
 					cnt++;
 					count++;
 				}
 
 				if (cntvar)
-					pushquery(state, OPR, (int4) '|', 0, 0, 0);
+					pushOperator(state, OP_OR);
 				cntvar++;
 			}
 
 			if (cntpos)
-				pushquery(state, OPR, (int4) '&', 0, 0, 0);
+				pushOperator(state, OP_AND);
 
 			cntpos++;
 		}
@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
 
 	}
 	else
-		pushval_asis(state, VALSTOP, NULL, 0, 0);
+		pushStop(state);
 }
 
 Datum
@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
 	QueryItem  *res;
 	int4		len;
 
-	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
 
 	if (query->size == 0)
 		PG_RETURN_TSQUERY(query);
@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
 	QueryItem  *res;
 	int4		len;
 
-	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
 
 	if (query->size == 0)
 		PG_RETURN_TSQUERY(query);
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 47e18fc1ac5b3c75137208621c40676ae401d321..22c5f2b86eaf3b8dd37ea69ef078433bd92a7ea3 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
 }
 
 /*
- * Parse string and lexize words
+ * Parse string and lexize words.
+ *
+ * prs will be filled in.
  */
 void
-parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
+parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
 {
 	int			type,
 				lenlemm;
@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
  * Headline framework
  */
 static void
-hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
+hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
 {
 	while (prs->curwords >= prs->lenwords)
 	{
@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
 	word = &(prs->words[prs->curwords - 1]);
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
+		if (item->type == QI_VAL &&
+			item->operand.length == buflen &&
+			strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
 		{
 			if (word->item)
 			{
 				memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
-				prs->words[prs->curwords].item = item;
+				prs->words[prs->curwords].item = &item->operand;
 				prs->words[prs->curwords].repeated = 1;
 				prs->curwords++;
 			}
 			else
-				word->item = item;
+				word->item = &item->operand;
 		}
 		item++;
 	}
@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
 }
 
 void
-hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
+hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
 {
 	int			type,
 				lenlemm;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 5b47f66d07fdf77c93e21cf4bdbed3eeb0f8d3d6..5f65cbc9fb239a07aa15fb28fa8356c5f0553bb1 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1575,7 +1575,7 @@ typedef struct
 } hlCheck;
 
 static bool
-checkcondition_HL(void *checkval, QueryItem * val)
+checkcondition_HL(void *checkval, QueryOperand * val)
 {
 	int			i;
 
@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
 
 	for (j = 0; j < query->size; j++)
 	{
-		if (item->type != VAL)
+		if (item->type != QI_VAL)
 		{
 			item++;
 			continue;
 		}
 		for (i = pos; i < prs->curwords; i++)
 		{
-			if (prs->words[i].item == item)
+			if (prs->words[i].item == &item->operand)
 			{
 				if (i > *q)
 					*q = i;
@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
 	item = GETQUERY(query);
 	for (j = 0; j < query->size; j++)
 	{
-		if (item->type != VAL)
+		if (item->type != QI_VAL)
 		{
 			item++;
 			continue;
 		}
 		for (i = *q; i >= pos; i--)
 		{
-			if (prs->words[i].item == item)
+			if (prs->words[i].item == &item->operand)
 			{
 				if (i < *p)
 					*p = i;
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index a1f233dca8279ce0efa422ce0bfe9e301ee812ee..9a75c736df650dbf215a6c9f1bfcb35e70a4d1a8 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for utils/adt
 #
-# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
 #
 
 subdir = src/backend/utils/adt
@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
 	ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
 	tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
 	tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
-	tsvector.o tsvector_op.o \
+	tsvector.o tsvector_op.o tsvector_parser.o\
 	uuid.o xml.o
 
 like.o: like.c like_match.c
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 491dd21aa81b40fae9c88bf0f762e9e71cfbf49c..10b80dc9566d304ed5ab768f6bde609edf2b1523 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
 		item = GETQUERY(query);
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 				(*nentries)++;
 
 		entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 			{
 				text	   *txt;
+				QueryOperand *val = &item[i].operand;
 
-				txt = (text *) palloc(VARHDRSZ + item[i].length);
+				txt = (text *) palloc(VARHDRSZ + val->length);
 
-				SET_VARSIZE(txt, VARHDRSZ + item[i].length);
-				memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
+				SET_VARSIZE(txt, VARHDRSZ + val->length);
+				memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
 
 				entries[j++] = PointerGetDatum(txt);
 
-				if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
+				if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							 errmsg("@@ operator does not support lexeme class restrictions"),
@@ -116,11 +117,11 @@ typedef struct
 } GinChkVal;
 
 static bool
-checkcondition_gin(void *checkval, QueryItem * val)
+checkcondition_gin(void *checkval, QueryOperand * val)
 {
 	GinChkVal  *gcv = (GinChkVal *) checkval;
 
-	return gcv->mapped_check[val - gcv->frst];
+	return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
 }
 
 Datum
@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
 		gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
 
 		for (i = 0; i < query->size; i++)
-			if (item[i].type == VAL)
+			if (item[i].type == QI_VAL)
 				gcv.mapped_check[i] = check[j++];
 
 		res = TS_execute(
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 6c262521ef49b56c2be2ee4d5c9e7aa740b0d65d..4fc51378b4bf5c70cbfc5e3e7d16195e7597f79c 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -293,7 +293,7 @@ typedef struct
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_arr(void *checkval, QueryItem * val)
+checkcondition_arr(void *checkval, QueryOperand * val)
 {
 	int4	   *StopLow = ((CHKVAL *) checkval)->arrb;
 	int4	   *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		if (*StopMiddle == val->val)
+		if (*StopMiddle == val->valcrc)
 			return (true);
-		else if (*StopMiddle < val->val)
+		else if (*StopMiddle < val->valcrc)
 			StopLow = StopMiddle + 1;
 		else
 			StopHigh = StopMiddle;
@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
 }
 
 static bool
-checkcondition_bit(void *checkval, QueryItem * val)
+checkcondition_bit(void *checkval, QueryOperand * val)
 {
-	return GETBIT(checkval, HASHVAL(val->val));
+	return GETBIT(checkval, HASHVAL(val->valcrc));
 }
 
 Datum
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 83759728ff96c97e3134e9da1a624a09ca9823c9..27b93eb64d7725fa0ec9795a31fd576e188ae13d 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,6 +23,29 @@
 #include "utils/pg_crc.h"
 
 
+struct TSQueryParserStateData
+{
+	/* State for gettoken_query */
+	char	   *buffer;			/* entire string we are scanning */
+	char	   *buf;			/* current scan point */
+	int			state;			/* current parser state, one of the WAIT* values */
+	int			count;			/* nesting count, incremented by (,
+								   decremented by ) */
+
+	/* items in polish (prefix) notation, as a list filled in by push* functions */
+	List	   *polstr;
+
+	/* Strings from operands are collected in op. curop is a pointer to
+	 * the end of used space of op. */
+	char	   *op;
+	char	   *curop;
+	int			lenop; /* allocated size of op */
+	int			sumlen; /* used size of op, including the terminating '\0's */
+
+	/* state of the tsvector parser, which is used to parse operand values */
+	TSVectorParseState valstate;
+};
+
 /* parser's states */
 #define WAITOPERAND 1
 #define WAITOPERATOR	2
@@ -30,21 +53,10 @@
 #define WAITSINGLEOPERAND 4
 
 /*
- * node of query tree, also used
- * for storing polish notation in parser
+ * subroutine to parse the weight part, like ':1AB' of a query.
  */
-typedef struct ParseQueryNode
-{
-	int2		weight;
-	int2		type;
-	int4		val;
-	int2		distance;
-	int2		length;
-	struct ParseQueryNode *next;
-} ParseQueryNode;
-
 static char *
-get_weight(char *buf, int2 *weight)
+get_weight(char *buf, int16 *weight)
 {
 	*weight = 0;
 
@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight)
 	return buf;
 }
 
+/*
+ * Token types returned by gettoken_query.
+ */
+typedef enum {
+	PT_END = 0,			/* no more tokens */
+	PT_ERR = 1,			/* syntax error */
+	PT_VAL = 2,			/* operand (value) */
+	PT_OPR = 3,			/* operator; the OP_* code is stored in *operator */
+	PT_OPEN = 4,		/* opening parenthesis */
+	PT_CLOSE = 5		/* closing parenthesis */
+} ts_tokentype;
+
 /*
  * get token from query string
+ *
+ * *operator is filled in with OP_* when the return value is PT_OPR.
+ * *strval, *lenval and *weight are filled in when the return value is PT_VAL.
  */
-static int4
-gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+static ts_tokentype
+gettoken_query(TSQueryParserState state, 
+			   int8 *operator,
+			   int *lenval, char **strval, int16 *weight)
 {
 	while (1)
 	{
@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				{
 					(state->buf)++;		/* can safely ++, t_iseq guarantee
 										 * that pg_mblen()==1 */
-					*val = (int4) '!';
+					*operator = OP_NOT;
 					state->state = WAITOPERAND;
-					return OPR;
+					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, '('))
 				{
 					state->count++;
 					(state->buf)++;
 					state->state = WAITOPERAND;
-					return OPEN;
+					return PT_OPEN;
 				}
 				else if (t_iseq(state->buf, ':'))
 				{
@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				}
 				else if (!t_isspace(state->buf))
 				{
-					state->valstate.prsbuf = state->buf;
-					if (gettoken_tsvector(&(state->valstate)))
+					/* We rely on the tsvector parser to parse the value for us */
+					reset_tsvector_parser(state->valstate, state->buf);
+					if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
 					{
-						*strval = state->valstate.word;
-						*lenval = state->valstate.curpos - state->valstate.word;
-						state->buf = get_weight(state->valstate.prsbuf, weight);
+						state->buf = get_weight(state->buf, weight);
 						state->state = WAITOPERATOR;
-						return VAL;
+						return PT_VAL;
 					}
 					else if (state->state == WAITFIRSTOPERAND)
-						return END;
+						return PT_END;
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
 				}
 				break;
 			case WAITOPERATOR:
-				if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+				if (t_iseq(state->buf, '&'))
+				{
+					state->state = WAITOPERAND;
+					*operator = OP_AND;
+					(state->buf)++;
+					return PT_OPR;
+				}
+				if (t_iseq(state->buf, '|'))
 				{
 					state->state = WAITOPERAND;
-					*val = (int4) *(state->buf);
+					*operator = OP_OR;
 					(state->buf)++;
-					return OPR;
+					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, ')'))
 				{
 					(state->buf)++;
 					state->count--;
-					return (state->count < 0) ? ERR : CLOSE;
+					return (state->count < 0) ? PT_ERR : PT_CLOSE;
 				}
 				else if (*(state->buf) == '\0')
-					return (state->count) ? ERR : END;
+					return (state->count) ? PT_ERR : PT_END;
 				else if (!t_isspace(state->buf))
-					return ERR;
+					return PT_ERR;
 				break;
 			case WAITSINGLEOPERAND:
 				if (*(state->buf) == '\0')
-					return END;
+					return PT_END;
 				*strval = state->buf;
 				*lenval = strlen(state->buf);
 				state->buf += strlen(state->buf);
 				state->count++;
-				return VAL;
+				return PT_VAL;
 			default:
-				return ERR;
+				return PT_ERR;
 				break;
 		}
 		state->buf += pg_mblen(state->buf);
 	}
-	return END;
+	return PT_END;
 }
 
 /*
- * push new one in polish notation reverse view
+ * Push an operator (OP_NOT, OP_AND or OP_OR) to state->polstr.
  */
 void
-pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+pushOperator(TSQueryParserState state, int8 oper)
 {
-	ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+	QueryOperator *tmp;
+
+	Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+	
+	tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
+	tmp->type = QI_OPR;
+	tmp->oper = oper;
+	/* tmp->left is not set here; it is filled in later by findoprnd */
+
+	state->polstr = lcons(tmp, state->polstr);	/* lcons() adds at the list head */
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+{
+	QueryOperand *tmp;
 
-	tmp->weight = weight;
-	tmp->type = type;
-	tmp->val = val;
 	if (distance >= MAXSTRPOS)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("operand is too long in tsearch query: \"%s\"",
 						state->buffer)));
-	tmp->distance = distance;
+
+	tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+	tmp->type = QI_VAL;
+	tmp->weight = weight;
+	tmp->valcrc = (int32) valcrc;
 	tmp->length = lenval;
-	tmp->next = state->str;
-	state->str = tmp;
-	state->num++;
+	tmp->distance = distance;
+
+	state->polstr = lcons(tmp, state->polstr);
 }
 
 /*
- * This function is used for tsquery parsing
+ * Push an operand to state->polstr.
+ *
+ * strval is the operand text and lenval is its length in bytes. The text is
+ * appended, followed by a '\0', to the operand buffer at state->curop.
  */
 void
-pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
 {
-	pg_crc32	c;
+	pg_crc32	valcrc;
 
 	if (lenval >= MAXSTRLEN)
 		ereport(ERROR,
@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
 				 errmsg("word is too long in tsearch query: \"%s\"",
 						state->buffer)));
 
-	INIT_CRC32(c);
-	COMP_CRC32(c, strval, lenval);
-	FIN_CRC32(c);
-	pushquery(state, type, *(int4 *) &c,
-			  state->curop - state->op, lenval, weight);
+	INIT_CRC32(valcrc);
+	COMP_CRC32(valcrc, strval, lenval);
+	FIN_CRC32(valcrc);
+	pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
 
+	/* append the value string to state.op, enlarging buffer if needed first */
 	while (state->curop - state->op + lenval + 1 >= state->lenop)
 	{
-		int4		tmp = state->curop - state->op;
+		int	used = state->curop - state->op;
 
 		state->lenop *= 2;
 		state->op = (char *) repalloc((void *) state->op, state->lenop);
-		state->curop = state->op + tmp;
+		state->curop = state->op + used;
 	}
 	memcpy((void *) state->curop, (void *) strval, lenval);
 	state->curop += lenval;
 	*(state->curop) = '\0';
 	state->curop++;
 	state->sumlen += lenval + 1 /* \0 */ ;
-	return;
 }
 
+
+/*
+ * Push a stopword placeholder (a QI_VALSTOP item) to state->polstr.
+ */
+void
+pushStop(TSQueryParserState state)
+{
+	QueryOperand *tmp;
+
+	tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+	tmp->type = QI_VALSTOP;		/* only the type is set for a placeholder */
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+
 #define STACKDEPTH	32
 
 /*
- * make polish notation of query
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
  */
-static int4
-makepol(TSQueryParserState * state,
-		void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+static void
+makepol(TSQueryParserState state, 
+		PushFunction pushval,
+		void *opaque)
 {
-	int4		val = 0,
-				type;
-	int4		lenval = 0;
+	int8		operator = 0;
+	ts_tokentype type;
+	int			lenval = 0;
 	char	   *strval = NULL;
-	int4		stack[STACKDEPTH];
-	int4		lenstack = 0;
-	int2		weight = 0;
+	int8		opstack[STACKDEPTH];
+	int			lenstack = 0;
+	int16		weight = 0;
 
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
 	{
 		switch (type)
 		{
-			case VAL:
-				pushval(state, VAL, strval, lenval, weight);
-				while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-									stack[lenstack - 1] == (int4) '!'))
+			case PT_VAL:
+				pushval(opaque, state, strval, lenval, weight);
+				while (lenstack && (opstack[lenstack - 1] == OP_AND ||
+									opstack[lenstack - 1] == OP_NOT))
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				}
 				break;
-			case OPR:
-				if (lenstack && val == (int4) '|')
-					pushquery(state, OPR, val, 0, 0, 0);
+			case PT_OPR:
+				if (lenstack && operator == OP_OR)
+					pushOperator(state, OP_OR);
 				else
 				{
 					if (lenstack == STACKDEPTH)			/* internal error */
 						elog(ERROR, "tsquery stack too small");
-					stack[lenstack] = val;
+					opstack[lenstack] = operator;
 					lenstack++;
 				}
 				break;
-			case OPEN:
-				if (makepol(state, pushval) == ERR)
-					return ERR;
-				if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-								 stack[lenstack - 1] == (int4) '!'))
+			case PT_OPEN:
+				makepol(state, pushval, opaque);
+
+				if (lenstack && (opstack[lenstack - 1] == OP_AND ||
+								 opstack[lenstack - 1] == OP_NOT))
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				}
 				break;
-			case CLOSE:
+			case PT_CLOSE:
 				while (lenstack)
 				{
 					lenstack--;
-					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+					pushOperator(state, opstack[lenstack]);
 				};
-				return END;
-				break;
-			case ERR:
+				return;
+			case PT_ERR:
 			default:
 				ereport(ERROR,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("syntax error in tsearch query: \"%s\"",
 								state->buffer)));
-				return ERR;
-
 		}
 	}
 	while (lenstack)
 	{
 		lenstack--;
-		pushquery(state, OPR, stack[lenstack], 0, 0, 0);
-	};
-	return END;
+		pushOperator(state, opstack[lenstack]);
+	}
 }
 
+/*
+ * Fills in the 'left' fields of the operators, which were left unset when the
+ * items were pushed. The input QueryItems must be in polish (prefix) notation.
+ */
 static void
-findoprnd(QueryItem * ptr, int4 *pos)
+findoprnd(QueryItem *ptr, int *pos)
 {
-	if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
-	{
-		ptr[*pos].left = 0;
-		(*pos)++;
-	}
-	else if (ptr[*pos].val == (int4) '!')
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (ptr[*pos].type == QI_VAL ||
+		ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
+									   * they haven't been cleansed
+									   * away yet.
+									   */
 	{
-		ptr[*pos].left = 1;
 		(*pos)++;
-		findoprnd(ptr, pos);
 	}
-	else
+	else 
 	{
-		QueryItem  *curitem = &ptr[*pos];
-		int4		tmp = *pos;
+		Assert(ptr[*pos].type == QI_OPR);
 
-		(*pos)++;
-		findoprnd(ptr, pos);
-		curitem->left = *pos - tmp;
-		findoprnd(ptr, pos);
+		if (ptr[*pos].operator.oper == OP_NOT)
+		{
+			ptr[*pos].operator.left = 1;
+			(*pos)++;
+			findoprnd(ptr, pos);
+		}
+		else
+		{
+			QueryOperator  *curitem = &ptr[*pos].operator;
+			int	tmp = *pos;
+
+			Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+
+			(*pos)++;
+			findoprnd(ptr, pos);
+			curitem->left = *pos - tmp;
+			findoprnd(ptr, pos);
+		}
 	}
 }
 
-
 /*
- * input
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ *
+ * The returned query might contain QI_VALSTOP nodes. The caller is responsible
+ * for cleaning them up (with clean_fakeval)
  */
 TSQuery
-parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+parse_tsquery(char *buf, 
+			  PushFunction pushval,
+			  void *opaque,
+			  bool isplain)
 {
-	TSQueryParserState state;
-	int4		i;
+	struct TSQueryParserStateData state;
+	int			i;
 	TSQuery		query;
-	int4		commonlen;
+	int			commonlen;
 	QueryItem  *ptr;
-	ParseQueryNode *tmp;
-	int4		pos = 0;
+	int			pos = 0;
+	ListCell   *cell;
 
 	/* init state */
 	state.buffer = buf;
 	state.buf = buf;
 	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 	state.count = 0;
-	state.num = 0;
-	state.str = NULL;
-	state.cfg_id = cfg_id;
+	state.polstr = NIL;
 
 	/* init value parser's state */
-	state.valstate.oprisdelim = true;
-	state.valstate.len = 32;
-	state.valstate.word = (char *) palloc(state.valstate.len);
+	state.valstate = init_tsvector_parser(NULL, true);
 
 	/* init list of operand */
 	state.sumlen = 0;
@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
 	*(state.curop) = '\0';
 
 	/* parse query & make polish notation (postfix, but in reverse order) */
-	makepol(&state, pushval);
-	pfree(state.valstate.word);
-	if (!state.num)
+	makepol(&state, pushval, opaque);
+
+	close_tsvector_parser(state.valstate);
+
+	if (list_length(state.polstr) == 0)
 	{
 		ereport(NOTICE,
 				(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
 		return query;
 	}
 
-	/* make finish struct */
-	commonlen = COMPUTESIZE(state.num, state.sumlen);
-	query = (TSQuery) palloc(commonlen);
+	/* Pack the QueryItems in the final TSQuery struct to return to caller */
+	commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+	query = (TSQuery) palloc0(commonlen);
 	SET_VARSIZE(query, commonlen);
-	query->size = state.num;
+	query->size = list_length(state.polstr);
 	ptr = GETQUERY(query);
 
-	/* set item in polish notation */
-	for (i = 0; i < state.num; i++)
+	/* Copy QueryItems to TSQuery */
+	i = 0;
+	foreach(cell, state.polstr)
 	{
-		ptr[i].weight = state.str->weight;
-		ptr[i].type = state.str->type;
-		ptr[i].val = state.str->val;
-		ptr[i].distance = state.str->distance;
-		ptr[i].length = state.str->length;
-		tmp = state.str->next;
-		pfree(state.str);
-		state.str = tmp;
+		QueryItem *item = (QueryItem *) lfirst(cell);
+
+		switch(item->type)
+		{
+			case QI_VAL:
+				memcpy(&ptr[i], item, sizeof(QueryOperand));
+				break;
+			case QI_VALSTOP:
+				ptr[i].type = QI_VALSTOP;
+				break;
+			case QI_OPR:
+				memcpy(&ptr[i], item, sizeof(QueryOperator));
+				break;
+			default:
+				elog(ERROR, "unknown QueryItem type %d", item->type);
+		}
+		i++;
 	}
 
-	/* set user friendly-operand view */
+	/* Copy all the operand strings to TSQuery */
 	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
 	pfree(state.op);
 
-	/* set left operand's position for every operator */
+	/* Set left operand pointers for every operator. */
 	pos = 0;
 	findoprnd(ptr, &pos);
 
 	return query;
 }
 
+static void
+pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
+			 int16 weight)
+{
+	pushValue(state, strval, lenval, weight);	/* push the operand unchanged; opaque is unused */
+}
+
 /*
  * in without morphology
  */
@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
 
 	pg_verifymbstr(in, strlen(in), false);
 
-	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
 }
 
 /*
@@ -443,13 +556,14 @@ typedef struct
 	char	   *buf;
 	char	   *cur;
 	char	   *op;
-	int4		buflen;
+	int			buflen;
 } INFIX;
 
-#define RESIZEBUF(inf,addsize) \
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
 { \
-	int4 len = (inf)->cur - (inf)->buf; \
+	int len = (inf)->cur - (inf)->buf; \
 	(inf)->buflen *= 2; \
 	(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
 	(inf)->cur = (inf)->buf + len; \
@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
 static void
 infix(INFIX * in, bool first)
 {
-	if (in->curpol->type == VAL)
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (in->curpol->type == QI_VAL)
 	{
-		char	   *op = in->op + in->curpol->distance;
+		QueryOperand *curpol = &in->curpol->operand;
+		char	   *op = in->op + curpol->distance;
 		int			clen;
 
-		RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+		RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
 		*(in->cur) = '\'';
 		in->cur++;
 		while (*op)
@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
 		}
 		*(in->cur) = '\'';
 		in->cur++;
-		if (in->curpol->weight)
+		if (curpol->weight)
 		{
 			*(in->cur) = ':';
 			in->cur++;
-			if (in->curpol->weight & (1 << 3))
+			if (curpol->weight & (1 << 3))
 			{
 				*(in->cur) = 'A';
 				in->cur++;
 			}
-			if (in->curpol->weight & (1 << 2))
+			if (curpol->weight & (1 << 2))
 			{
 				*(in->cur) = 'B';
 				in->cur++;
 			}
-			if (in->curpol->weight & (1 << 1))
+			if (curpol->weight & (1 << 1))
 			{
 				*(in->cur) = 'C';
 				in->cur++;
 			}
-			if (in->curpol->weight & 1)
+			if (curpol->weight & 1)
 			{
 				*(in->cur) = 'D';
 				in->cur++;
@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
 		*(in->cur) = '\0';
 		in->curpol++;
 	}
-	else if (in->curpol->val == (int4) '!')
+	else if (in->curpol->operator.oper == OP_NOT)
 	{
 		bool		isopr = false;
 
@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
 		in->cur++;
 		*(in->cur) = '\0';
 		in->curpol++;
-		if (in->curpol->type == OPR)
+
+		if (in->curpol->type == QI_OPR)
 		{
 			isopr = true;
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, "( ");
 			in->cur = strchr(in->cur, '\0');
 		}
+
 		infix(in, isopr);
 		if (isopr)
 		{
@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
 	}
 	else
 	{
-		int4		op = in->curpol->val;
+		int8		op = in->curpol->operator.oper;
 		INFIX		nrm;
 
 		in->curpol++;
-		if (op == (int4) '|' && !first)
+		if (op == OP_OR && !first)
 		{
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, "( ");
@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
 
 		/* print operator & right operand */
 		RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
-		sprintf(in->cur, " %c %s", op, nrm.buf);
+		switch(op)
+		{
+			case OP_OR:
+				sprintf(in->cur, " | %s", nrm.buf);
+				break;
+			case OP_AND:
+				sprintf(in->cur, " & %s", nrm.buf);
+				break;
+			default:
+				/* OP_NOT is handled in the if-branch above */
+				elog(ERROR, "unexpected operator type %d", op);
+		}
 		in->cur = strchr(in->cur, '\0');
 		pfree(nrm.buf);
 
-		if (op == (int4) '|' && !first)
+		if (op == OP_OR && !first)
 		{
 			RESIZEBUF(in, 2);
 			sprintf(in->cur, " )");
@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
 	pq_sendint(&buf, query->size, sizeof(int32));
 	for (i = 0; i < query->size; i++)
 	{
-		int			tmp;
-
 		pq_sendint(&buf, item->type, sizeof(item->type));
-		pq_sendint(&buf, item->weight, sizeof(item->weight));
-		pq_sendint(&buf, item->left, sizeof(item->left));
-		pq_sendint(&buf, item->val, sizeof(item->val));
-
-		/*
-		 * We are sure that sizeof(WordEntry) == sizeof(int32), and about
-		 * layout of QueryItem
-		 */
-		tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
-		pq_sendint(&buf, tmp, sizeof(tmp));
 
+		switch(item->type)
+		{
+			case QI_VAL:
+				pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
+				pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
+				pq_sendint(&buf, item->operand.length, sizeof(int16));
+				/* istrue flag is just for temporary use in tsrank.c/Cover,
+				 * so we don't need to transfer that */
+				break;
+			case QI_OPR:
+				pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
+				if (item->operator.oper != OP_NOT)
+					pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
+				break;
+			default:
+				elog(ERROR, "unknown tsquery node type %d", item->type);
+		}
 		item++;
 	}
 
 	item = GETQUERY(query);
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL)
-			pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+		if (item->type == QI_VAL)
+			pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
 		item++;
 	}
 
@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	TSQuery		query;
 	int			i,
 				size,
-				tmp,
-				len = HDRSIZETQ;
+				len;
 	QueryItem  *item;
 	int			datalen = 0;
 	char	   *ptr;
@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	size = pq_getmsgint(buf, sizeof(uint32));
 	if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
 		elog(ERROR, "invalid size of tsquery");
-	len += sizeof(QueryItem) * size;
+
+	len = HDRSIZETQ + sizeof(QueryItem) * size;
 
 	query = (TSQuery) palloc(len);
 	query->size = size;
@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	for (i = 0; i < size; i++)
 	{
 		item->type = (int8) pq_getmsgint(buf, sizeof(int8));
-		item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
-		item->left = (int16) pq_getmsgint(buf, sizeof(int16));
-		item->val = (int32) pq_getmsgint(buf, sizeof(int32));
-		tmp = pq_getmsgint(buf, sizeof(int32));
-		memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
-
-		/*
-		 * Sanity checks
-		 */
-		if (item->type == VAL)
-		{
-			datalen += item->length + 1;		/* \0 */
-		}
-		else if (item->type == OPR)
+
+		switch(item->type)
 		{
-			if (item->val == '|' || item->val == '&')
-			{
-				if (item->left <= 0 || i + item->left >= size)
-					elog(ERROR, "invalid pointer to left operand");
-			}
+			case QI_VAL:
+				item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
+				item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
+				item->operand.length = pq_getmsgint(buf, sizeof(int16));
+
+				/*
+				 * Check that datalen doesn't grow too large. Without the
+				 * check, a malicious client could induce a buffer overflow
+				 * by sending a tsquery whose size exceeds 2GB. datalen
+				 * would overflow, we would allocate a too small buffer below,
+				 * and overflow the buffer. Because operand.length is a 20-bit
+				 * field, adding one such value to datalen must exceed
+				 * MaxAllocSize before wrapping over the 32-bit datalen field,
+				 * so this check will protect from it.
+				 */
+				if (datalen > MAXSTRLEN)
+					elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+				/* We can calculate distance from datalen, no need to send it
+				 * through the wire. If we did, we would have to check that
+				 * it's valid anyway.
+				 */
+				item->operand.distance = datalen;
+
+				datalen += item->operand.length + 1;		/* \0 */
 
-			if (i == size - 1)
-				elog(ERROR, "invalid pointer to right operand");
+				break;
+			case QI_OPR:
+				item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
+				if (item->operator.oper != OP_NOT &&
+					item->operator.oper != OP_OR &&
+					item->operator.oper != OP_AND)
+					elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+				if(item->operator.oper != OP_NOT)
+				{
+					item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+					/*
+					 * Sanity checks
+					 */
+					if (item->operator.left <= 0 || i + item->operator.left >= size)
+						elog(ERROR, "invalid pointer to left operand");
+
+					/* XXX: Though there's no way to construct a TSQuery that's
+					 * not in polish notation, we don't enforce that for
+					 * queries received from client in binary mode. Is there
+					 * anything that relies on it?
+					 *
+					 * XXX: The tree could be malformed in other ways too,
+					 * a node could have two parents, for example.
+					 */
+				}
+
+				if (i == size - 1)
+					elog(ERROR, "invalid pointer to right operand");
+				break;
+			default:
+				elog(ERROR, "unknown tsquery node type %d", item->type);
 		}
-		else
-			elog(ERROR, "unknown tsquery node type");
 
 		item++;
 	}
@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 	ptr = GETOPERAND(query);
 	for (i = 0; i < size; i++)
 	{
-		if (item->type == VAL)
+		if (item->type == QI_VAL)
 		{
-			item->distance = ptr - GETOPERAND(query);
 			memcpy(ptr,
-				   pq_getmsgbytes(buf, item->length),
-				   item->length);
-			ptr += item->length;
+				   pq_getmsgbytes(buf, item->operand.length),
+				   item->operand.length);
+			ptr += item->operand.length;
 			*ptr++ = '\0';
 		}
 		item++;
@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
 	INFIX		nrm;
 	text	   *res;
 	QueryItem  *q;
-	int4		len;
+	int			len;
 
 	if (query->size == 0)
 	{
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
index 7991a4ad198c2d838aef08f5e641360d531df848..22e6f7c8198918aac94ef73273fc9f90ef2bea77 100644
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -35,20 +35,23 @@ maketree(QueryItem * in)
 
 	node->valnode = in;
 	node->right = node->left = NULL;
-	if (in->type == OPR)
+	if (in->type == QI_OPR)
 	{
 		node->right = maketree(in + 1);
-		if (in->val != (int4) '!')
-			node->left = maketree(in + in->left);
+		if (in->operator.oper != OP_NOT)
+			node->left = maketree(in + in->operator.left);
 	}
 	return node;
 }
 
+/*
+ * Internal state for plaintree and plainnode
+ */
 typedef struct
 {
 	QueryItem  *ptr;
-	int4		len;
-	int4		cur;
+	int		len; /* allocated size of ptr */
+	int		cur; /* number of elements in ptr */
 } PLAINTREE;
 
 static void
@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
 		state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
 	}
 	memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		state->cur++;
-	else if (node->valnode->val == (int4) '!')
+	else if (node->valnode->operator.oper == OP_NOT)
 	{
-		state->ptr[state->cur].left = 1;
+		state->ptr[state->cur].operator.left = 1;
 		state->cur++;
 		plainnode(state, node->right);
 	}
 	else
 	{
-		int4		cur = state->cur;
+		int	cur = state->cur;
 
 		state->cur++;
 		plainnode(state, node->right);
-		state->ptr[cur].left = state->cur - cur;
+		state->ptr[cur].operator.left = state->cur - cur;
 		plainnode(state, node->left);
 	}
 	pfree(node);
 }
 
 /*
- * make plain view of tree from 'normal' view of tree
+ * make plain view of tree from a NODE-tree representation
  */
 static QueryItem *
-plaintree(NODE * root, int4 *len)
+plaintree(NODE * root, int *len)
 {
 	PLAINTREE	pl;
 
 	pl.cur = 0;
 	pl.len = 16;
-	if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+	if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
 	{
 		pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
 		plainnode(&pl, root);
@@ -122,17 +125,17 @@ freetree(NODE * node)
 static NODE *
 clean_NOT_intree(NODE * node)
 {
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		return node;
 
-	if (node->valnode->val == (int4) '!')
+	if (node->valnode->operator.oper == OP_NOT)
 	{
 		freetree(node);
 		return NULL;
 	}
 
 	/* operator & or | */
-	if (node->valnode->val == (int4) '|')
+	if (node->valnode->operator.oper == OP_OR)
 	{
 		if ((node->left = clean_NOT_intree(node->left)) == NULL ||
 			(node->right = clean_NOT_intree(node->right)) == NULL)
@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
 	else
 	{
 		NODE	   *res = node;
+		
+		Assert(node->valnode->operator.oper == OP_AND);
 
 		node->left = clean_NOT_intree(node->left);
 		node->right = clean_NOT_intree(node->right);
@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
 }
 
 QueryItem *
-clean_NOT(QueryItem * ptr, int4 *len)
+clean_NOT(QueryItem * ptr, int *len)
 {
 	NODE	   *root = maketree(ptr);
 
@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
 #undef V_UNKNOWN
 #endif
 
-#define V_UNKNOWN	0
-#define V_TRUE		1
-#define V_FALSE		2
-#define V_STOP		3
+/*
+ * output values for result output parameter of clean_fakeval_intree
+ */
+#define V_UNKNOWN	0 /* the expression can't be evaluated statically */
+#define V_TRUE		1 /* the expression is always true (not implemented) */
+#define V_FALSE		2 /* the expression is always false (not implemented) */
+#define V_STOP		3 /* the expression is a stop word */
 
 /*
  * Clean query tree from values which is always in
@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
 	char		lresult = V_UNKNOWN,
 				rresult = V_UNKNOWN;
 
-	if (node->valnode->type == VAL)
+	if (node->valnode->type == QI_VAL)
 		return node;
-	else if (node->valnode->type == VALSTOP)
+	else 
+	if (node->valnode->type == QI_VALSTOP)
 	{
 		pfree(node);
 		*result = V_STOP;
 		return NULL;
 	}
 
+	Assert(node->valnode->type == QI_OPR);
 
-	if (node->valnode->val == (int4) '!')
+	if (node->valnode->operator.oper == OP_NOT)
 	{
 		node->right = clean_fakeval_intree(node->right, &rresult);
 		if (!node->right)
@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
 
 		node->left = clean_fakeval_intree(node->left, &lresult);
 		node->right = clean_fakeval_intree(node->right, &rresult);
+
 		if (lresult == V_STOP && rresult == V_STOP)
 		{
 			freetree(node);
@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
 }
 
 QueryItem *
-clean_fakeval(QueryItem * ptr, int4 *len)
+clean_fakeval(QueryItem * ptr, int *len)
 {
 	NODE	   *root = maketree(ptr);
 	char		result = V_UNKNOWN;
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index fd97c2796df771580739328c910f3b53cb2342a1..cbf06f7adeb8cc8419a907c9bb642556fa2e4b8a 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
 }
 
 static QTNode *
-join_tsqueries(TSQuery a, TSQuery b)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator)
 {
 	QTNode	   *res = (QTNode *) palloc0(sizeof(QTNode));
 
 	res->flags |= QTN_NEEDFREE;
 
 	res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-	res->valnode->type = OPR;
+	res->valnode->type = QI_OPR;
+	res->valnode->operator.oper = operator;
 
 	res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
 	res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
 		PG_RETURN_POINTER(a);
 	}
 
-	res = join_tsqueries(a, b);
-
-	res->valnode->val = '&';
+	res = join_tsqueries(a, b, OP_AND);
 
 	query = QTN2QT(res);
 
@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
 		PG_RETURN_POINTER(a);
 	}
 
-	res = join_tsqueries(a, b);
-
-	res->valnode->val = '|';
+	res = join_tsqueries(a, b, OP_OR);
 
 	query = QTN2QT(res);
 
@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
 	res->flags |= QTN_NEEDFREE;
 
 	res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-	res->valnode->type = OPR;
-	res->valnode->val = '!';
+	res->valnode->type = QI_OPR;
+	res->valnode->operator.oper = OP_NOT;
 
 	res->child = (QTNode **) palloc0(sizeof(QTNode *));
 	res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
 
 	for (i = 0; i < a->size; i++)
 	{
-		if (ptr->type == VAL)
-			sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
+		if (ptr->type == QI_VAL)
+			sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
 		ptr++;
 	}
 
@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
 	for (i = 0; i < ex->size; i++)
 	{
 		iq = GETQUERY(query);
-		if (ie[i].type != VAL)
+		if (ie[i].type != QI_VAL)
 			continue;
 		for (j = 0; j < query->size; j++)
-			if (iq[j].type == VAL && ie[i].val == iq[j].val)
+			if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
 			{
 				j = query->size + 1;
 				break;
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c
index f0d22c644ae702b59a0df740631631aa40307e61..db2fe6c53ef91681bc9d39b98c1249d80ef3e103 100644
--- a/src/backend/utils/adt/tsquery_rewrite.c
+++ b/src/backend/utils/adt/tsquery_rewrite.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
 	return 1;
 }
 
+/*
+ * If node is equal to ex, replace it with subs. Replacement is actually done
+ * by returning either node or a copy of subs.
+ */
 static QTNode *
 findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 {
 
-	if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
+	if ((node->sign & ex->sign) != ex->sign || 
+		node->valnode->type != ex->valnode->type)
 		return node;
 
 	if (node->flags & QTN_NOCHANGE)
 		return node;
-
-	if (node->valnode->type == OPR)
+	
+	if (node->valnode->type == QI_OPR)
 	{
+		if (node->valnode->operator.oper != ex->valnode->operator.oper)
+			return node;
+
 		if (node->nchild == ex->nchild)
 		{
 			if (QTNEq(node, ex))
@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 		}
 		else if (node->nchild > ex->nchild)
 		{
+			/*
+			 * AND and OR are commutative, so we check if a subset of the
+			 * children match. For example, if tnode is A | B | C, and 
+			 * ex is B | C, we have a match after we convert tnode to
+			 * A | (B | C).
+			 */
 			int		   *counters = (int *) palloc(sizeof(int) * node->nchild);
 			int			i;
 			QTNode	   *tnode = (QTNode *) palloc(sizeof(QTNode));
@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 			pfree(counters);
 		}
 	}
-	else if (QTNEq(node, ex))
+	else 
 	{
-		QTNFree(node);
-		if (subs)
-		{
-			node = QTNCopy(subs);
-			node->flags |= QTN_NOCHANGE;
-		}
-		else
+		Assert(node->valnode->type == QI_VAL);
+
+		if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
+			return node;
+		else if (QTNEq(node, ex))
 		{
-			node = NULL;
+			QTNFree(node);
+			if (subs)
+			{
+				node = QTNCopy(subs);
+				node->flags |= QTN_NOCHANGE;
+			}
+			else
+			{
+				node = NULL;
+			}
+			*isfind = true;
 		}
-		*isfind = true;
 	}
 
 	return node;
@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
 {
 	root = findeq(root, ex, subs, isfind);
 
-	if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
+	if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
 	{
 		int			i;
 
@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
 	if (!root)
 		return NULL;
 
-	if (root->valnode->type == OPR)
+	if (root->valnode->type == QI_OPR)
 	{
 		int			i,
 					j = 0;
@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
 
 		root->nchild = j;
 
-		if (root->valnode->val == (int4) '!' && root->nchild == 0)
+		if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
 		{
 			QTNFree(root);
 			root = NULL;
@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
 		elog(ERROR, "array must be one-dimensional, not %d dimensions",
 			 ARR_NDIM(qa));
 	if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
-		elog(ERROR, "array should have only three elements");
+		elog(ERROR, "array must have three elements");
 	if (ARR_ELEMTYPE(qa) != TSQUERYOID)
-		elog(ERROR, "array should contain tsquery type");
+		elog(ERROR, "array must contain tsquery elements");
 
 	deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
 
@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
 		subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
 
 	tree = findsubquery(tree, qex, subs, NULL);
+
 	QTNFree(qex);
 	QTNFree(subs);
 
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
index ae8cc318da93b340c16fa5ec441ade75b5e44c9d..e378661488bd8604958c9802b18b4c51333ad6f5 100644
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,7 +17,6 @@
 #include "tsearch/ts_type.h"
 #include "tsearch/ts_utils.h"
 
-
 QTNode *
 QT2QTN(QueryItem * in, char *operand)
 {
@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
 
 	node->valnode = in;
 
-	if (in->type == OPR)
+	if (in->type == QI_OPR)
 	{
 		node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
 		node->child[0] = QT2QTN(in + 1, operand);
 		node->sign = node->child[0]->sign;
-		if (in->val == (int4) '!')
+		if (in->operator.oper == OP_NOT)
 			node->nchild = 1;
 		else
 		{
 			node->nchild = 2;
-			node->child[1] = QT2QTN(in + in->left, operand);
+			node->child[1] = QT2QTN(in + in->operator.left, operand);
 			node->sign |= node->child[1]->sign;
 		}
 	}
 	else if (operand)
 	{
-		node->word = operand + in->distance;
-		node->sign = 1 << (in->val % 32);
+		node->word = operand + in->operand.distance;
+		node->sign = 1 << (in->operand.valcrc % 32);
 	}
 
 	return node;
@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
 	if (!in)
 		return;
 
-	if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+	if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
 		pfree(in->word);
 
 	if (in->child)
 	{
 		if (in->valnode)
 		{
-			if (in->valnode->type == OPR && in->nchild > 0)
+			if (in->valnode->type == QI_OPR && in->nchild > 0)
 			{
 				int			i;
 
@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
 {
 	if (an->valnode->type != bn->valnode->type)
 		return (an->valnode->type > bn->valnode->type) ? -1 : 1;
-	else if (an->valnode->val != bn->valnode->val)
-		return (an->valnode->val > bn->valnode->val) ? -1 : 1;
-	else if (an->valnode->type == VAL)
-	{
-		if (an->valnode->length == bn->valnode->length)
-			return strncmp(an->word, bn->word, an->valnode->length);
-		else
-			return (an->valnode->length > bn->valnode->length) ? -1 : 1;
-	}
-	else if (an->nchild != bn->nchild)
+	
+	if (an->valnode->type == QI_OPR)
 	{
-		return (an->nchild > bn->nchild) ? -1 : 1;
+		QueryOperator *ao = &an->valnode->operator;
+		QueryOperator *bo = &bn->valnode->operator;
+
+		if(ao->oper != bo->oper)
+			return (ao->oper > bo->oper) ? -1 : 1;
+
+		if (an->nchild != bn->nchild)
+			return (an->nchild > bn->nchild) ? -1 : 1;
+
+		{
+			int			i,
+						res;
+
+			for (i = 0; i < an->nchild; i++)
+				if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+					return res;
+		}
+		return 0;
 	}
 	else
 	{
-		int			i,
-					res;
+		QueryOperand *ao = &an->valnode->operand;
+		QueryOperand *bo = &bn->valnode->operand;
 
-		for (i = 0; i < an->nchild; i++)
-			if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
-				return res;
-	}
+		Assert(an->valnode->type == QI_VAL);
+
+		if (ao->valcrc != bo->valcrc)
+		{
+			return (ao->valcrc > bo->valcrc) ? -1 : 1;
+		}
 
-	return 0;
+		if (ao->length == bo->length)
+			return strncmp(an->word, bn->word, ao->length);
+		else
+			return (ao->length > bo->length) ? -1 : 1;
+	}
 }
 
 static int
@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
 	return (QTNodeCompare(a, b) == 0) ? true : false;
 }
 
+/*
+ * Remove unnecessary intermediate nodes. For example:
+ *
+ *  OR          OR
+ * a  OR    -> a b c
+ *   b  c      
+ */
 void
 QTNTernary(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
 
 	for (i = 0; i < in->nchild; i++)
 	{
-		if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
+		QTNode	   *cc = in->child[i];
+
+		if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
 		{
-			QTNode	   *cc = in->child[i];
 			int			oldnchild = in->nchild;
 
 			in->nchild += cc->nchild - 1;
@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
 			memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
 			i += cc->nchild - 1;
 
+			if(cc->flags & QTN_NEEDFREE)
+				pfree(cc->valnode);
 			pfree(cc);
 		}
 	}
 }
 
+/*
+ * Convert a tree to binary tree by inserting intermediate nodes.
+ * (Opposite of QTNTernary)
+ */
 void
 QTNBinary(QTNode * in)
 {
 	int			i;
 
-	if (in->valnode->type != OPR)
+	if (in->valnode->type != QI_OPR)
 		return;
 
 	for (i = 0; i < in->nchild; i++)
@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
 		nn->sign = nn->child[0]->sign | nn->child[1]->sign;
 
 		nn->valnode->type = in->valnode->type;
-		nn->valnode->val = in->valnode->val;
+		nn->valnode->operator.oper = in->valnode->operator.oper;
 
 		in->child[0] = nn;
 		in->child[1] = in->child[in->nchild - 1];
@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
 	}
 }
 
+/*
+ * Count the nodes in the tree and the total length of the operand
+ * strings, including '\0'-terminators.
+ */
 static void
-cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
+cntsize(QTNode * in, int *sumlen, int *nnode)
 {
 	*nnode += 1;
-	if (in->valnode->type == OPR)
+	if (in->valnode->type == QI_OPR)
 	{
 		int			i;
 
@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
 	}
 	else
 	{
-		*sumlen += in->valnode->length + 1;
+		*sumlen += in->valnode->operand.length + 1;
 	}
 }
 
@@ -234,22 +266,26 @@ typedef struct
 } QTN2QTState;
 
 static void
-fillQT(QTN2QTState * state, QTNode * in)
+fillQT(QTN2QTState *state, QTNode *in)
 {
-	*(state->curitem) = *(in->valnode);
-
-	if (in->valnode->type == VAL)
+	if (in->valnode->type == QI_VAL)
 	{
-		memcpy(state->curoperand, in->word, in->valnode->length);
-		state->curitem->distance = state->curoperand - state->operand;
-		state->curoperand[in->valnode->length] = '\0';
-		state->curoperand += in->valnode->length + 1;
+		memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
+
+		memcpy(state->curoperand, in->word, in->valnode->operand.length);
+		state->curitem->operand.distance = state->curoperand - state->operand;
+		state->curoperand[in->valnode->operand.length] = '\0';
+		state->curoperand += in->valnode->operand.length + 1;
 		state->curitem++;
 	}
 	else
 	{
 		QueryItem  *curitem = state->curitem;
 
+		Assert(in->valnode->type == QI_OPR);
+
+		memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
+
 		Assert(in->nchild <= 2);
 		state->curitem++;
 
@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
 
 		if (in->nchild == 2)
 		{
-			curitem->left = state->curitem - curitem;
+			curitem->operator.left = state->curitem - curitem;
 			fillQT(state, in->child[1]);
 		}
 	}
@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
 	*(out->valnode) = *(in->valnode);
 	out->flags |= QTN_NEEDFREE;
 
-	if (in->valnode->type == VAL)
+	if (in->valnode->type == QI_VAL)
 	{
-		out->word = palloc(in->valnode->length + 1);
-		memcpy(out->word, in->word, in->valnode->length);
-		out->word[in->valnode->length] = '\0';
+		out->word = palloc(in->valnode->operand.length + 1);
+		memcpy(out->word, in->word, in->valnode->operand.length);
+		out->word[in->valnode->operand.length] = '\0';
 		out->flags |= QTN_WORDFREE;
 	}
 	else
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 8b2ab884c8cb6082787e217db6fe587b2d86c05e..d48e9b4a470be27d2843789fe27487c978e4e796 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,7 +68,7 @@ cnt_length(TSVector t)
 }
 
 static int4
-WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
+WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
 {
 	if (ptr->len == item->length)
 		return strncmp(
@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
 }
 
 static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryItem * item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
 {
 	WordEntry  *StopLow = ARRPTR(t);
 	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
 }
 
 
+/*
+ * sort QueryOperands by (length, word)
+ */
 static int
-compareQueryItem(const void *a, const void *b, void *arg)
+compareQueryOperand(const void *a, const void *b, void *arg)
 {
 	char	   *operand = (char *) arg;
+	QueryOperand *qa = (*(QueryOperand **) a);
+	QueryOperand *qb = (*(QueryOperand **) b);
 
-	if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
-		return strncmp(operand + (*(QueryItem **) a)->distance,
-					   operand + (*(QueryItem **) b)->distance,
-					   (*(QueryItem **) b)->length);
+	if (qa->length == qb->length)
+		return strncmp(operand + qa->distance,
+					   operand + qb->distance,
+					   qb->length);
 
-	return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
+	return (qa->length > qb->length) ? 1 : -1;
 }
 
-static QueryItem **
-SortAndUniqItems(char *operand, QueryItem * item, int *size)
+/*
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
+ * The returned QueryOperands are pointers to the original QueryOperands
+ * in the query.
+ *
+ * Length of the returned array is stored in *size
+ */
+static QueryOperand **
+SortAndUniqItems(TSQuery q, int *size)
 {
-	QueryItem **res,
+	char *operand = GETOPERAND(q);
+	QueryItem * item = GETQUERY(q);
+	QueryOperand **res,
 			  **ptr,
 			  **prevptr;
 
-	ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
+	ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
 
+	/* Collect all operands from the tree to res */
 	while ((*size)--)
 	{
-		if (item->type == VAL)
+		if (item->type == QI_VAL)
 		{
-			*ptr = item;
+			*ptr = (QueryOperand *) item;
 			ptr++;
 		}
 		item++;
@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
 	if (*size < 2)
 		return res;
 
-	qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
+	qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
 
 	ptr = res + 1;
 	prevptr = res;
 
+	/* remove duplicates */
 	while (ptr - res < *size)
 	{
-		if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+		if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
 		{
 			prevptr++;
 			*prevptr = *ptr;
@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
 				lenct,
 				dist;
 	float		res = -1.0;
-	QueryItem **item;
+	QueryOperand **item;
 	int			size = q->size;
 
-	item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+	item = SortAndUniqItems(q, &size);
 	if (size < 2)
 	{
 		pfree(item);
@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
 				j,
 				i;
 	float		res = 0.0;
-	QueryItem **item;
+	QueryOperand **item;
 	int			size = q->size;
 
 	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
-	item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+	item = SortAndUniqItems(q, &size);
 
 	for (i = 0; i < size; i++)
 	{
@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
 	if (!t->size || !q->size)
 		return 0.0;
 
-	res = (item->type != VAL && item->val == (int4) '&') ?
+	/* XXX: What about NOT? */
+	res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
 		calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
 
 	if (res < 0)
@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
 }
 
 static bool
-checkcondition_QueryItem(void *checkval, QueryItem * val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val)
 {
 	return (bool) (val->istrue);
 }
@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
 	/* reset istrue flag */
 	for (i = 0; i < query->size; i++)
 	{
-		if (item->type == VAL)
-			item->istrue = 0;
+		if (item->type == QI_VAL)
+			item->operand.istrue = 0;
 		item++;
 	}
 }
@@ -484,7 +501,7 @@ typedef struct
 
 
 static bool
-Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
+Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
 {
 	DocRepresentation *ptr;
 	int			lastpos = ext->pos;
@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
 	while (ptr - doc < len)
 	{
 		for (i = 0; i < ptr->nitem; i++)
-			ptr->item[i]->istrue = 1;
-		if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
+		{
+			if(ptr->item[i]->type == QI_VAL)
+				ptr->item[i]->operand.istrue = 1;
+		}
+		if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
 		{
 			if (ptr->pos > ext->q)
 			{
@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
 	while (ptr >= doc + ext->pos)
 	{
 		for (i = 0; i < ptr->nitem; i++)
-			ptr->item[i]->istrue = 1;
-		if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
+			if(ptr->item[i]->type  == QI_VAL) /* XXX */
+				ptr->item[i]->operand.istrue = 1;
+		if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
 		{
 			if (ptr->pos < ext->p)
 			{
@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 	for (i = 0; i < query->size; i++)
 	{
-		if (item[i].type != VAL || item[i].istrue)
+		QueryOperand *curoperand;
+
+		if (item[i].type != QI_VAL)
+			continue;
+		
+		curoperand = &item[i].operand;
+		
+		if(item[i].operand.istrue)
 			continue;
 
-		entry = find_wordentry(txt, query, &(item[i]));
+		entry = find_wordentry(txt, query, curoperand);
 		if (!entry)
 			continue;
 
@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 		{
 			if (j == 0)
 			{
-				QueryItem  *kptr,
-						   *iptr = item + i;
 				int			k;
 
 				doc[cur].needfree = false;
@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 				for (k = 0; k < query->size; k++)
 				{
-					kptr = item + k;
+					QueryOperand *kptr = &item[k].operand;
+					QueryOperand *iptr = &item[i].operand;
+
 					if (k == i ||
-						(item[k].type == VAL &&
-						 compareQueryItem(&kptr, &iptr, operand) == 0))
+						(item[k].type == QI_VAL &&
+						 compareQueryOperand(&kptr, &iptr, operand) == 0))
 					{
+						/* if k == i, we've already checked above that its type == QI_VAL */
 						doc[cur].item[doc[cur].nitem] = item + k;
 						doc[cur].nitem++;
-						kptr->istrue = 1;
+						item[k].operand.istrue = 1;
 					}
 				}
 			}
@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
 
 	if (cur > 0)
 	{
-		if (cur > 1)
-			qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+		qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
 		return doc;
 	}
 
@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
 {
 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 	TSVector	txt = PG_GETARG_TSVECTOR(1);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(2);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
 	int			method = PG_GETARG_INT32(3);
 	float		res;
 
@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
 {
 	ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 	TSVector	txt = PG_GETARG_TSVECTOR(1);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(2);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
 	float		res;
 
 	res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
@@ -778,7 +806,7 @@ Datum
 ts_rankcd_ttf(PG_FUNCTION_ARGS)
 {
 	TSVector	txt = PG_GETARG_TSVECTOR(0);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(1);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
 	int			method = PG_GETARG_INT32(2);
 	float		res;
 
@@ -793,7 +821,7 @@ Datum
 ts_rankcd_tt(PG_FUNCTION_ARGS)
 {
 	TSVector	txt = PG_GETARG_TSVECTOR(0);
-	TSQuery		query = PG_GETARG_TSQUERY_COPY(1);
+	TSQuery		query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
 	float		res;
 
 	res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 8ab024650f72a6887bbe4a17453b4decd9115a16..2866e028da02b778c7592d909524b90413f2d612 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,22 +20,37 @@
 #include "tsearch/ts_utils.h"
 #include "utils/memutils.h"
 
+typedef struct
+{
+	WordEntry	entry;			/* should be first ! */
+	WordEntryPos *pos;
+	int			poslen;			/* number of elements in pos */
+} WordEntryIN;
 
 static int
 comparePos(const void *a, const void *b)
 {
-	if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
+	int apos = WEP_GETPOS(*(WordEntryPos *) a);
+	int bpos = WEP_GETPOS(*(WordEntryPos *) b);
+
+	if (apos == bpos)
 		return 0;
-	return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
+	return (apos > bpos) ? 1 : -1;
 }
 
+/*
+ * Removes duplicate pos entries. If there are two entries with the same
+ * pos but different weights, the higher weight is retained.
+ *
+ * Returns new length.
+ */
 static int
-uniquePos(WordEntryPos * a, int4 l)
+uniquePos(WordEntryPos * a, int l)
 {
 	WordEntryPos *ptr,
 			   *res;
 
-	if (l == 1)
+	if (l <= 1)
 		return l;
 
 	res = a;
@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg)
 }
 
 static int
-uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
 {
 	WordEntryIN *ptr,
 			   *res;
 
-	res = a;
+	Assert(l >= 1);
+
 	if (l == 1)
 	{
 		if (a->entry.haspos)
 		{
-			*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
-			*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
+			a->poslen = uniquePos(a->pos, a->poslen);
+			*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
 		}
 		return l;
 	}
+	res = a;
 
 	ptr = a + 1;
 	qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 		{
 			if (res->entry.haspos)
 			{
-				*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-				*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+				res->poslen = uniquePos(res->pos, res->poslen);
+				*outbuflen += res->poslen * sizeof(WordEntryPos);
 			}
 			*outbuflen += SHORTALIGN(res->entry.len);
 			res++;
@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 		{
 			if (res->entry.haspos)
 			{
-				int4		len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+				int	newlen = ptr->poslen + res->poslen;
+
+				/* Append ptr's positions to the end of res's positions */
 
-				res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
-				memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
-					   &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
-				*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+				res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
+				memcpy(&res->pos[res->poslen],
+					   ptr->pos, ptr->poslen * sizeof(WordEntryPos));
+				res->poslen = newlen;
 				pfree(ptr->pos);
 			}
 			else
@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
 	}
 	if (res->entry.haspos)
 	{
-		*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-		*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+		res->poslen = uniquePos(res->pos, res->poslen);
+		*outbuflen += res->poslen * sizeof(WordEntryPos);
 	}
 	*outbuflen += SHORTALIGN(res->entry.len);
 
@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
 	return compareentry(a, b, buf);
 }
 
-#define WAITWORD		1
-#define WAITENDWORD		2
-#define WAITNEXTCHAR	3
-#define WAITENDCMPLX	4
-#define WAITPOSINFO		5
-#define INPOSINFO		6
-#define WAITPOSDELIM	7
-#define WAITCHARCMPLX	8
-
-#define RESIZEPRSBUF \
-do { \
-	if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
-	{ \
-		int4 clen = state->curpos - state->word; \
-		state->len *= 2; \
-		state->word = (char*)repalloc( (void*)state->word, state->len ); \
-		state->curpos = state->word + clen; \
-	} \
-} while (0)
-
-bool
-gettoken_tsvector(TSVectorParseState *state)
-{
-	int4		oldstate = 0;
-
-	state->curpos = state->word;
-	state->state = WAITWORD;
-	state->alen = 0;
-
-	while (1)
-	{
-		if (state->state == WAITWORD)
-		{
-			if (*(state->prsbuf) == '\0')
-				return false;
-			else if (t_iseq(state->prsbuf, '\''))
-				state->state = WAITENDCMPLX;
-			else if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDWORD;
-			}
-			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-			else if (!t_isspace(state->prsbuf))
-			{
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = WAITENDWORD;
-			}
-		}
-		else if (state->state == WAITNEXTCHAR)
-		{
-			if (*(state->prsbuf) == '\0')
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("there is no escaped character")));
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = oldstate;
-			}
-		}
-		else if (state->state == WAITENDWORD)
-		{
-			if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDWORD;
-			}
-			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
-					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
-			{
-				RESIZEPRSBUF;
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				*(state->curpos) = '\0';
-				return true;
-			}
-			else if (t_iseq(state->prsbuf, ':'))
-			{
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				*(state->curpos) = '\0';
-				if (state->oprisdelim)
-					return true;
-				else
-					state->state = INPOSINFO;
-			}
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-			}
-		}
-		else if (state->state == WAITENDCMPLX)
-		{
-			if (t_iseq(state->prsbuf, '\''))
-			{
-				state->state = WAITCHARCMPLX;
-			}
-			else if (t_iseq(state->prsbuf, '\\'))
-			{
-				state->state = WAITNEXTCHAR;
-				oldstate = WAITENDCMPLX;
-			}
-			else if (*(state->prsbuf) == '\0')
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-			else
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-			}
-		}
-		else if (state->state == WAITCHARCMPLX)
-		{
-			if (t_iseq(state->prsbuf, '\''))
-			{
-				RESIZEPRSBUF;
-				COPYCHAR(state->curpos, state->prsbuf);
-				state->curpos += pg_mblen(state->prsbuf);
-				state->state = WAITENDCMPLX;
-			}
-			else
-			{
-				RESIZEPRSBUF;
-				*(state->curpos) = '\0';
-				if (state->curpos == state->word)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				if (state->oprisdelim)
-				{
-					/* state->prsbuf+=pg_mblen(state->prsbuf); */
-					return true;
-				}
-				else
-					state->state = WAITPOSINFO;
-				continue;		/* recheck current character */
-			}
-		}
-		else if (state->state == WAITPOSINFO)
-		{
-			if (t_iseq(state->prsbuf, ':'))
-				state->state = INPOSINFO;
-			else
-				return true;
-		}
-		else if (state->state == INPOSINFO)
-		{
-			if (t_isdigit(state->prsbuf))
-			{
-				if (state->alen == 0)
-				{
-					state->alen = 4;
-					state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
-					*(uint16 *) (state->pos) = 0;
-				}
-				else if (*(uint16 *) (state->pos) + 1 >= state->alen)
-				{
-					state->alen *= 2;
-					state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
-				}
-				(*(uint16 *) (state->pos))++;
-				WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
-				if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("wrong position info in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-				state->state = WAITPOSDELIM;
-			}
-			else
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-		}
-		else if (state->state == WAITPOSDELIM)
-		{
-			if (t_iseq(state->prsbuf, ','))
-				state->state = INPOSINFO;
-			else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
-			}
-			else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
-			}
-			else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
-			}
-			else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
-			{
-				if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
-							 errmsg("syntax error in tsvector")));
-				WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-			}
-			else if (t_isspace(state->prsbuf) ||
-					 *(state->prsbuf) == '\0')
-				return true;
-			else if (!t_isdigit(state->prsbuf))
-				ereport(ERROR,
-						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("syntax error in tsvector")));
-		}
-		else					/* internal error */
-			elog(ERROR, "internal error in gettoken_tsvector");
-
-		/* get next char */
-		state->prsbuf += pg_mblen(state->prsbuf);
-	}
-
-	return false;
-}
 
 Datum
 tsvectorin(PG_FUNCTION_ARGS)
@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS)
 	char	   *buf = PG_GETARG_CSTRING(0);
 	TSVectorParseState state;
 	WordEntryIN *arr;
+	int			totallen;
+	int			arrlen;  /* allocated size of arr */
 	WordEntry  *inarr;
-	int4		len = 0,
-				totallen = 64;
+	int			len = 0;
 	TSVector	in;
-	char	   *tmpbuf,
-			   *cur;
-	int4		i,
-				buflen = 256;
+	int			i;
+	char	   *token;
+	int			toklen;
+	WordEntryPos *pos;
+	int			poslen;
+
+	/*
+	 * Tokens are appended to tmpbuf, cur is a pointer
+	 * to the end of used space in tmpbuf.
+	 */
+	char	   *tmpbuf;
+	char	   *cur;
+	int			buflen = 256; /* allocated size of tmpbuf */
 
 	pg_verifymbstr(buf, strlen(buf), false);
-	state.prsbuf = buf;
-	state.len = 32;
-	state.word = (char *) palloc(state.len);
-	state.oprisdelim = false;
 
-	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+	state = init_tsvector_parser(buf, false);
+	
+	arrlen = 64;
+	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
 	cur = tmpbuf = (char *) palloc(buflen);
 
-	while (gettoken_tsvector(&state))
+	while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
 	{
-		/*
-		 * Realloc buffers if it's needed
-		 */
-		if (len >= totallen)
-		{
-			totallen *= 2;
-			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
-		}
-
-		while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
-		{
-			int4		dist = cur - tmpbuf;
-
-			buflen *= 2;
-			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
-			cur = tmpbuf + dist;
-		}
 
-		if (state.curpos - state.word >= MAXSTRLEN)
+		if (toklen >= MAXSTRLEN)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("word is too long (%ld bytes, max %ld bytes)",
-							(long) (state.curpos - state.word),
+							(long) toklen,
 							(long) MAXSTRLEN)));
 
-		arr[len].entry.len = state.curpos - state.word;
+
 		if (cur - tmpbuf > MAXSTRPOS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("position value too large")));
+
+		/*
+		 * Enlarge buffers if needed
+		 */
+		if (len >= arrlen)
+		{
+			arrlen *= 2;
+			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
+		}
+		while ((cur - tmpbuf) + toklen >= buflen)
+		{
+			int	dist = cur - tmpbuf;
+
+			buflen *= 2;
+			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+			cur = tmpbuf + dist;
+		}
+		arr[len].entry.len = toklen;
 		arr[len].entry.pos = cur - tmpbuf;
-		memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
-		cur += arr[len].entry.len;
+		memcpy((void *) cur, (void *) token, toklen);
+		cur += toklen;
 
-		if (state.alen)
+		if (poslen != 0)
 		{
 			arr[len].entry.haspos = 1;
-			arr[len].pos = state.pos;
+			arr[len].pos = pos;
+			arr[len].poslen = poslen;
 		}
 		else
 			arr[len].entry.haspos = 0;
 		len++;
 	}
-	pfree(state.word);
+
+	close_tsvector_parser(state);
 
 	if (len > 0)
 		len = uniqueentry(arr, len, tmpbuf, &buflen);
@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS)
 		cur += SHORTALIGN(arr[i].entry.len);
 		if (arr[i].entry.haspos)
 		{
-			memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
-			cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+			uint16 tmplen;
+
+			if(arr[i].poslen > 0xFFFF)
+				elog(ERROR, "positions array too long");
+
+			tmplen = (uint16) arr[i].poslen;
+
+			/* Copy length to output struct */
+			memcpy(cur, &tmplen, sizeof(uint16));
+			cur += sizeof(uint16);
+
+			/* Copy positions */
+			memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
+			cur += arr[i].poslen * sizeof(WordEntryPos);
+
 			pfree(arr[i].pos);
 		}
 		inarr[i] = arr[i].entry;
@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	TSVector	vec;
-	int			i,
-				size,
-				len = DATAHDRSIZE;
+	int			i;
+	uint32		size;
 	WordEntry  *weptr;
 	int			datalen = 0;
+	Size		len;
 
 	size = pq_getmsgint(buf, sizeof(uint32));
 	if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
 		elog(ERROR, "invalid size of tsvector");
 
-	len += sizeof(WordEntry) * size;
+	len = DATAHDRSIZE + sizeof(WordEntry) * size;
 
-	len *= 2;
+	len = len * 2; /* times two to make room for lexemes */
 	vec = (TSVector) palloc0(len);
 	vec->size = size;
 
 	weptr = ARRPTR(vec);
 	for (i = 0; i < size; i++)
 	{
-		int			tmp;
+		int32 tmp;
 
 		weptr = ARRPTR(vec) + i;
 
@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 						npos;
 			WordEntryPos *wepptr;
 
-			npos = (uint16) pq_getmsgint(buf, sizeof(int16));
+			npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
 			if (npos > MAXNUMPOS)
 				elog(ERROR, "unexpected number of positions");
 
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8567172c64f6f3c7330fff535dd0ba5c1d893eae..d34ab1fcf0bd68872f76fb7043cf8b442f1c4921 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,6 +66,9 @@ typedef struct
 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
 
 
+/*
+ * Order: haspos, len, word, for all positions (pos, weight)
+ */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
 {
@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
  * compare 2 string values
  */
 static int4
-ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
+ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
 {
 	if (ptr->len == item->length)
 		return strncmp(
@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
  * check weight info
  */
 static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
+checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
 {
 	WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
 	uint16		len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_str(void *checkval, QueryItem * val)
+checkcondition_str(void *checkval, QueryOperand * val)
 {
-	WordEntry  *StopLow = ((CHKVAL *) checkval)->arrb;
-	WordEntry  *StopHigh = ((CHKVAL *) checkval)->arre;
+	CHKVAL *chkval = (CHKVAL *) checkval;
+	WordEntry  *StopLow = chkval->arrb;
+	WordEntry  *StopHigh = chkval->arre;
 	WordEntry  *StopMiddle;
 	int			difference;
 
@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+		difference = ValCompare(chkval, StopMiddle, val);
 		if (difference == 0)
 			return (val->weight && StopMiddle->haspos) ?
-				checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
+				checkclass_str(chkval, StopMiddle, val) : true;
 		else if (difference < 0)
 			StopLow = StopMiddle + 1;
 		else
@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
 }
 
 /*
- * check for boolean condition
+ * check for boolean condition.
+ *
+ * if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
+ * checkval can be used to pass information to the callback. TS_execute doesn't
+ * do anything with it.
+ * chkcond is a callback function used to evaluate each VAL node in the query.
+ *
  */
 bool
 TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-		   bool (*chkcond) (void *checkval, QueryItem * val))
+		   bool (*chkcond) (void *checkval, QueryOperand * val))
 {
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
 
-	if (curitem->type == VAL)
-		return chkcond(checkval, curitem);
-	else if (curitem->val == (int4) '!')
-	{
-		return (calcnot) ?
-			!TS_execute(curitem + 1, checkval, calcnot, chkcond)
-			: true;
-	}
-	else if (curitem->val == (int4) '&')
+	if (curitem->type == QI_VAL)
+		return chkcond(checkval, (QueryOperand *) curitem);
+
+	switch(curitem->operator.oper)
 	{
-		if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-			return TS_execute(curitem + 1, checkval, calcnot, chkcond);
-		else
-			return false;
-	}
-	else
-	{							/* |-operator */
-		if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-			return true;
-		else
-			return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+		case OP_NOT:
+			if (calcnot)
+				return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			else
+				return true;
+		case OP_AND:
+			if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+			else
+				return false;
+
+		case OP_OR:
+			if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+				return true;
+			else
+				return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+		default:
+			elog(ERROR, "unknown operator %d", curitem->operator.oper);
 	}
+
+	/* not reachable, but keep compiler quiet */
 	return false;
 }
 
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
new file mode 100644
index 0000000000000000000000000000000000000000..26a271679d4cc95addaa9d5a5f75670b127ab790
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -0,0 +1,357 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_parser.c
+ *	  Parser for tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+
+struct TSVectorParseStateData
+{
+	char   *prsbuf;
+	char   *word;		/* buffer to hold the current word */
+	int		len;		/* size in bytes allocated for 'word' */
+	bool	oprisdelim;
+};
+
+/*
+ * Initializes parser for the input string. If oprisdelim is set, the
+ * following characters are treated as delimiters in addition to whitespace:
+ * ! | & ( )
+ */
+TSVectorParseState
+init_tsvector_parser(char *input, bool oprisdelim)
+{
+	TSVectorParseState state;
+
+	state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
+	state->prsbuf = input;
+	state->len = 32;
+	state->word = (char *) palloc(state->len);
+	state->oprisdelim = oprisdelim;
+
+	return state;
+}
+
+/*
+ * Reinitializes parser for parsing 'input', instead of previous input.
+ */
+void
+reset_tsvector_parser(TSVectorParseState state, char *input)
+{
+	state->prsbuf = input;	
+}
+
+/*
+ * Shuts down a tsvector parser.
+ */
+void
+close_tsvector_parser(TSVectorParseState state)
+{
+	pfree(state->word);
+	pfree(state);
+}
+
+#define RESIZEPRSBUF \
+do { \
+	if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+	{ \
+		int clen = curpos - state->word; \
+		state->len *= 2; \
+		state->word = (char*)repalloc( (void*)state->word, state->len ); \
+		curpos = state->word + clen; \
+	} \
+} while (0)
+
+
+#define ISOPERATOR(x)	( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+
+/* Fills the output parameters, and returns true */
+#define RETURN_TOKEN \
+do { \
+	if (pos_ptr != NULL) \
+	{ \
+		*pos_ptr = pos; \
+		*poslen = npos; \
+	} \
+	else if (pos != NULL) \
+		pfree(pos); \
+	\
+	if (strval != NULL) \
+		*strval = state->word; \
+	if (lenval != NULL) \
+		*lenval = curpos - state->word; \
+	if (endptr != NULL) \
+		*endptr = state->prsbuf; \
+	return true; \
+} while(0)
+
+
+/* State codes used in gettoken_tsvector */
+#define WAITWORD		1
+#define WAITENDWORD		2
+#define WAITNEXTCHAR	3
+#define WAITENDCMPLX	4
+#define WAITPOSINFO		5
+#define INPOSINFO		6
+#define WAITPOSDELIM	7
+#define WAITCHARCMPLX	8
+
+/*
+ * Get next token from string being parsed. Returns false if
+ * end of input string is reached, otherwise strval, lenval, pos_ptr
+ * and poslen output parameters are filled in:
+ * 
+ * *strval 		token
+ * *lenval 		length of *strval
+ * *pos_ptr		pointer to a palloc'd array of positions and weights
+ * 				associated with the token. If the caller is not interested
+ *				in the information, NULL can be supplied. Otherwise
+ *				the caller is responsible for pfreeing the array.
+ * *poslen		number of elements in *pos_ptr
+ */
+bool
+gettoken_tsvector(TSVectorParseState state, 
+				  char **strval, int *lenval,
+				  WordEntryPos **pos_ptr, int *poslen,
+				  char **endptr)
+{
+	int	oldstate	= 0;
+	char *curpos	= state->word;
+	int	statecode	= WAITWORD;
+
+	/* pos is for collecting the comma delimited list of positions that
+	 * follows the actual token.
+	 */
+	WordEntryPos *pos = NULL;
+	int npos		= 0; /* elements of pos used */
+	int posalen		= 0; /* allocated size of pos */
+
+	while (1)
+	{
+		if (statecode == WAITWORD)
+		{
+			if (*(state->prsbuf) == '\0')
+				return false;
+			else if (t_iseq(state->prsbuf, '\''))
+				statecode = WAITENDCMPLX;
+			else if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDWORD;
+			}
+			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+			else if (!t_isspace(state->prsbuf))
+			{
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				statecode = WAITENDWORD;
+			}
+		}
+		else if (statecode == WAITNEXTCHAR)
+		{
+			if (*(state->prsbuf) == '\0')
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("there is no escaped character")));
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				Assert(oldstate != 0);
+				statecode = oldstate;
+			}
+		}
+		else if (statecode == WAITENDWORD)
+		{
+			if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDWORD;
+			}
+			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+			{
+				RESIZEPRSBUF;
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				*(curpos) = '\0';
+				RETURN_TOKEN;
+			}
+			else if (t_iseq(state->prsbuf, ':'))
+			{
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				*(curpos) = '\0';
+				if (state->oprisdelim)
+					RETURN_TOKEN;
+				else
+					statecode = INPOSINFO;
+			}
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+			}
+		}
+		else if (statecode == WAITENDCMPLX)
+		{
+			if (t_iseq(state->prsbuf, '\''))
+			{
+				statecode = WAITCHARCMPLX;
+			}
+			else if (t_iseq(state->prsbuf, '\\'))
+			{
+				statecode = WAITNEXTCHAR;
+				oldstate = WAITENDCMPLX;
+			}
+			else if (*(state->prsbuf) == '\0')
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+			else
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+			}
+		}
+		else if (statecode == WAITCHARCMPLX)
+		{
+			if (t_iseq(state->prsbuf, '\''))
+			{
+				RESIZEPRSBUF;
+				COPYCHAR(curpos, state->prsbuf);
+				curpos += pg_mblen(state->prsbuf);
+				statecode = WAITENDCMPLX;
+			}
+			else
+			{
+				RESIZEPRSBUF;
+				*(curpos) = '\0';
+				if (curpos == state->word)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				if (state->oprisdelim)
+				{
+					/* state->prsbuf+=pg_mblen(state->prsbuf); */
+					RETURN_TOKEN;
+				}
+				else
+					statecode = WAITPOSINFO;
+				continue;		/* recheck current character */
+			}
+		}
+		else if (statecode == WAITPOSINFO)
+		{
+			if (t_iseq(state->prsbuf, ':'))
+				statecode = INPOSINFO;
+			else
+				RETURN_TOKEN;
+		}
+		else if (statecode == INPOSINFO)
+		{
+			if (t_isdigit(state->prsbuf))
+			{
+				if (posalen == 0)
+				{
+					posalen = 4;
+					pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
+					npos = 0;
+				}
+				else if (npos + 1 >= posalen)
+				{
+					posalen *= 2;
+					pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
+				}
+				npos++;
+				WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+				if (WEP_GETPOS(pos[npos - 1]) == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("wrong position info in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 0);
+				statecode = WAITPOSDELIM;
+			}
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+		}
+		else if (statecode == WAITPOSDELIM)
+		{
+			if (t_iseq(state->prsbuf, ','))
+				statecode = INPOSINFO;
+			else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 3);
+			}
+			else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 2);
+			}
+			else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 1);
+			}
+			else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+			{
+				if (WEP_GETWEIGHT(pos[npos - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsvector")));
+				WEP_SETWEIGHT(pos[npos - 1], 0);
+			}
+			else if (t_isspace(state->prsbuf) ||
+					 *(state->prsbuf) == '\0')
+				RETURN_TOKEN;
+			else if (!t_isdigit(state->prsbuf))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsvector")));
+		}
+		else					/* internal error */
+			elog(ERROR, "internal error in gettoken_tsvector");
+
+		/* get next char */
+		state->prsbuf += pg_mblen(state->prsbuf);
+	}
+
+	return false;
+}
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 148129aa8bc5b165959d8223c4b4f0d7d640d179..ab19de7924f05037e9e7a572d067b1070f2dfe9d 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,7 +42,7 @@ typedef struct
 				type:8,
 				len:16;
 	char	   *word;
-	QueryItem  *item;
+	QueryOperand  *item;
 } HeadlineWordEntry;
 
 typedef struct
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index ec22f96f59f91393f90aa06c1e986eeb63c80120..91d724ef1c67752e313e24fe22fbbbe16774e407 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -13,6 +13,8 @@
 #define _PG_TSTYPE_H_
 
 #include "fmgr.h"
+#include "utils/pg_crc.h"
+
 
 /*
  * TSVector type.
@@ -27,8 +29,8 @@ typedef struct
 				pos:20;			/* MAX 1Mb */
 } WordEntry;
 
-#define MAXSTRLEN ( 1<<11 )
-#define MAXSTRPOS ( 1<<20 )
+#define MAXSTRLEN ( (1<<11) - 1)
+#define MAXSTRPOS ( (1<<20) - 1)
 
 /*
  * Equivalent to
@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
 typedef struct
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int4		size;
+	uint32		size;
 	char		data[1];
 } TSVectorData;
 
@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
 
 /*
  * TSQuery
+ *
+ *
  */
 
+typedef int8 QueryItemType;
+
+/* Valid values for QueryItemType: */
+#define QI_VAL 1
+#define QI_OPR 2
+#define QI_VALSTOP 3	/* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
+
 /*
  * QueryItem is one node in tsquery - operator or operand.
  */
-
-typedef struct QueryItem
+typedef struct
 {
-	int8		type;			/* operand or kind of operator */
-	int8		weight;			/* weights of operand to search */
-	int2		left;			/* pointer to left operand Right operand is
-								 * item + 1, left operand is placed
-								 * item+item->left */
-	int4		val;			/* crc32 value of operand's value */
+	QueryItemType		type;	/* operand or kind of operator (ts_tokentype) */
+	int8		weight;			/* weights of operand to search. It's a bitmask of allowed weights.
+								 * If it is 0, then any weight is allowed */
+	int32	valcrc;				/* XXX: pg_crc32 would be a more appropriate data type, 
+								 * but we use comparisons to signed integers in the code. 
+								 * They would need to be changed as well. */
+
 	/* pointer to text value of operand, must correlate with WordEntry */
 	uint32
 				istrue:1,		/* use for ranking in Cover */
 				length:11,
 				distance:20;
-} QueryItem;
+} QueryOperand;
+
+
+/* Legal values for QueryOperator.oper */
+#define	OP_NOT	1
+#define	OP_AND	2
+#define	OP_OR	3
+
+typedef struct 
+{
+	QueryItemType	type;	/* item type tag; see QI_* values */
+	int8		oper;		/* one of OP_NOT, OP_AND, OP_OR */
+	int16		left;		/* offset to left operand. Right operand is
+							 * item + 1, left operand is placed at
+							 * item+item->left */
+} QueryOperator;
 
 /*
- * It's impossible to use offsetof(QueryItem, istrue)
+ * Note: TSQuery is 4-byte aligned, so make sure there are no fields
+ * inside QueryItem requiring 8-byte alignment, like int64.
  */
-#define HDRSIZEQI	( sizeof(int8) + sizeof(int8) + sizeof(int2) +	sizeof(int4) )
+typedef union
+{
+	QueryItemType	type;
+	QueryOperator operator;
+	QueryOperand operand;
+} QueryItem;
 
 /*
  * Storage:
- *	(len)(size)(array of ITEM)(array of operand in text form)
- *	operands are always finished by '\0'
+ *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
  */
 
 typedef struct
@@ -182,13 +213,17 @@ typedef struct
 typedef TSQueryData *TSQuery;
 
 #define HDRSIZETQ	( VARHDRSZ + sizeof(int4) )
-#define COMPUTESIZE(size,lenofoperand)	( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
-#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
-#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
-#define OPERANDSSIZE(x)		( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
 
-#define ISOPERATOR(x)	( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* Computes the total size of a TSQuery: header, QueryItems and operands. size
+ * is the number of QueryItems; lenofoperand is the total length of all operands
+ */
+#define COMPUTESIZE(size, lenofoperand)	( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
 
+/* Returns a pointer to the first QueryItem in a TSQuery */
+#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
+
+/* Returns a pointer to the beginning of operands in a TSQuery */
+#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
 
 /*
  * fmgr interface macros
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index d2e5c8d8e4957d231897fe4e9cad33072ce43e80..31a76e50b6cf23bf8c00ac7a4096f8c1daf91826 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -14,65 +14,41 @@
 
 #include "tsearch/ts_type.h"
 #include "tsearch/ts_public.h"
+#include "nodes/pg_list.h"
 
 /*
  * Common parse definitions for tsvector and tsquery
  */
 
-typedef struct
-{
-	WordEntry	entry;			/* should be first ! */
-	WordEntryPos *pos;
-} WordEntryIN;
-
-typedef struct
-{
-	char	   *prsbuf;
-	char	   *word;
-	char	   *curpos;
-	int4		len;
-	int4		state;
-	int4		alen;
-	WordEntryPos *pos;
-	bool		oprisdelim;
-} TSVectorParseState;
-
-extern bool gettoken_tsvector(TSVectorParseState *state);
+/* tsvector parser support. */
 
-struct ParseQueryNode;			/* private in backend/utils/adt/tsquery.c */
+struct TSVectorParseStateData;
+typedef struct TSVectorParseStateData *TSVectorParseState;
 
-typedef struct
-{
-	char	   *buffer;			/* entire string we are scanning */
-	char	   *buf;			/* current scan point */
-	int4		state;
-	int4		count;
+extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state, 
+							  char **token, int *len,
+							  WordEntryPos **pos, int *poslen,
+							  char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
 
-	/* reverse polish notation in list (for temporary usage) */
-	struct ParseQueryNode *str;
+/* parse_tsquery */
 
-	/* number in str */
-	int4		num;
+struct TSQueryParserStateData;	/* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
 
-	/* text-form operand */
-	int4		lenop;
-	int4		sumlen;
-	char	   *op;
-	char	   *curop;
-
-	/* state for value's parser */
-	TSVectorParseState valstate;
-	/* tscfg */
-	Oid			cfg_id;
-} TSQueryParserState;
+typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
 
 extern TSQuery parse_tsquery(char *buf,
-			  void (*pushval) (TSQueryParserState *, int, char *, int, int2),
-			  Oid cfg_id, bool isplain);
-extern void pushval_asis(TSQueryParserState * state,
-			 int type, char *strval, int lenval, int2 weight);
-extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
-		  int4 distance, int4 lenval, int2 weight);
+			  PushFunction pushval,
+			  void *opaque, bool isplain);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+			 char *strval, int lenval, int2 weight);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 operator);
 
 /*
  * parse plain text and lexize words
@@ -84,6 +60,11 @@ typedef struct
 	union
 	{
 		uint16		pos;
+		/*
+		 * When apos array is used, apos[0] is the number of elements
+		 * in the array (excluding apos[0]), and alen is the allocated
+		 * size of the array.
+		 */
 		uint16	   *apos;
 	}			pos;
 	char	   *word;
@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
 			char *buf, int4 buflen);
 extern text *generateHeadline(HeadlineParsedText * prs);
 
-/*
- * token/node types for parsing
- */
-#define END				0
-#define ERR				1
-#define VAL				2
-#define OPR				3
-#define OPEN			4
-#define CLOSE			5
-#define VALSTOP			6		/* for stop words */
-
 /*
  * Common check function for tsvector @@ tsquery
  */
 
 extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-		   bool (*chkcond) (void *checkval, QueryItem * val));
+		   bool (*chkcond) (void *checkval, QueryOperand * val));
 
 /*
  * Useful conversion macros