Skip to content
Snippets Groups Projects
Commit b8c798eb authored by Tom Lane
Browse files

Tweak tsmatchsel() so that it examines the structure of the tsquery whenever
possible (i.e., whenever the tsquery is a constant), even when no statistics
are available for the tsvector.  For example, foo @@ 'a & b'::tsquery
can be expected to be more selective than foo @@ 'a'::tsquery, whether
or not we know anything about foo.  We use DEFAULT_TS_MATCH_SEL as the assumed
selectivity of individual query terms when no stats are available, then
combine the terms according to the query's AND/OR structure as usual.

Per experimentation with Artur Dabrowski's example.  (The fact that there
are no stats available in that example is a problem in itself, but
nonetheless tsmatchsel should be smarter about the case.)

Back-patch to 8.4 to keep all versions of tsmatchsel() in sync.
parent 2ab57e08
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.7 2010/01/04 02:44:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.8 2010/07/31 03:27:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -52,6 +52,9 @@ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -52,6 +52,9 @@ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
TextFreq *lookup, int length, float4 minfreq); TextFreq *lookup, int length, float4 minfreq);
static int compare_lexeme_textfreq(const void *e1, const void *e2); static int compare_lexeme_textfreq(const void *e1, const void *e2);
/*
 * Estimate selectivity from the structure of a tsquery alone: invokes
 * tsquery_opr_selec() on the query's items and operands with a NULL lookup
 * table, zero length and zero minfreq.  A NULL lookup is how
 * tsquery_opr_selec() is told that no statistics are available.
 */
#define tsquery_opr_selec_no_stats(query) \
tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), NULL, 0, 0)
/* /*
* tsmatchsel -- Selectivity of "@@" * tsmatchsel -- Selectivity of "@@"
...@@ -101,21 +104,20 @@ tsmatchsel(PG_FUNCTION_ARGS) ...@@ -101,21 +104,20 @@ tsmatchsel(PG_FUNCTION_ARGS)
} }
/* /*
* OK, there's a Var and a Const we're dealing with here. We need the Var * OK, there's a Var and a Const we're dealing with here. We need the
* to be a TSVector (or else we don't have any useful statistic for it). * Const to be a TSQuery, else we can't do anything useful. We have to
* We have to check this because the Var might be the TSQuery not the * check this because the Var might be the TSQuery not the TSVector.
* TSVector.
*/ */
if (vardata.vartype == TSVECTOROID) if (((Const *) other)->consttype == TSQUERYOID)
{ {
/* tsvector @@ tsquery or the other way around */ /* tsvector @@ tsquery or the other way around */
Assert(((Const *) other)->consttype == TSQUERYOID); Assert(vardata.vartype == TSVECTOROID);
selec = tsquerysel(&vardata, ((Const *) other)->constvalue); selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
} }
else else
{ {
/* The Var is something we don't have useful statistics for */ /* If we can't see the query structure, must punt */
selec = DEFAULT_TS_MATCH_SEL; selec = DEFAULT_TS_MATCH_SEL;
} }
...@@ -184,14 +186,14 @@ tsquerysel(VariableStatData *vardata, Datum constval) ...@@ -184,14 +186,14 @@ tsquerysel(VariableStatData *vardata, Datum constval)
} }
else else
{ {
/* No most-common-elements info, so we must punt */ /* No most-common-elements info, so do without */
selec = (Selectivity) DEFAULT_TS_MATCH_SEL; selec = tsquery_opr_selec_no_stats(query);
} }
} }
else else
{ {
/* No stats at all, so we must punt */ /* No stats at all, so do without */
selec = (Selectivity) DEFAULT_TS_MATCH_SEL; selec = tsquery_opr_selec_no_stats(query);
} }
return selec; return selec;
...@@ -214,7 +216,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, ...@@ -214,7 +216,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
* cells are taken for minimal and maximal frequency. Punt if not. * cells are taken for minimal and maximal frequency. Punt if not.
*/ */
if (nnumbers != nmcelem + 2) if (nnumbers != nmcelem + 2)
return DEFAULT_TS_MATCH_SEL; return tsquery_opr_selec_no_stats(query);
/* /*
* Transpose the data into a single array so we can use bsearch(). * Transpose the data into a single array so we can use bsearch().
...@@ -258,9 +260,12 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, ...@@ -258,9 +260,12 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
* freq[val] in VAL nodes, if the value is in MCELEM * freq[val] in VAL nodes, if the value is in MCELEM
* min(freq[MCELEM]) / 2 in VAL nodes, if it is not * min(freq[MCELEM]) / 2 in VAL nodes, if it is not
* *
*
* The MCELEM array is already sorted (see ts_typanalyze.c), so we can use * The MCELEM array is already sorted (see ts_typanalyze.c), so we can use
* binary search for determining freq[MCELEM]. * binary search for determining freq[MCELEM].
*
* If we don't have stats for the tsvector, we still use this logic,
* except we always use DEFAULT_TS_MATCH_SEL for VAL nodes. This case
* is signaled by lookup == NULL.
*/ */
static Selectivity static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand, tsquery_opr_selec(QueryItem *item, char *operand,
...@@ -279,6 +284,10 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -279,6 +284,10 @@ tsquery_opr_selec(QueryItem *item, char *operand,
{ {
QueryOperand *oper = (QueryOperand *) item; QueryOperand *oper = (QueryOperand *) item;
/* If no stats for the variable, use DEFAULT_TS_MATCH_SEL */
if (lookup == NULL)
return (Selectivity) DEFAULT_TS_MATCH_SEL;
/* /*
* Prepare the key for bsearch(). * Prepare the key for bsearch().
*/ */
...@@ -292,7 +301,7 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -292,7 +301,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
if (searchres) if (searchres)
{ {
/* /*
* The element is in MCELEM. Return precise selectivity (or at * The element is in MCELEM. Return precise selectivity (or at
* least as precise as ANALYZE could find out). * least as precise as ANALYZE could find out).
*/ */
return (Selectivity) searchres->frequency; return (Selectivity) searchres->frequency;
...@@ -300,7 +309,7 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -300,7 +309,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
else else
{ {
/* /*
* The element is not in MCELEM. Punt, but assert that the * The element is not in MCELEM. Punt, but assume that the
* selectivity cannot be more than minfreq / 2. * selectivity cannot be more than minfreq / 2.
*/ */
return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2); return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment