diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 27998d688a8f2a48c0e27dcab7da5dd4378861c6..e474e48c28c97a328a8fa0a2083d9e55e757bdb3 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -180,7 +180,7 @@ newstate(struct nfa * nfa) /* * This is a handy place to check for operation cancel during regex * compilation, since no code path will go very long without making a new - * state. + * state or arc. */ if (CANCEL_REQUESTED(nfa->v->re)) { @@ -333,6 +333,17 @@ newarc(struct nfa * nfa, assert(from != NULL && to != NULL); + /* + * This is a handy place to check for operation cancel during regex + * compilation, since no code path will go very long without making a new + * state or arc. + */ + if (CANCEL_REQUESTED(nfa->v->re)) + { + NERR(REG_CANCEL); + return; + } + /* check for duplicates */ for (a = from->outs; a != NULL; a = a->outchain) if (a->to == to && a->co == co && a->type == t) diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index d367a77e85469c58abfb036290f0ffd71c048195..a9305dd7ccd231e8af3ecf24b31be6991fa22b26 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -34,9 +34,12 @@ /* * longest - longest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ -longest(struct vars * v, /* used only for debug and exec flags */ +static chr * +longest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *stop, /* match must end at or before here */ @@ -51,11 +54,15 @@ longest(struct vars * v, /* used only for debug and exec flags */ int i; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("+++ startup +++\n")); @@ -74,8 +81,14 @@ longest(struct vars * v, /* used only for debug and exec flags */ return NULL; css->lastseen = cp; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realstop) { FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets))); @@ -92,7 +105,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } else +#endif + { while (cp < realstop) { co = GETCOLOR(cm, *cp); @@ -107,6 +123,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } + + if (ISERR()) + return NULL; /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets))); @@ -117,6 +137,8 @@ longest(struct vars * v, /* used only for debug and exec flags */ co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; FDEBUG(("color %ld\n", (long) co)); ss = miss(v, d, css, co, cp, start); + if (ISERR()) + return NULL; /* special case: match ended at eol? */ if (ss != NULL && (ss->flags & POSTSTATE)) return cp; @@ -138,14 +160,17 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* * shortest - shortest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ +static chr * shortest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *min, /* match must end at or after here */ chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if nonNULL */ + chr **coldp, /* store coldstart pointer here, if non-NULL */ int *hitstopp) /* record whether hit v->stop, if non-NULL */ { chr *cp; @@ -156,11 +181,17 @@ shortest(struct vars * v, struct sset *ss; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (coldp != NULL) + *coldp = NULL; + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("--- startup ---\n")); @@ -180,8 +211,14 @@ shortest(struct vars * v, css->lastseen = cp; ss = css; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realmax) { FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets))); @@ -200,7 +237,10 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } else +#endif + { while (cp < realmax) { co = GETCOLOR(cm, *cp); @@ -217,6 +257,7 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } if (ss == NULL) return NULL; @@ -389,7 +430,7 @@ hash(unsigned *uv, * initialize - hand-craft a cache entry for startup, otherwise get ready */ static struct sset * -initialize(struct vars * v, /* used only for debug flags */ +initialize(struct vars * v, struct dfa * d, chr *start) { @@ -402,6 +443,8 @@ initialize(struct vars * v, /* used only for debug flags */ else { /* no, must (re)build it */ ss = getvacant(v, d, start, start); + if (ss == NULL) + return NULL; for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); @@ -420,10 +463,20 @@ initialize(struct vars * v, /* used only for debug flags */ } /* - * miss - handle a cache miss + * miss - handle a stateset cache miss + * + * css is the current stateset, co is the color of the current input character, + * cp points to the character after that (which is where we may need to test + * LACONs). start does not affect matching behavior but is needed for pickss' + * heuristics about which stateset cache entry to replace. + * + * Ordinarily, returns the address of the next stateset (the one that is + * valid after consuming the input character). Returns NULL if no valid + * NFA states remain, ie we have a certain match failure. + * Internal errors also return NULL, with v->err set. */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars * v, /* used only for debug flags */ +static struct sset * +miss(struct vars * v, struct dfa * d, struct sset * css, pcolor co, @@ -449,9 +502,23 @@ miss(struct vars * v, /* used only for debug flags */ } FDEBUG(("miss\n")); - /* first, what set of states would we end up in? */ + /* + * Checking for operation cancel in the inner text search loop seems + * unduly expensive. As a compromise, check during cache misses. + */ + if (CANCEL_REQUESTED(v->re)) + { + ERR(REG_CANCEL); + return NULL; + } + + /* + * What set of states would we end up in after consuming the co character? + * We first consider PLAIN arcs that consume the character, and then look + * to see what LACON arcs could be traversed after consuming it. + */ for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; + d->work[i] = 0; /* build new stateset bitmap in d->work */ ispost = 0; noprogress = 1; gotstate = 0; @@ -468,22 +535,31 @@ miss(struct vars * v, /* used only for debug flags */ noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; + if (!gotstate) + return NULL; /* character cannot reach any new state */ + dolacons = (cnfa->flags & HASLACONS); sawlacons = 0; + /* outer loop handles transitive closure of reachable-by-LACON states */ while (dolacons) - { /* transitive closure */ + { dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { if (ca->co < cnfa->ncolors) - continue; /* NOTE CONTINUE */ - sawlacons = 1; + continue; /* not a LACON arc */ if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ + continue; /* arc would be a no-op anyway */ + sawlacons = 1; /* this LACON affects our result */ if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ + { + if (ISERR()) + return NULL; + continue; /* LACON arc cannot be traversed */ + } + if (ISERR()) + return NULL; BSET(d->work, ca->to); dolacons = 1; if (ca->to == cnfa->post) @@ -493,11 +569,9 @@ miss(struct vars * v, /* used only for debug flags */ FDEBUG(("%d :> %d\n", i, ca->to)); } } - if (!gotstate) - return NULL; h = HASH(d->work, d->wordsper); - /* next, is that in the cache? */ + /* Is this stateset already in the cache? */ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) if (HIT(h, d->work, p, d->wordsper)) { @@ -507,6 +581,8 @@ miss(struct vars * v, /* used only for debug flags */ if (i == 0) { /* nope, need a new cache entry */ p = getvacant(v, d, cp, start); + if (p == NULL) + return NULL; assert(p != css); for (i = 0; i < d->wordsper; i++) p->states[i] = d->work[i]; @@ -517,8 +593,15 @@ miss(struct vars * v, /* used only for debug flags */ /* lastseen to be dealt with by caller */ } + /* + * Link new stateset to old, unless a LACON affected the result, in which + * case we don't create the link. That forces future transitions across + * this same arc (same prior stateset and character color) to come through + * miss() again, so that we can recheck the LACON(s), which might or might + * not pass since context will be different. + */ if (!sawlacons) - { /* lookahead conds. always cache miss */ + { FDEBUG(("c%d[%d]->c%d\n", (int) (css - d->ssets), co, (int) (p - d->ssets))); css->outs[co] = p; @@ -562,11 +645,12 @@ lacon(struct vars * v, /* * getvacant - get a vacant state set + * * This routine clears out the inarcs and outarcs, but does not otherwise * clear the innards of the state set -- that's up to the caller. */ static struct sset * -getvacant(struct vars * v, /* used only for debug flags */ +getvacant(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -578,6 +662,8 @@ getvacant(struct vars * v, /* used only for debug flags */ color co; ss = pickss(v, d, cp, start); + if (ss == NULL) + return NULL; assert(!(ss->flags & LOCKED)); /* clear out its inarcs, including self-referential ones */ @@ -635,7 +721,7 @@ getvacant(struct vars * v, /* used only for debug flags */ * pickss - pick the next stateset to be used */ static struct sset * -pickss(struct vars * v, /* used only for debug flags */ +pickss(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -691,7 +777,6 @@ pickss(struct vars * v, /* used only for debug flags */ /* nobody's old enough?!? -- something's really wrong */ FDEBUG(("cannot find victim to replace!\n")); - assert(NOTREACHED); ERR(REG_ASSERT); - return d->ssets; + return NULL; } diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 8505994747f367fb72342bf1fd9f852f28a9703e..694a03965c074f68a0835dc3f45a5bdc9791bf9a 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -450,6 +450,11 @@ cfindloop(struct vars * v, { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); @@ -469,6 +474,11 @@ cfindloop(struct vars * v, else end = longest(v, d, begin, estop, &hitend); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -681,6 +691,7 @@ ccondissect(struct vars * v, /* pick a tentative midpoint */ mid = longest(v, d, begin, end, (int *) NULL); + NOERR(); if (mid == NULL) return REG_NOMATCH; MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -705,6 +716,7 @@ ccondissect(struct vars * v, if (er != REG_NOMATCH) return er; } + NOERR(); /* that midpoint didn't work, find a new one */ if (mid == begin) @@ -714,6 +726,7 @@ ccondissect(struct vars * v, return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); + NOERR(); if (mid == NULL) { /* failed to find a new one */ @@ -756,6 +769,7 @@ crevcondissect(struct vars * v, /* pick a tentative midpoint */ mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); + NOERR(); if (mid == NULL) return REG_NOMATCH; MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -780,6 +794,7 @@ crevcondissect(struct vars * v, if (er != REG_NOMATCH) return er; } + NOERR(); /* that midpoint didn't work, find a new one */ if (mid == end) @@ -789,6 +804,7 @@ crevcondissect(struct vars * v, return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); + NOERR(); if (mid == NULL) { /* failed to find a new one */ @@ -914,6 +930,7 @@ caltdissect(struct vars * v, if (er != REG_NOMATCH) return er; } + NOERR(); t = t->right; } @@ -1005,6 +1022,11 @@ citerdissect(struct vars * v, { /* try to find an endpoint for the k'th sub-match */ endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL); + if (ISERR()) + { + FREE(endpts); + return v->err; + } if (endpts[k] == NULL) { /* no match possible, so see if we can shorten previous one */ @@ -1201,6 +1223,11 @@ creviterdissect(struct vars * v, /* try to find an endpoint for the k'th sub-match */ endpts[k] = shortest(v, d, endpts[k - 1], limit, end, (chr **) NULL, (int *) NULL); + if (ISERR()) + { + FREE(endpts); + return v->err; + } if (endpts[k] == NULL) { /* no match possible, so see if we can lengthen previous one */