diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 694a03965c074f68a0835dc3f45a5bdc9791bf9a..d18672c7c7c3b9bb32d84e37003fd16d5140c400 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -448,6 +448,7 @@ cfindloop(struct vars * v, close = v->search_start; do { + /* Search with the search RE for match range at/beyond "close" */ MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); if (ISERR()) @@ -456,10 +457,11 @@ cfindloop(struct vars * v, return v->err; } if (close == NULL) - break; /* NOTE BREAK */ + break; /* no more possible match anywhere */ assert(cold != NULL); open = cold; cold = NULL; + /* Search for matches starting between "open" and "close" inclusive */ MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); for (begin = open; begin <= close; begin++) { @@ -468,6 +470,7 @@ cfindloop(struct vars * v, estop = v->stop; for (;;) { + /* Here we use the top node's detailed RE */ if (shorter) end = shortest(v, d, begin, estart, estop, (chr **) NULL, &hitend); @@ -482,8 +485,9 @@ cfindloop(struct vars * v, if (hitend && cold == NULL) cold = begin; if (end == NULL) - break; /* NOTE BREAK OUT */ + break; /* no match with this begin point, try next */ MDEBUG(("tentative end %ld\n", LOFF(end))); + /* Dissect the potential match to see if it really matches */ zapallsubs(v->pmatch, v->nmatch); er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) @@ -502,21 +506,28 @@ cfindloop(struct vars * v, *coldp = cold; return er; } - /* try next shorter/longer match with same begin point */ + /* Try next longer/shorter match with same begin point */ if (shorter) { if (end == estop) - break; /* NOTE BREAK OUT */ + break; /* no more, so try next begin point */ estart = end + 1; } else { if (end == begin) - break; /* NOTE BREAK OUT */ + break; /* no more, so try next begin point */ estop = end - 1; } } /* end loop over endpoint positions */ } /* end loop over beginning positions */ + + /* + * If we get here, there is no possible match starting at or before + * "close", so consider matches beyond that. We'll do a fresh search + * with the search RE to find a new promising match range. + */ + close++; } while (close < v->stop); *coldp = cold; @@ -963,17 +974,17 @@ citerdissect(struct vars * v, assert(begin <= end); /* - * If zero matches are allowed, and target string is empty, just declare - * victory. OTOH, if target string isn't empty, zero matches can't work - * so we pretend the min is 1. + * For the moment, assume the minimum number of matches is 1. If zero + * matches are allowed, and the target string is empty, we are allowed to + * match regardless of the contents of the iter node --- but we would + * prefer to match once, so that capturing parens get set. (An example of + * the concern here is a pattern like "()*\1", which historically this + * code has allowed to succeed.) Therefore, we deal with the zero-matches + * case at the bottom, after failing to find any other way to match. */ min_matches = t->min; if (min_matches <= 0) - { - if (begin == end) - return REG_OKAY; min_matches = 1; - } /* * We need workspace to track the endpoints of each sub-match. Normally @@ -1123,8 +1134,19 @@ backtrack: } /* all possibilities exhausted */ - MDEBUG(("%d failed\n", t->id)); FREE(endpts); + + /* + * Now consider the possibility that we can match to a zero-length string + * by using zero repetitions. + */ + if (t->min == 0 && begin == end) + { + MDEBUG(("%d allowing zero matches\n", t->id)); + return REG_OKAY; + } + + MDEBUG(("%d failed\n", t->id)); return REG_NOMATCH; } diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index 497ddcd4677be7ca819cb92e6cfecbdb388f3b89..69a2ed00e4b59b928a050b7e876f986753505f86 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -196,3 +196,29 @@ select regexp_matches('foo/bar/baz', {foo,bar,baz} (1 row) +-- Test for infinite loop in cfindloop with zero-length possible match +-- but no actual match (can only happen in the presence of backrefs) +select 'a' ~ '$()|^\1'; + ?column? +---------- + f +(1 row) + +select 'a' ~ '.. ()|\1'; + ?column? +---------- + f +(1 row) + +select 'a' ~ '()*\1'; + ?column? +---------- + t +(1 row) + +select 'a' ~ '()+\1'; + ?column? +---------- + t +(1 row) + diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index ceb9d699ce96e17ac913a33d7384477d755d5ac1..0a07eaf8a6508c45a5549a9172d3c7b049aa020f 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -50,3 +50,10 @@ select regexp_matches('Programmer', '(\w)(.*?\1)', 'g'); -- Test for proper matching of non-greedy iteration (bug #11478) select regexp_matches('foo/bar/baz', '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', ''); + +-- Test for infinite loop in cfindloop with zero-length possible match +-- but no actual match (can only happen in the presence of backrefs) +select 'a' ~ '$()|^\1'; +select 'a' ~ '.. ()|\1'; +select 'a' ~ '()*\1'; +select 'a' ~ '()+\1';