diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 4f9da5b0468d53b997ddb52eb0b5a66a17696318..6b80140e90940b4a348c342090e26fdd0bc82c8f 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1088,8 +1088,12 @@ parseqatom(struct vars * v, NOERR(); } - /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ - if (m == 0) + /* + * It's quantifier time. If the atom is just a BACKREF, we'll let it deal + * with quantifiers internally. Otherwise, the first step is to turn + * x{0,...} into x{1,...}|empty + */ + if (m == 0 && atomtype != BACKREF) { EMPTYARC(s2, atom->end); /* the bypass */ assert(PREF(qprefer) != 0); diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index f8e31f8f4ade89d9a4bec2ab8287b4689c1d082b..224da5064b69b9577856b21793bd9a92afb03377 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -720,7 +720,7 @@ cdissect(struct vars * v, case '|': /* alternation */ assert(t->left != NULL); return caltdissect(v, t, begin, end); - case 'b': /* back ref -- shouldn't be calling us! */ + case 'b': /* back reference */ assert(t->left == NULL && t->right == NULL); return cbrdissect(v, t, begin, end); case '.': /* concatenation */ @@ -962,12 +962,12 @@ cbrdissect(struct vars * v, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { - int i; int n = t->subno; - size_t len; - chr *paren; + size_t numreps; + size_t tlen; + size_t brlen; + chr *brstring; chr *p; - chr *stop; int min = t->min; int max = t->max; @@ -978,46 +978,65 @@ cbrdissect(struct vars * v, MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); + /* get the backreferenced string */ if (v->pmatch[n].rm_so == -1) return REG_NOMATCH; - paren = v->start + v->pmatch[n].rm_so; - len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; + brstring = v->start + v->pmatch[n].rm_so; + brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; /* no room to maneuver -- retries are pointless */ if (v->mem[t->retry]) return REG_NOMATCH; v->mem[t->retry] = 1; - /* special-case zero-length string */ - if (len == 0) + /* special cases for zero-length strings */ + if (brlen == 0) + { + /* + * matches only if target is zero length, but any number of + * repetitions can be considered to be present + */ + if (begin == end && min <= max) + { + MDEBUG(("cbackref matched trivially\n")); + return REG_OKAY; + } + return REG_NOMATCH; + } + if (begin == end) { - if (begin == end) + /* matches only if zero repetitions are okay */ + if (min == 0) + { + MDEBUG(("cbackref matched trivially\n")); return REG_OKAY; + } return REG_NOMATCH; } - /* and too-short string */ - assert(end >= begin); - if ((size_t) (end - begin) < len) + /* + * check target length to see if it could possibly be an allowed number of + * repetitions of brstring + */ + assert(end > begin); + tlen = end - begin; + if (tlen % brlen != 0) + return REG_NOMATCH; + numreps = tlen / brlen; + if (numreps < min || (numreps > max && max != INFINITY)) return REG_NOMATCH; - stop = end - len; - /* count occurrences */ - i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) + /* okay, compare the actual string contents */ + p = begin; + while (numreps-- > 0) { - if ((*v->g->compare) (paren, p, len) != 0) - break; - i++; + if ((*v->g->compare) (brstring, p, brlen) != 0) + return REG_NOMATCH; + p += brlen; } - MDEBUG(("cbackref found %d\n", i)); - /* and sort it out */ - if (p != end) /* didn't consume all of it */ - return REG_NOMATCH; - if (min <= i && (i <= max || max == INFINITY)) - return REG_OKAY; - return REG_NOMATCH; /* out of range */ + MDEBUG(("cbackref matched\n")); + return REG_OKAY; } /* diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out new file mode 100644 index 0000000000000000000000000000000000000000..5694908163af856f78940a9e31d75a370b4bfe09 --- /dev/null +++ b/src/test/regress/expected/regex.out @@ -0,0 +1,36 @@ +-- +-- Regular expression tests +-- +-- Don't want to have to double backslashes in regexes +set standard_conforming_strings = on; +-- Test simple quantified backrefs +select 'bbbbb' ~ '^([bc])\1*$' as t; + t +--- + t +(1 row) + +select 'ccc' ~ '^([bc])\1*$' as t; + t +--- + t +(1 row) + +select 'xxx' ~ '^([bc])\1*$' as f; + f +--- + f +(1 row) + +select 'bbc' ~ '^([bc])\1*$' as f; + f +--- + f +(1 row) + +select 'b' ~ '^([bc])\1*$' as t; + t +--- + t +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 862f5b20077a66d80aa0009522d310f875e93487..8852e0a40fc5ca9d0123bdda955e3a04fc71ce0a 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -30,7 +30,7 @@ test: point lseg box path polygon circle date time timetz timestamp timestamptz # geometry depends on point, lseg, box, path, polygon and circle # horology depends on interval, timetz, timestamp, timestamptz, reltime and abstime # ---------- -test: geometry horology oidjoins type_sanity opr_sanity +test: geometry horology regex oidjoins type_sanity opr_sanity # ---------- # These four each depend on the previous one diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 142fc9cf0d1a177fe5881e6065ab343f88ade78d..0bc5df7fe73f59b4868ca881247487eadc83107d 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -42,6 +42,7 @@ test: tstypes test: comments test: geometry test: horology +test: regex test: oidjoins test: type_sanity test: opr_sanity diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql new file mode 100644 index 0000000000000000000000000000000000000000..242a81ef3298a68d79cf4af971e17935f3630964 --- /dev/null +++ b/src/test/regress/sql/regex.sql @@ -0,0 +1,13 @@ +-- +-- Regular expression tests +-- + +-- Don't want to have to double backslashes in regexes +set standard_conforming_strings = on; + +-- Test simple quantified backrefs +select 'bbbbb' ~ '^([bc])\1*$' as t; +select 'ccc' ~ '^([bc])\1*$' as t; +select 'xxx' ~ '^([bc])\1*$' as f; +select 'bbc' ~ '^([bc])\1*$' as f; +select 'b' ~ '^([bc])\1*$' as t;