From bfb10db81ec9f4d2cb8de20c6af03463f89154c9 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 7 Nov 2015 12:43:24 -0500
Subject: [PATCH] Fix enforcement of restrictions inside regexp lookaround constraints.

Lookahead and lookbehind constraints aren't allowed to contain backrefs,
and parentheses within them are always considered non-capturing. Or so
says the manual. But the regexp parser forgot about these rules once
inside a parenthesized subexpression, so that constructs like (\w)(?=(\1))
were accepted (but then not correctly executed --- a case like this acted
like (\w)(?=\w), without any enforcement that the two \w's match the same
text). And in (?=((foo))) the innermost parentheses would be counted as
capturing parentheses, though no text would ever be captured for them.

To fix, properly pass down the "type" argument to the recursive invocation
of parse().

Back-patch to all supported branches; it was agreed that silent
misexecution of such patterns is worse than throwing an error, even though
new errors in minor releases are generally not desirable.
--- src/backend/regex/regcomp.c | 2 +- src/test/regress/expected/regex.out | 5 +++++ src/test/regress/sql/regex.sql | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index cb4dd2d3395..03133f4a048 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -944,7 +944,7 @@ parseqatom(struct vars * v, EMPTYARC(lp, s); EMPTYARC(s2, rp); NOERR(); - atom = parse(v, ')', PLAIN, s, s2); + atom = parse(v, ')', type, s, s2); assert(SEE(')') || ISERR()); NEXT(); NOERR(); diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index be151858a38..ba2923982f5 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -321,3 +321,8 @@ select 'a' ~ '()+\1'; t (1 row) +-- Error conditions +select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs +ERROR: invalid regular expression: invalid backreference number +select 'xyz' ~ 'x(\w)(?=(\1))'; +ERROR: invalid regular expression: invalid backreference number diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index c59fa35f24d..7cf5e599822 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -82,3 +82,7 @@ select 'a' ~ '$()|^\1'; select 'a' ~ '.. ()|\1'; select 'a' ~ '()*\1'; select 'a' ~ '()+\1'; + +-- Error conditions +select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs +select 'xyz' ~ 'x(\w)(?=(\1))'; -- GitLab