From b44d92b67b65a76f92448b5a282aae72820ac676 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 16 Sep 2015 15:25:25 -0400
Subject: [PATCH] Sync regex code with Tcl 8.6.4.

Sync our regex code with upstream changes since last time we did this,
which was Tcl 8.5.11 (see commit 08fd6ff37f71485e2fc04bc6ce07d2a483c36702).

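The only functional change here is to stop treating an octal escape as
three digits long when that would yield a value exceeding \377; in that
case the lexer now backs up and takes only the first two digits as the
escape, leaving the third digit to be scanned separately.  That's a bug
fix, but it's a minor one and could change the interpretation of working
regexes, so don't back-patch.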

In addition to that, s/INFINITY/DUPINF/ to eliminate the risk of collisions
with <math.h>'s macro, and s/LOCAL/NOPROP/ because that also seems like
an unnecessarily collision-prone macro name.

There were some other cosmetic changes in Tcl's copy that I did not adopt,
notably a rather half-hearted attempt at renaming some of the C functions
in a more verbose style.  (I'm not necessarily against the concept, but
renaming just a few functions in the package is not an improvement.)
---
 src/backend/regex/regc_lex.c |  6 ++++++
 src/backend/regex/regcomp.c  | 14 +++++++-------
 src/backend/regex/regexec.c  |  6 +++---
 src/include/regex/regguts.h  | 13 +++++--------
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
index 6f2c0cb3eb4..f6ed9f09ea4 100644
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -860,6 +860,12 @@ lexescape(struct vars * v)
 			c = lexdigits(v, 8, 1, 3);
 			if (ISERR())
 				FAILW(REG_EESCAPE);
+			if (c > 0xff)
+			{
+				/* out of range, so we handled one digit too much */
+				v->now--;
+				c >>= 3;
+			}
 			RETV(PLAIN, c);
 			break;
 		default:
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 44a472fa69e..6b95975f2be 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -960,13 +960,13 @@ parseqatom(struct vars * v,
 	{
 		case '*':
 			m = 0;
-			n = INFINITY;
+			n = DUPINF;
 			qprefer = (v->nextvalue) ? LONGER : SHORTER;
 			NEXT();
 			break;
 		case '+':
 			m = 1;
-			n = INFINITY;
+			n = DUPINF;
 			qprefer = (v->nextvalue) ? LONGER : SHORTER;
 			NEXT();
 			break;
@@ -984,7 +984,7 @@ parseqatom(struct vars * v,
 				if (SEE(DIGIT))
 					n = scannum(v);
 				else
-					n = INFINITY;
+					n = DUPINF;
 				if (m > n)
 				{
 					ERR(REG_BADBR);
@@ -1146,8 +1146,8 @@ parseqatom(struct vars * v,
 		 * really care where its submatches are.
 		 */
 		dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
-		assert(m >= 1 && m != INFINITY && n >= 1);
-		repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1);
+		assert(m >= 1 && m != DUPINF && n >= 1);
+		repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
 		f = COMBINE(qprefer, atom->flags);
 		t = subre(v, '.', f, s, atom->end);		/* prefix and atom */
 		NOERR();
@@ -1268,7 +1268,7 @@ repeat(struct vars * v,
 #define  SOME	 2
 #define  INF	 3
 #define  PAIR(x, y)  ((x)*4 + (y))
-#define  REDUCE(x)	 ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
+#define  REDUCE(x)	 ( ((x) == DUPINF) ? INF : (((x) > 1) ? SOME : (x)) )
 	const int	rm = REDUCE(m);
 	const int	rn = REDUCE(n);
 	struct state *s;
@@ -2026,7 +2026,7 @@ stdump(struct subre * t,
 	if (t->min != 1 || t->max != 1)
 	{
 		fprintf(f, " {%d,", t->min);
-		if (t->max != INFINITY)
+		if (t->max != DUPINF)
 			fprintf(f, "%d", t->max);
 		fprintf(f, "}");
 	}
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 5e78f8149c8..efd1e9ba022 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -865,7 +865,7 @@ cbrdissect(struct vars * v,
 	if (tlen % brlen != 0)
 		return REG_NOMATCH;
 	numreps = tlen / brlen;
-	if (numreps < min || (numreps > max && max != INFINITY))
+	if (numreps < min || (numreps > max && max != DUPINF))
 		return REG_NOMATCH;
 
 	/* okay, compare the actual string contents */
@@ -964,7 +964,7 @@ citerdissect(struct vars * v,
 	 * sub-match endpoints in endpts[1..max_matches].
 	 */
 	max_matches = end - begin;
-	if (max_matches > t->max && t->max != INFINITY)
+	if (max_matches > t->max && t->max != DUPINF)
 		max_matches = t->max;
 	if (max_matches < min_matches)
 		max_matches = min_matches;
@@ -1149,7 +1149,7 @@ creviterdissect(struct vars * v,
 	 * sub-match endpoints in endpts[1..max_matches].
 	 */
 	max_matches = end - begin;
-	if (max_matches > t->max && t->max != INFINITY)
+	if (max_matches > t->max && t->max != DUPINF)
 		max_matches = t->max;
 	if (max_matches < min_matches)
 		max_matches = min_matches;
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 2f3be1aa0ae..94e06f041b7 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -78,9 +78,6 @@
 #endif
 
 /* want size of a char in bits, and max value in bounded quantifiers */
-#ifndef CHAR_BIT
-#include <limits.h>
-#endif
 #ifndef _POSIX2_RE_DUP_MAX
 #define _POSIX2_RE_DUP_MAX	255 /* normally from <limits.h> */
 #endif
@@ -95,7 +92,7 @@
 #define xxx		1
 
 #define DUPMAX	_POSIX2_RE_DUP_MAX
-#define INFINITY	(DUPMAX+1)
+#define DUPINF	(DUPMAX+1)
 
 #define REMAGIC 0xfed7			/* magic number for main struct */
 
@@ -419,15 +416,15 @@ struct subre
 #define  LONGER  01				/* prefers longer match */
 #define  SHORTER 02				/* prefers shorter match */
 #define  MIXED	 04				/* mixed preference below */
-#define  CAP 010				/* capturing parens below */
+#define  CAP	 010			/* capturing parens below */
 #define  BACKR	 020			/* back reference below */
 #define  INUSE	 0100			/* in use in final tree */
-#define  LOCAL	 03				/* bits which may not propagate up */
+#define  NOPROP  03				/* bits which may not propagate up */
 #define  LMIX(f) ((f)<<2)		/* LONGER -> MIXED */
 #define  SMIX(f) ((f)<<1)		/* SHORTER -> MIXED */
-#define  UP(f)	 (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
+#define  UP(f)	 (((f)&~NOPROP) | (LMIX(f) & SMIX(f) & MIXED))
 #define  MESSY(f)	 ((f)&(MIXED|CAP|BACKR))
-#define  PREF(f) ((f)&LOCAL)
+#define  PREF(f) ((f)&NOPROP)
 #define  PREF2(f1, f2)	 ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
 #define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
 	short		id;				/* ID of subre (1..ntree-1) */
-- 
GitLab