From e15d53e7a419dcff49e108c78c8f70be6b18598b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 2 Jan 2010 20:59:16 +0000
Subject: [PATCH] Fix similar_escape() to convert parentheses to non-capturing
 style. This is needed to avoid unwanted interference with SUBSTRING behavior,
 as per bug #5257 from Roman Kononov.  Also, add some basic intelligence about
 character classes (bracket expressions) since we now have several behaviors
 that aren't appropriate inside a character class.

As with the previous patch in this area, I'm reluctant to back-patch
since it might affect applications that are relying on the prior
behavior.
---
 src/backend/utils/adt/regexp.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index ca61d5637f9..cbffcdb1835 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.85 2010/01/02 16:57:55 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.86 2010/01/02 20:59:16 tgl Exp $
  *
  *		Alistair Crooks added the code for the regex caching
  *		agc - cached the regular expressions used - there's a good chance
@@ -640,6 +640,7 @@ similar_escape(PG_FUNCTION_ARGS)
 	int			plen,
 				elen;
 	bool		afterescape = false;
+	bool		incharclass = false;
 	int			nquotes = 0;
 
 	/* This function is not strict, so must test explicitly */
@@ -682,10 +683,10 @@ similar_escape(PG_FUNCTION_ARGS)
 	 */
 
 	/*
-	 * We need room for the prefix/postfix plus as many as 2 output bytes per
-	 * input byte
+	 * We need room for the prefix/postfix plus up to 3 output bytes per input
+	 * byte.  Input is at most 1GB, but XXX: 3 * plen is int math and may wrap.
 	 */
-	result = (text *) palloc(VARHDRSZ + 6 + 2 * plen);
+	result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
 	r = VARDATA(result);
 
 	*r++ = '^';
@@ -699,7 +700,7 @@ similar_escape(PG_FUNCTION_ARGS)
 
 		if (afterescape)
 		{
-			if (pchar == '"')	/* for SUBSTRING patterns */
+			if (pchar == '"' && !incharclass)	/* for SUBSTRING patterns */
 				*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
 			else
 			{
@@ -713,6 +714,19 @@ similar_escape(PG_FUNCTION_ARGS)
 			/* SQL99 escape character; do not send to output */
 			afterescape = true;
 		}
+		else if (incharclass)
+		{
+			if (pchar == '\\')
+				*r++ = '\\';
+			*r++ = pchar;
+			if (pchar == ']')
+				incharclass = false;
+		}
+		else if (pchar == '[')
+		{
+			*r++ = pchar;
+			incharclass = true;
+		}
 		else if (pchar == '%')
 		{
 			*r++ = '.';
@@ -720,6 +734,13 @@ similar_escape(PG_FUNCTION_ARGS)
 		}
 		else if (pchar == '_')
 			*r++ = '.';
+		else if (pchar == '(')
+		{
+			/* convert to non-capturing parenthesis */
+			*r++ = '(';
+			*r++ = '?';
+			*r++ = ':';
+		}
 		else if (pchar == '\\' || pchar == '.' ||
 				 pchar == '^' || pchar == '$')
 		{
-- 
GitLab