From b8f40ced2f7daebb13a3c9ab9aece5a969e2c28a Mon Sep 17 00:00:00 2001
From: Joe Conway <mail@joeconway.com>
Date: Sun, 30 Nov 2003 20:55:09 +0000
Subject: [PATCH] Make PQescapeBytea and byteaout consistent with each other,
 and octal escape all octets outside the range 0x20 to 0x7e. This fixes the
 problem pointed out by Sergey Yatskevich here:
 http://archives.postgresql.org/pgsql-bugs/2003-11/msg00140.php

---
 doc/src/sgml/datatype.sgml      | 28 ++++++++++++++++++++--------
 src/backend/utils/adt/varlena.c | 14 +++++++-------
 src/interfaces/libpq/fe-exec.c  |  9 +++++----
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index d47cee96338..7707aed2a47 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.132 2003/11/29 19:51:36 pgsql Exp $
+$PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.133 2003/11/30 20:55:09 joe Exp $
 -->
 
  <chapter id="datatype">
@@ -1076,9 +1076,10 @@ SELECT b, char_length(b) FROM test2;
     strings are distinguished from characters strings by two
     characteristics: First, binary strings specifically allow storing
     octets of value zero and other <quote>non-printable</quote>
-    octets.  Second, operations on binary strings process the actual
-    bytes, whereas the encoding and processing of character strings
-    depends on locale settings.
+    octets (defined as octets outside the range 32 to 126).
+    Second, operations on binary strings process the actual bytes,
+    whereas the encoding and processing of character strings depends
+    on locale settings.
    </para>
 
    <para>
@@ -1131,14 +1132,25 @@ SELECT b, char_length(b) FROM test2;
        <entry><literal>\\</literal></entry>
       </row>
 
+      <row>
+       <entry>0 to 31 and 127 to 255</entry>
+       <entry><quote>non-printable</quote> octets</entry>
+       <entry><literal>'\\<replaceable>xxx</>'</literal> (octal value)</entry>
+       <entry><literal>SELECT '\\001'::bytea;</literal></entry>
+       <entry><literal>\001</literal></entry>
+      </row>
+
      </tbody>
     </tgroup>
    </table>
 
    <para>
-    Note that the result in each of the examples in <xref linkend="datatype-binary-sqlesc"> was exactly one
-    octet in length, even though the output representation of the zero
-    octet and backslash are more than one character.
+    The requirement to escape <quote>non-printable</quote> octets actually
+    varies depending on locale settings. In some instances you can get away
+    with leaving them unescaped. Note that the result in each of the examples
+    in <xref linkend="datatype-binary-sqlesc"> was exactly one octet in
+    length, even though the output representations of the zero octet and
+    backslash are more than one character.
    </para>
 
    <para>
@@ -1206,7 +1218,7 @@ SELECT b, char_length(b) FROM test2;
       <row>
        <entry>32 to 126</entry>
        <entry><quote>printable</quote> octets</entry>
-       <entry>ASCII representation</entry>
+       <entry>client character set representation</entry>
        <entry><literal>SELECT '\\176'::bytea;</literal></entry>
        <entry><literal>~</literal></entry>
       </row>
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 244e2fbc698..8e01f9f539e 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.107 2003/11/29 19:51:59 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.108 2003/11/30 20:55:09 joe Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -186,10 +186,10 @@ byteaout(PG_FUNCTION_ARGS)
 	{
 		if (*vp == '\\')
 			len += 2;
-		else if (isprint((unsigned char) *vp))
-			len++;
-		else
+		else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 			len += 4;
+		else
+			len++;
 	}
 	rp = result = (char *) palloc(len);
 	vp = VARDATA(vlena);
@@ -200,9 +200,7 @@ byteaout(PG_FUNCTION_ARGS)
 			*rp++ = '\\';
 			*rp++ = '\\';
 		}
-		else if (isprint((unsigned char) *vp))
-			*rp++ = *vp;
-		else
+		else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 		{
 			val = *vp;
 			rp[0] = '\\';
@@ -213,6 +211,8 @@ byteaout(PG_FUNCTION_ARGS)
 			rp[1] = DIG(val & 03);
 			rp += 4;
 		}
+		else
+			*rp++ = *vp;
 	}
 	*rp = '\0';
 	PG_RETURN_CSTRING(result);
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 19cb840af71..84b7d2e1c19 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.154 2003/11/29 19:52:11 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.155 2003/11/30 20:55:09 joe Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2261,7 +2261,8 @@ PQescapeString(char *to, const char *from, size_t length)
  *		'\0' == ASCII  0 == \\000
  *		'\'' == ASCII 39 == \'
  *		'\\' == ASCII 92 == \\\\
- *		anything >= 0x80 ---> \\ooo (where ooo is an octal expression)
+ *		anything < 0x20, or > 0x7e ---> \\ooo
+ *                                      (where ooo is the 3-digit octal value)
  */
 unsigned char *
 PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
@@ -2280,7 +2281,7 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
 	vp = bintext;
 	for (i = binlen; i > 0; i--, vp++)
 	{
-		if (*vp == 0 || *vp >= 0x80)
+		if (*vp < 0x20 || *vp > 0x7e)
 			len += 5;			/* '5' is for '\\ooo' */
 		else if (*vp == '\'')
 			len += 2;
@@ -2299,7 +2300,7 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
 
 	for (i = binlen; i > 0; i--, vp++)
 	{
-		if (*vp == 0 || *vp >= 0x80)
+		if (*vp < 0x20 || *vp > 0x7e)
 		{
 			(void) sprintf(rp, "\\\\%03o", *vp);
 			rp += 5;
-- 
GitLab