From a63b63ff96293e153d25e8e054a830d70f69938a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 3 Dec 2007 00:03:05 +0000
Subject: [PATCH] Revert COPY OUT to follow the pre-8.3 handling of ASCII
 control characters, namely that \r, \n, \t, \b, \f, \v are dumped as those
 two-character representations rather than a backslash and the literal control
 character. I had made it emit the latter form to save some code, but this was
 ill-advised, because dump files in which these characters appear literally
 are prone to newline mangling.  Fortunately, doing it the old way should only
 cost a few more lines of code, and not slow down the copy loop materially.
 Per bug #3795 from Lou Duchez.

---
 src/backend/commands/copy.c | 66 +++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 17 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index c68d828fea0..55ecf0098d4 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.289 2007/11/30 21:22:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.290 2007/12/03 00:03:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3102,27 +3102,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
 			}
 			else if ((unsigned char) c < (unsigned char) 0x20)
 			{
+				/*
+				 * \r and \n must be escaped, the others are traditional.
+				 * We prefer to dump these using the C-like notation, rather
+				 * than a backslash and the literal character, because it
+				 * makes the dump file a bit more proof against Microsoftish
+				 * data mangling.
+				 */
 				switch (c)
 				{
-						/*
-						 * \r and \n must be escaped, the others are
-						 * traditional
-						 */
 					case '\b':
+						c = 'b';
+						break;
 					case '\f':
+						c = 'f';
+						break;
 					case '\n':
+						c = 'n';
+						break;
 					case '\r':
+						c = 'r';
+						break;
 					case '\t':
+						c = 't';
+						break;
 					case '\v':
-						DUMPSOFAR();
-						CopySendChar(cstate, '\\');
-						start = ptr++;	/* we include char in next run */
+						c = 'v';
 						break;
 					default:
 						/* All ASCII control chars are length 1 */
 						ptr++;
-						break;
+						continue;		/* fall to end of loop */
 				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;			/* do not include char in next run */
 			}
 			else if (IS_HIGHBIT_SET(c))
 				ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
@@ -3143,27 +3159,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
 			}
 			else if ((unsigned char) c < (unsigned char) 0x20)
 			{
+				/*
+				 * \r and \n must be escaped, the others are traditional.
+				 * We prefer to dump these using the C-like notation, rather
+				 * than a backslash and the literal character, because it
+				 * makes the dump file a bit more proof against Microsoftish
+				 * data mangling.
+				 */
 				switch (c)
 				{
-						/*
-						 * \r and \n must be escaped, the others are
-						 * traditional
-						 */
 					case '\b':
+						c = 'b';
+						break;
 					case '\f':
+						c = 'f';
+						break;
 					case '\n':
+						c = 'n';
+						break;
 					case '\r':
+						c = 'r';
+						break;
 					case '\t':
+						c = 't';
+						break;
 					case '\v':
-						DUMPSOFAR();
-						CopySendChar(cstate, '\\');
-						start = ptr++;	/* we include char in next run */
+						c = 'v';
 						break;
 					default:
 						/* All ASCII control chars are length 1 */
 						ptr++;
-						break;
+						continue;		/* fall to end of loop */
 				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;			/* do not include char in next run */
 			}
 			else
 				ptr++;
-- 
GitLab