From 2e4cb7082ca547d017759996b09cea754ab97bcc Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 27 Dec 2007 18:28:58 +0000
Subject: [PATCH] Disallow digits and lower-case ASCII letters as the delimiter
 in non-CSV COPY.  We need a restriction here because when the delimiter
 occurs as a data character, it is emitted with a backslash, and that will
 only work as desired if CopyReadAttributesText() will interpret the backslash
 sequence as representing the second character literally.  This is currently
 untrue for 'b', 'f', 'n', 'r', 't', 'v', 'x', and octal digits.  For
 future-proofing and simplicity of explanation, it seems best to disallow a-z
 and 0-9. We must also disallow dot, since "\." by itself would look like copy
 EOF. Note: "\N" is by default the null print string, so N would also cause a
 problem, but that is already tested for.

---
 src/backend/commands/copy.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index ea90608c435..52e7e695273 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.292 2007/12/27 17:00:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.293 2007/12/27 18:28:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -872,11 +872,22 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("COPY null representation cannot use newline or carriage return")));
 
-	/* Disallow backslash in non-CSV mode */
-	if (!cstate->csv_mode && strchr(cstate->delim, '\\') != NULL)
+	/*
+	 * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
+	 * backslash because it would be ambiguous.  We can't allow the others
+	 * because a data character matching the delimiter is emitted with a
+	 * backslash, and COPY IN interprets some backslash sequences
+	 * non-literally; in particular "\." alone would look like copy EOF.
+	 * Disallowing all lower case ASCII letters is more than strictly
+	 * necessary, but seems best for consistency and future-proofing.
+	 * Likewise we disallow all digits though only octal digits are dangerous.
+	 */
+	if (!cstate->csv_mode &&
+		strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
+			   cstate->delim[0]) != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("COPY delimiter cannot be backslash")));
+				 errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
 
 	/* Check header */
 	if (!cstate->csv_mode && cstate->header_line)
-- 
GitLab