From bcaabc5698f6b81a990f1bb7cfa3a6bb9583d03e Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Tue, 14 Dec 1999 00:08:21 +0000
Subject: [PATCH] Depending on my interpreting (and programming) skills, this
 might solve anywhere from zero to two TODO items.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Allow flag to control COPY input/output of NULLs

I got this:
COPY table .... [ WITH NULL AS 'string' ]
which does what you'd expect. The default is \N, otherwise you can use
empty strings, etc. On Copy In this acts like a filter: every data item
that looks like 'string' becomes a NULL. Pretty straightforward.

This also seems to be related to

* Make postgres user have a password by default

If I recall this discussion correctly, the problem was actually that the
default password for the postgres (or any) user is in fact "\N", because
of the way copy is used. With this change, the file pg_pwd is copied out
with nulls as empty strings, so if someone doesn't have a password, the
password is just '', which one would expect from a new account. I don't
think anyone really wants a hard-coded default password.

Peter Eisentraut                  Sernanders väg 10:115
---
 doc/src/sgml/ref/copy.sgml     | 32 +++++++++++++++++++++----
 src/backend/commands/copy.c    | 43 ++++++++++++++++++++--------------
 src/backend/commands/user.c    |  3 ++-
 src/backend/parser/gram.y      |  9 ++++---
 src/backend/tcop/utility.c     |  3 ++-
 src/include/commands/copy.h    |  4 ++--
 src/include/nodes/parsenodes.h |  3 ++-
 7 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 3653ae9ef60..adbaa4c8e91 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.10 1999/10/29 23:52:20 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.11 1999/12/14 00:08:12 momjian Exp $
 Postgres documentation
 -->
 
@@ -20,15 +20,17 @@ Postgres documentation
  </refnamediv>
  <refsynopsisdiv>
   <refsynopsisdivinfo>
-   <date>1999-07-20</date>
+   <date>1999-12-11</date>
   </refsynopsisdivinfo>
   <synopsis>
 COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
     FROM { '<replaceable class="parameter">filename</replaceable>' | <filename>stdin</filename> }
     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
+    [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
 COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
     TO { '<replaceable class="parameter">filename</replaceable>' | <filename>stdout</filename> }
     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
+    [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
   </synopsis>
   
   <refsect2 id="R2-SQL-COPY-1">
@@ -104,6 +106,25 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
        </para>
       </listitem>
      </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="parameter">null string</replaceable></term>
+      <listitem>
+       <para>
+        A string to represent NULL values. The default is
+        <quote><literal>\N</literal></quote> (backslash-N), for historical
+        reasons. You might prefer an empty string, for example.
+       </para>
+       <note>
+        <para>
+         On a copy in, any data item that matches this string will be stored as
+         a NULL value, so you should make sure that you use the same string
+         as you used on copy out.
+        </para>
+       </note>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
    </para>
   </refsect2>
@@ -287,15 +308,16 @@ ERROR: <replaceable>reason</replaceable>
     encountered before this special end-of-file pattern is found.
    </para>
    <para>
-    The backslash character has other special meanings.  NULL attributes are
-    represented as "\N".  A literal backslash character is represented as two
+    The backslash character has other special meanings.  A literal backslash
+    character is represented as two
     consecutive backslashes ("\\").  A literal tab character is represented
     as a backslash and a tab.  A literal newline character is
     represented as a backslash and a newline.  When loading text data
     not generated by <acronym>Postgres</acronym>,
     you will need to convert backslash
     characters ("\") to double-backslashes ("\\") to ensure that they are loaded
-    properly.
+    properly. (The sequence "\N" will always be interpreted as a backslash and
+    an "N", for compatibility. The more general solution is "\\N".)
    </para>
   </refsect2>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 5f3f2455acd..04ee4fc8708 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.92 1999/11/27 21:52:53 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.93 1999/12/14 00:08:13 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,8 +43,8 @@
 
 
 /* non-export function prototypes */
-static void CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim);
-static void CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim);
+static void CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print);
+static void CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print);
 static Oid	GetOutputFunction(Oid type);
 static Oid	GetTypeElement(Oid type);
 static Oid	GetInputFunction(Oid type);
@@ -54,7 +54,7 @@ static void GetIndexRelations(Oid main_relation_oid,
 				  Relation **index_rels);
 
 static void CopyReadNewline(FILE *fp, int *newline);
-static char *CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline);
+static char *CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print);
 
 static void CopyAttributeOut(FILE *fp, char *string, char *delim);
 static int	CountTuples(Relation relation);
@@ -219,7 +219,7 @@ CopyDonePeek(FILE *fp, int c, int pickup)
 
 void
 DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
-	   char *filename, char *delim, int fileumask)
+	   char *filename, char *delim, char *null_print, int fileumask)
 {
 /*----------------------------------------------------------------------------
   Either unload or reload contents of class <relname>, depending on <from>.
@@ -232,7 +232,8 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
   Iff <binary>, unload or reload in the binary format, as opposed to the
   more wasteful but more robust and portable text format.
 
-  If in the text format, delimit columns with delimiter <delim>.
+  If in the text format, delimit columns with delimiter <delim> and print
+  NULL values as <null_print>.
 
   <fileumask> is the umask(2) setting to use while creating an output file.
   This should usually be more liberal than the backend's normal 077 umask,
@@ -304,7 +305,7 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
 						 "reading.  Errno = %s (%d).",
 						 geteuid(), filename, strerror(errno), errno);
 			}
-			CopyFrom(rel, binary, oids, fp, delim);
+			CopyFrom(rel, binary, oids, fp, delim, null_print);
 		}
 		else
 		{						/* copy from database to file */
@@ -336,7 +337,7 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
 						 "writing.  Errno = %s (%d).",
 						 geteuid(), filename, strerror(errno), errno);
 			}
-			CopyTo(rel, binary, oids, fp, delim);
+			CopyTo(rel, binary, oids, fp, delim, null_print);
 		}
 		if (!pipe)
 		{
@@ -362,7 +363,7 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
 
 
 static void
-CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
+CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print)
 {
 	HeapTuple	tuple;
 	HeapScanDesc scandesc;
@@ -449,7 +450,7 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
 					pfree(string);
 				}
 				else
-					CopySendString("\\N", fp);	/* null indicator */
+					CopySendString(null_print, fp);	/* null indicator */
 
 				if (i == attr_count - 1)
 					CopySendChar('\n', fp);
@@ -520,7 +521,7 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
 }
 
 static void
-CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
+CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print)
 {
 	HeapTuple	tuple;
 	AttrNumber	attr_count;
@@ -711,7 +712,7 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
 			lineno++;
 			if (oids)
 			{
-				string = CopyReadAttribute(fp, &isnull, delim, &newline);
+				string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
 				if (string == NULL)
 					done = 1;
 				else
@@ -724,7 +725,7 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim)
 			}
 			for (i = 0; i < attr_count && !done; i++)
 			{
-				string = CopyReadAttribute(fp, &isnull, delim, &newline);
+				string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
 				if (isnull)
 				{
 					values[i] = PointerGetDatum(NULL);
@@ -1122,10 +1123,11 @@ CopyReadNewline(FILE *fp, int *newline)
  *
  * delim is the string of acceptable delimiter characters(s).
  * *newline remembers whether we've seen a newline ending this tuple.
+ * null_print says how NULL values are represented
  */
 
 static char *
-CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline)
+CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print)
 {
 	StringInfoData	attribute_buf;
 	char		c;
@@ -1207,6 +1209,13 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline)
 						c = val & 0377;
 					}
 					break;
+                    /* This is a special hack to parse `\N' as <backslash-N>
+                       rather than just 'N' to provide compatibility with
+                       the default NULL output. -- pe */
+                case 'N':
+                    appendStringInfoChar(&attribute_buf, '\\');
+                    c = 'N';
+                    break;
 				case 'b':
 					c = '\b';
 					break;
@@ -1225,9 +1234,6 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline)
 				case 'v':
 					c = '\v';
 					break;
-				case 'N':
-					*isnull = (bool) true;
-					break;
 				case '.':
 					c = CopyGetChar(fp);
 					if (c != '\n')
@@ -1266,6 +1272,9 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline)
 		return cvt;
 	}
 #endif
+    if (strcmp(attribute_buf.data, null_print)==0)
+        *isnull = true;
+
 	return attribute_buf.data;
 
 endOfFile:
diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c
index 1cf07325473..56dce87f7e0 100644
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: user.c,v 1.41 1999/12/12 05:57:28 momjian Exp $
+ * $Id: user.c,v 1.42 1999/12/14 00:08:13 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,6 +77,7 @@ update_pg_pwd()
 		   false,				/* pipe */
 		   tempname,			/* filename */
 		   CRYPT_PWD_FILE_SEPSTR, /* delim */
+           "",                  /* nulls */
 		   0077);				/* fileumask */
 	/*
 	 * And rename the temp file to its final name, deleting the old pg_pwd.
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d3fb6291806..25f8dd02bb8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.121 1999/12/10 07:37:35 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.122 1999/12/14 00:08:15 momjian Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -147,7 +147,7 @@ static Node *doNegate(Node *n);
 
 %type <str>		TriggerEvents, TriggerFuncArg
 
-%type <str>		relation_name, copy_file_name, copy_delimiter, def_name,
+%type <str>		relation_name, copy_file_name, copy_delimiter, copy_null, def_name,
 		database_name, access_method_clause, access_method, attr_name,
 		class, index_name, name, func_name, file_name, aggr_argtype
 
@@ -802,7 +802,7 @@ opt_id:  ColId									{ $$ = $1; }
  *
  *****************************************************************************/
 
-CopyStmt:  COPY opt_binary relation_name opt_with_copy copy_dirn copy_file_name copy_delimiter
+CopyStmt:  COPY opt_binary relation_name opt_with_copy copy_dirn copy_file_name copy_delimiter copy_null
 				{
 					CopyStmt *n = makeNode(CopyStmt);
 					n->binary = $2;
@@ -811,6 +811,7 @@ CopyStmt:  COPY opt_binary relation_name opt_with_copy copy_dirn copy_file_name
 					n->direction = $5;
 					n->filename = $6;
 					n->delimiter = $7;
+                                        n->null_print = $8;
 					$$ = (Node *)n;
 				}
 		;
@@ -850,6 +851,8 @@ opt_using:	USING								{ $$ = TRUE; }
 		| /*EMPTY*/								{ $$ = TRUE; }
 		;
 
+copy_null:      WITH NULL_P AS Sconst { $$ = $4; }
+                | /*EMPTY*/         { $$ = "\\N"; }
 
 /*****************************************************************************
  *
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 6c22afa5458..bfc114ba2f7 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.73 1999/12/10 03:55:59 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.74 1999/12/14 00:08:17 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -269,6 +269,7 @@ ProcessUtility(Node *parsetree,
 				 */
 					   stmt->filename,
 					   stmt->delimiter,
+                       stmt->null_print,
 				/*
 				 * specify 022 umask while writing files with COPY.
 				 */
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 5e7355b7e91..1f2af72122b 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: copy.h,v 1.6 1999/11/21 04:16:17 tgl Exp $
+ * $Id: copy.h,v 1.7 1999/12/14 00:08:19 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,6 +15,6 @@
 
 
 void DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
-			char *filename, char *delim, int fileumask);
+			char *filename, char *delim, char *null_print, int fileumask);
 
 #endif	 /* COPY_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index df0cb5c4e54..45e586aad0c 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.90 1999/12/10 07:37:32 tgl Exp $
+ * $Id: parsenodes.h,v 1.91 1999/12/14 00:08:21 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -130,6 +130,7 @@ typedef struct CopyStmt
 	int			direction;		/* TO or FROM */
 	char	   *filename;		/* if NULL, use stdin/stdout */
 	char	   *delimiter;		/* delimiter character, \t by default */
+    char       *null_print;     /* how to print NULLs, `\N' by default */
 } CopyStmt;
 
 /* ----------------------
-- 
GitLab