From a2367f89ffc8de48fe681d2187eb108e3f469adc Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 5 Sep 2009 23:58:01 +0000
Subject: [PATCH] Add a note warning that COPY BINARY is very
 datatype-specific. Per a complaint from Gordon Shannon.

---
 doc/src/sgml/ref/copy.sgml | 66 ++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 2ea68de912e..8ab9ff7ecae 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.86 2009/07/25 00:07:10 adunstan Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.87 2009/09/05 23:58:01 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -24,30 +24,30 @@ PostgreSQL documentation
 <synopsis>
 COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
     FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
-    [ [ WITH ] 
+    [ [ WITH ]
           [ BINARY ]
           [ OIDS ]
           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
           [ CSV [ HEADER ]
-                [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
                 [ FORCE NOT NULL <replaceable class="parameter">column</replaceable> [, ...] ]
 
 COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
-    [ [ WITH ] 
+    [ [ WITH ]
           [ BINARY ]
           [ OIDS ]
           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
           [ CSV [ HEADER ]
-                [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
                 [ FORCE QUOTE { <replaceable class="parameter">column</replaceable> [, ...] | * } ]
 </synopsis>
  </refsynopsisdiv>
- 
+
  <refsect1>
   <title>Description</title>
 
@@ -79,7 +79,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c
    client and the server.
   </para>
  </refsect1>
-  
+
  <refsect1>
   <title>Parameters</title>
 
@@ -215,7 +215,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c
     <listitem>
      <para>
       Specifies that the file contains a header line with the names of each
-      column in the file.  On output, the first line contains the column 
+      column in the file.  On output, the first line contains the column
       names from the table, and on input, the first line is ignored.
      </para>
     </listitem>
@@ -249,7 +249,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c
       In <literal>CSV</> <command>COPY TO</> mode, forces quoting to be
       used for all non-<literal>NULL</> values in each specified column.
       <literal>NULL</> output is never quoted. If <literal>*</> is specified,
-      non-<literal>NULL</> values for all columns of the table will be 
+      non-<literal>NULL</> values for all columns of the table will be
       quoted.
      </para>
     </listitem>
@@ -300,6 +300,10 @@ COPY <replaceable class="parameter">count</replaceable>
     somewhat faster than the normal text mode, but a binary-format
     file is less portable across machine architectures and
     <productname>PostgreSQL</productname> versions.
+    Also, the binary format is very data-type-specific; for example,
+    it will not work to output binary data from a <type>smallint</> column
+    and read it into an <type>integer</> column, even though that would work
+    fine in text format.
    </para>
 
    <para>
@@ -379,7 +383,7 @@ COPY <replaceable class="parameter">count</replaceable>
    </para>
 
  </refsect1>
- 
+
  <refsect1>
   <title>File Formats</title>
 
@@ -479,7 +483,7 @@ COPY <replaceable class="parameter">count</replaceable>
     </tgroup>
    </informaltable>
 
-    Presently, <command>COPY TO</command> will never emit an octal or 
+    Presently, <command>COPY TO</command> will never emit an octal or
     hex-digits backslash sequence, but it does use the other sequences
     listed above for those control characters.
    </para>
@@ -498,7 +502,7 @@ COPY <replaceable class="parameter">count</replaceable>
     data newlines and carriage returns to the <literal>\n</> and
     <literal>\r</> sequences respectively.  At present it is
     possible to represent a data carriage return by a backslash and carriage
-    return, and to represent a data newline by a backslash and newline.  
+    return, and to represent a data newline by a backslash and newline.
     However, these representations might not be accepted in future releases.
     They are also highly vulnerable to corruption if the <command>COPY</command> file is
     transferred across different machines (for example, from Unix to Windows
@@ -506,7 +510,7 @@ COPY <replaceable class="parameter">count</replaceable>
    </para>
 
    <para>
-    <command>COPY TO</command> will terminate each row with a Unix-style 
+    <command>COPY TO</command> will terminate each row with a Unix-style
     newline (<quote><literal>\n</></>).  Servers running on Microsoft Windows instead
     output carriage return/newline (<quote><literal>\r\n</></>), but only for
     <command>COPY</> to a server file; for consistency across platforms,
@@ -543,7 +547,7 @@ COPY <replaceable class="parameter">count</replaceable>
     non-<literal>NULL</> values in specific columns.
    </para>
 
-   <para> 
+   <para>
     The <literal>CSV</> format has no standard way to distinguish a
     <literal>NULL</> value from an empty string.
     <productname>PostgreSQL</>'s <command>COPY</> handles this by
@@ -557,28 +561,28 @@ COPY <replaceable class="parameter">count</replaceable>
     comparisons for specific columns.
    </para>
 
-   <para> 
+   <para>
     Because backslash is not a special character in the <literal>CSV</>
     format, <literal>\.</>, the end-of-data marker, could also appear
     as a data value.  To avoid any misinterpretation, a <literal>\.</>
-    data value appearing as a lone entry on a line is automatically 
-    quoted on output, and on input, if quoted, is not interpreted as the 
+    data value appearing as a lone entry on a line is automatically
+    quoted on output, and on input, if quoted, is not interpreted as the
     end-of-data marker.  If you are loading a file created by another
-    application that has a single unquoted column and might have a 
-    value of <literal>\.</>, you might need to quote that value in the 
+    application that has a single unquoted column and might have a
+    value of <literal>\.</>, you might need to quote that value in the
     input file.
    </para>
 
    <note>
     <para>
-     In <literal>CSV</> mode, all characters are significant. A quoted value 
-     surrounded by white space, or any characters other than 
-     <literal>DELIMITER</>, will include those characters. This can cause 
-     errors if  you import data from a system that pads <literal>CSV</> 
-     lines with white space out to some fixed width. If such a situation 
-     arises you might need to preprocess the <literal>CSV</> file to remove 
-     the trailing white space, before importing the data into 
-     <productname>PostgreSQL</>. 
+     In <literal>CSV</> mode, all characters are significant. A quoted value
+     surrounded by white space, or any characters other than
+     <literal>DELIMITER</>, will include those characters. This can cause
+     errors if you import data from a system that pads <literal>CSV</>
+     lines with white space out to some fixed width. If such a situation
+     arises you might need to preprocess the <literal>CSV</> file to remove
+     the trailing white space, before importing the data into
+     <productname>PostgreSQL</>.
     </para>
    </note>
 
@@ -600,7 +604,7 @@ COPY <replaceable class="parameter">count</replaceable>
      programs cannot process.
     </para>
    </note>
-    
+
   </refsect2>
 
   <refsect2>
@@ -747,7 +751,7 @@ OIDs to be shown as null if that ever proves desirable.
    </refsect3>
   </refsect2>
  </refsect1>
- 
+
  <refsect1>
   <title>Examples</title>
 
@@ -806,10 +810,10 @@ ZW      ZIMBABWE
 </programlisting>
   </para>
  </refsect1>
- 
+
  <refsect1>
   <title>Compatibility</title>
-  
+
   <para>
    There is no <command>COPY</command> statement in the SQL standard.
   </para>
-- 
GitLab