From a2367f89ffc8de48fe681d2187eb108e3f469adc Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sat, 5 Sep 2009 23:58:01 +0000 Subject: [PATCH] Add a note warning that COPY BINARY is very datatype-specific. Per a complaint from Gordon Shannon. --- doc/src/sgml/ref/copy.sgml | 66 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 2ea68de912e..8ab9ff7ecae 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.86 2009/07/25 00:07:10 adunstan Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.87 2009/09/05 23:58:01 tgl Exp $ PostgreSQL documentation --> @@ -24,30 +24,30 @@ PostgreSQL documentation <synopsis> COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ] FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } - [ [ WITH ] + [ [ WITH ] [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ] [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] [ CSV [ HEADER ] - [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] + [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ] [ FORCE NOT NULL <replaceable class="parameter">column</replaceable> [, ...] ] COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] 
) ] | ( <replaceable class="parameter">query</replaceable> ) } TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } - [ [ WITH ] + [ [ WITH ] [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ] [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] [ CSV [ HEADER ] - [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] + [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ] [ FORCE QUOTE { <replaceable class="parameter">column</replaceable> [, ...] | * } ] </synopsis> </refsynopsisdiv> - + <refsect1> <title>Description</title> @@ -79,7 +79,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c client and the server. </para> </refsect1> - + <refsect1> <title>Parameters</title> @@ -215,7 +215,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c <listitem> <para> Specifies that the file contains a header line with the names of each - column in the file. On output, the first line contains the column + column in the file. On output, the first line contains the column names from the table, and on input, the first line is ignored. </para> </listitem> @@ -249,7 +249,7 @@ COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable c In <literal>CSV</> <command>COPY TO</> mode, forces quoting to be used for all non-<literal>NULL</> values in each specified column. <literal>NULL</> output is never quoted. If <literal>*</> is specified, - non-<literal>NULL</> values for all columns of the table will be + non-<literal>NULL</> values for all columns of the table will be quoted. 
</para> </listitem> @@ -300,6 +300,10 @@ COPY <replaceable class="parameter">count</replaceable> somewhat faster than the normal text mode, but a binary-format file is less portable across machine architectures and <productname>PostgreSQL</productname> versions. + Also, the binary format is very data-type-specific; for example, + it will not work to output binary data from a <type>smallint</> column + and read it into an <type>integer</> column, even though that would work + fine in text format. </para> <para> @@ -379,7 +383,7 @@ COPY <replaceable class="parameter">count</replaceable> </para> </refsect1> - + <refsect1> <title>File Formats</title> @@ -479,7 +483,7 @@ COPY <replaceable class="parameter">count</replaceable> </tgroup> </informaltable> - Presently, <command>COPY TO</command> will never emit an octal or + Presently, <command>COPY TO</command> will never emit an octal or hex-digits backslash sequence, but it does use the other sequences listed above for those control characters. </para> @@ -498,7 +502,7 @@ COPY <replaceable class="parameter">count</replaceable> data newlines and carriage returns to the <literal>\n</> and <literal>\r</> sequences respectively. At present it is possible to represent a data carriage return by a backslash and carriage - return, and to represent a data newline by a backslash and newline. + return, and to represent a data newline by a backslash and newline. However, these representations might not be accepted in future releases. They are also highly vulnerable to corruption if the <command>COPY</command> file is transferred across different machines (for example, from Unix to Windows @@ -506,7 +510,7 @@ COPY <replaceable class="parameter">count</replaceable> </para> <para> - <command>COPY TO</command> will terminate each row with a Unix-style + <command>COPY TO</command> will terminate each row with a Unix-style newline (<quote><literal>\n</></>). 
Servers running on Microsoft Windows instead output carriage return/newline (<quote><literal>\r\n</></>), but only for <command>COPY</> to a server file; for consistency across platforms, @@ -543,7 +547,7 @@ COPY <replaceable class="parameter">count</replaceable> non-<literal>NULL</> values in specific columns. </para> - <para> + <para> The <literal>CSV</> format has no standard way to distinguish a <literal>NULL</> value from an empty string. <productname>PostgreSQL</>'s <command>COPY</> handles this by @@ -557,28 +561,28 @@ COPY <replaceable class="parameter">count</replaceable> comparisons for specific columns. </para> - <para> + <para> Because backslash is not a special character in the <literal>CSV</> format, <literal>\.</>, the end-of-data marker, could also appear as a data value. To avoid any misinterpretation, a <literal>\.</> - data value appearing as a lone entry on a line is automatically - quoted on output, and on input, if quoted, is not interpreted as the + data value appearing as a lone entry on a line is automatically + quoted on output, and on input, if quoted, is not interpreted as the end-of-data marker. If you are loading a file created by another - application that has a single unquoted column and might have a - value of <literal>\.</>, you might need to quote that value in the + application that has a single unquoted column and might have a + value of <literal>\.</>, you might need to quote that value in the input file. </para> <note> <para> - In <literal>CSV</> mode, all characters are significant. A quoted value - surrounded by white space, or any characters other than - <literal>DELIMITER</>, will include those characters. This can cause - errors if you import data from a system that pads <literal>CSV</> - lines with white space out to some fixed width. If such a situation - arises you might need to preprocess the <literal>CSV</> file to remove - the trailing white space, before importing the data into - <productname>PostgreSQL</>. 
+ In <literal>CSV</> mode, all characters are significant. A quoted value + surrounded by white space, or any characters other than + <literal>DELIMITER</>, will include those characters. This can cause + errors if you import data from a system that pads <literal>CSV</> + lines with white space out to some fixed width. If such a situation + arises you might need to preprocess the <literal>CSV</> file to remove + the trailing white space, before importing the data into + <productname>PostgreSQL</>. </para> </note> @@ -600,7 +604,7 @@ COPY <replaceable class="parameter">count</replaceable> programs cannot process. </para> </note> - + </refsect2> <refsect2> @@ -747,7 +751,7 @@ OIDs to be shown as null if that ever proves desirable. </refsect3> </refsect2> </refsect1> - + <refsect1> <title>Examples</title> @@ -806,10 +810,10 @@ ZW ZIMBABWE </programlisting> </para> </refsect1> - + <refsect1> <title>Compatibility</title> - + <para> There is no <command>COPY</command> statement in the SQL standard. </para> -- GitLab