diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 2d8dc945716e4c9b2bb28109eb459c7b2cde9f80..becaecf5b82a2c7a05de9fa1a3dd03acd2811b7b 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $ PostgreSQL documentation --> @@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla comparisons for specific columns. </para> + <para> + Because backslash is not a special character in the <literal>CSV</> + format, <literal>\.</>, the end-of-data marker, could also appear + as a data value. To avoid any misinterpretation, a <literal>\.</> + data value appearing as a lone entry on a line is automatically + quoted on output, and on input, if quoted, is not interpreted as the + end-of-data marker. If you are loading a file created by another + application that has a single unquoted column and might contain a + value of <literal>\.</>, you will need to quote that value in the input file. + </para> + <note> - <para> - In <literal>CSV</> mode, all characters are significant. A quoted value - surrounded by white space, or any characters other than - <literal>DELIMITER</>, will include those characters. This can cause - errors if you import data from a system that pads <literal>CSV</> - lines with white space out to some fixed width. If such a situation - arises you might need to preprocess the <literal>CSV</> file to remove - the trailing white space, before importing the data into - <productname>PostgreSQL</>. - </para> + <para> + In <literal>CSV</> mode, all characters are significant. A quoted value + surrounded by white space, or any characters other than + <literal>DELIMITER</>, will include those characters. This can cause + errors if you import data from a system that pads <literal>CSV</> + lines with white space out to some fixed width. 
If such a situation + arises you might need to preprocess the <literal>CSV</> file to remove + the trailing white space, before importing the data into + <productname>PostgreSQL</>. + </para> </note> <note> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index ae1d40e2ef37288bbd8befd8b3bed05e1bd419c8..f97aafc2034bb24b420b067528995916f0b6ac47 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate, bool *isnull); static void CopyAttributeOutText(CopyState cstate, char *server_string); static void CopyAttributeOutCSV(CopyState cstate, char *server_string, - bool use_quote); + bool use_quote, bool single_attr); static List *CopyGetAttnums(Relation rel, List *attnamelist); static char *limit_printout_length(const char *str); @@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate) colname = NameStr(attr[attnum - 1]->attname); - CopyAttributeOutCSV(cstate, colname, false); + CopyAttributeOutCSV(cstate, colname, false, + list_length(cstate->attnumlist) == 1); } CopySendEndOfRow(cstate); @@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate) value)); if (cstate->csv_mode) CopyAttributeOutCSV(cstate, string, - force_quote[attnum - 1]); + force_quote[attnum - 1], + list_length(cstate->attnumlist) == 1); else CopyAttributeOutText(cstate, string); } @@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string) */ static void CopyAttributeOutCSV(CopyState cstate, char *server_string, - bool use_quote) + bool use_quote, bool single_attr) { char *string; char c; @@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string, */ if (!use_quote) { - 
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen) - { - if (c == delimc || c == quotec || c == '\n' || c == '\r') + /* + * Because '\.' can be a data value, quote it if it appears + * alone on a line so it is not interpreted as the end-of-data + * marker. + */ + if (single_attr && strcmp(string, "\\.") == 0) + use_quote = true; + else + { + for (tstring = string; (c = *tstring) != '\0'; tstring += mblen) { - use_quote = true; - break; + if (c == delimc || c == quotec || c == '\n' || c == '\r') + { + use_quote = true; + break; + } + if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c)) + mblen = pg_encoding_mblen(cstate->client_encoding, tstring); + else + mblen = 1; } - if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c)) - mblen = pg_encoding_mblen(cstate->client_encoding, tstring); - else - mblen = 1; } } diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 78f20605702881203849c0fa49e65b7ec557b6c2..524e88cbae67bba87e55912e129f68a24820d9d2 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\'; --test that we read consecutive LFs properly CREATE TEMP TABLE testnl (a int, b text, c int); COPY testnl FROM stdin CSV; +-- test end of copy marker +CREATE TEMP TABLE testeoc (a text); +COPY testeoc FROM stdin CSV; DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after(); diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index add8214d19dfb43157c7648455d4200232bfdea2..d962d2e048e9eb442ae968044c611e77b9a63c76 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV; inside",2 \. +-- test end of copy marker +CREATE TEMP TABLE testeoc (a text); + +COPY testeoc FROM stdin CSV; +a\. +\.b +c\.d +"\." +\. + DROP TABLE x, y; DROP FUNCTION fn_x_before();