Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
postgres-lambda-diff
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jakob Huber
postgres-lambda-diff
Commits
5fdd9418
Commit
5fdd9418
authored
Mar 12, 2005
by
Bruce Momjian
Browse files
Options
Downloads
Patches
Plain Diff
Handle carriage returns and line feeds in COPY CSV mode.
Andrew Dunstan
parent
06a61d66
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/backend/commands/copy.c
+120
-76
120 additions, 76 deletions
src/backend/commands/copy.c
with
120 additions
and
76 deletions
src/backend/commands/copy.c
+
120
−
76
View file @
5fdd9418
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.23
6
200
4/12/31 21:59:41 pgsql
Exp $
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.23
7
200
5/03/12 05:41:34 momjian
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -98,7 +98,6 @@ static bool fe_eof; /* true if detected end of copy data */
static
EolType
eol_type
;
/* EOL type of input */
static
int
client_encoding
;
/* remote side's character encoding */
static
int
server_encoding
;
/* local encoding */
static
bool
embedded_line_warning
;
/* these are just for error messages, see copy_in_error_callback */
static
bool
copy_binary
;
/* is it a binary copy? */
...
...
@@ -139,7 +138,7 @@ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
static
void
CopyFrom
(
Relation
rel
,
List
*
attnumlist
,
bool
binary
,
bool
oids
,
char
*
delim
,
char
*
null_print
,
bool
csv_mode
,
char
*
quote
,
char
*
escape
,
List
*
force_notnull_atts
);
static
bool
CopyReadLine
(
void
);
static
bool
CopyReadLine
(
char
*
quote
,
char
*
escape
);
static
char
*
CopyReadAttribute
(
const
char
*
delim
,
const
char
*
null_print
,
CopyReadResult
*
result
,
bool
*
isnull
);
static
char
*
CopyReadAttributeCSV
(
const
char
*
delim
,
const
char
*
null_print
,
...
...
@@ -1191,7 +1190,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
attr
=
tupDesc
->
attrs
;
num_phys_attrs
=
tupDesc
->
natts
;
attr_count
=
list_length
(
attnumlist
);
embedded_line_warning
=
false
;
/*
* Get info about the columns we need to process.
...
...
@@ -1718,7 +1716,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
ListCell
*
cur
;
/* Actually read the line into memory here */
done
=
CopyReadLine
();
done
=
csv_mode
?
CopyReadLine
(
quote
,
escape
)
:
CopyReadLine
(
NULL
,
NULL
);
/*
* EOF at start of line means we're done. If we see EOF after
...
...
@@ -2006,7 +2005,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
* by newline.
*/
static
bool
CopyReadLine
(
void
)
CopyReadLine
(
char
*
quote
,
char
*
escape
)
{
bool
result
;
bool
change_encoding
=
(
client_encoding
!=
server_encoding
);
...
...
@@ -2015,6 +2014,19 @@ CopyReadLine(void)
int
j
;
unsigned
char
s
[
2
];
char
*
cvt
;
bool
in_quote
=
false
,
last_was_esc
=
false
,
csv_mode
=
false
;
char
quotec
=
'\0'
,
escapec
=
'\0'
;
if
(
quote
)
{
csv_mode
=
true
;
quotec
=
quote
[
0
];
escapec
=
escape
[
0
];
/* ignore special escape processing if it's the same as quotec */
if
(
quotec
==
escapec
)
escapec
=
'\0'
;
}
s
[
1
]
=
0
;
...
...
@@ -2031,11 +2043,20 @@ CopyReadLine(void)
/*
* In this loop we only care for detecting newlines (\r and/or \n) and
* the end-of-copy marker (\.). For backwards compatibility we allow
* the end-of-copy marker (\.).
*
* In Text mode, for backwards compatibility we allow
* backslashes to escape newline characters. Backslashes other than
* the end marker get put into the line_buf, since CopyReadAttribute
* does its own escape processing. These four characters, and only
* these four, are assumed the same in frontend and backend encodings.
* does its own escape processing.
*
* In CSV mode, CR and NL inside a quoted field are just part of the
* data value and are put in line_buf. We keep just enough state
* to know if we are currently in a quoted field or not.
*
* These four characters, and only these four, are assumed the same in
* frontend and backend encodings.
*
* We do not assume that second and later bytes of a frontend
* multibyte character couldn't look like ASCII characters.
*/
...
...
@@ -2047,13 +2068,49 @@ CopyReadLine(void)
result
=
true
;
break
;
}
if
(
c
==
'\r'
)
if
(
csv_mode
)
{
/*
* Dealing with quotes and escapes here is mildly tricky. If the
* quote char is also the escape char, there's no problem - we
* just use the char as a toggle. If they are different, we need
* to ensure that we only take account of an escape inside a quoted
* field and immediately preceding a quote char, and not the
* second in an escape-escape sequence.
*/
if
(
in_quote
&&
c
==
escapec
)
last_was_esc
=
!
last_was_esc
;
if
(
c
==
quotec
&&
!
last_was_esc
)
in_quote
=
!
in_quote
;
if
(
c
!=
escapec
)
last_was_esc
=
false
;
/*
* updating the line count for embedded CR and/or LF chars is
* necessarily a little fragile - this test is probably about
* the best we can do.
*/
if
(
in_quote
&&
c
==
(
eol_type
==
EOL_CR
?
'\r'
:
'\n'
))
copy_lineno
++
;
}
if
(
!
in_quote
&&
c
==
'\r'
)
{
if
(
eol_type
==
EOL_NL
)
{
if
(
!
csv_mode
)
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"literal carriage return found in data"
),
errhint
(
"Use
\"\\
r
\"
to represent carriage return."
)));
else
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"unquoted carriage return found in CSV data"
),
errhint
(
"Use quoted CSV field to represent carriage return."
)));
}
/* Check for \r\n on first line, _and_ handle \r\n. */
if
(
eol_type
==
EOL_UNKNOWN
||
eol_type
==
EOL_CRNL
)
{
...
...
@@ -2068,10 +2125,19 @@ CopyReadLine(void)
{
/* found \r, but no \n */
if
(
eol_type
==
EOL_CRNL
)
{
if
(
!
csv_mode
)
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"literal carriage return found in data"
),
errhint
(
"Use
\"\\
r
\"
to represent carriage return."
)));
else
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"unquoted carriage return found in data"
),
errhint
(
"Use quoted CSV field to represent carriage return."
)));
}
/*
* if we got here, it is the first line and we didn't
...
...
@@ -2083,26 +2149,47 @@ CopyReadLine(void)
}
break
;
}
if
(
c
==
'\n'
)
if
(
!
in_quote
&&
c
==
'\n'
)
{
if
(
eol_type
==
EOL_CR
||
eol_type
==
EOL_CRNL
)
{
if
(
!
csv_mode
)
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"literal newline found in data"
),
errhint
(
"Use
\"\\
n
\"
to represent newline."
)));
else
ereport
(
ERROR
,
(
errcode
(
ERRCODE_BAD_COPY_FILE_FORMAT
),
errmsg
(
"unquoted newline found in data"
),
errhint
(
"Use quoted CSV field to represent newline."
)));
}
eol_type
=
EOL_NL
;
break
;
}
if
(
c
==
'\\'
)
if
((
line_buf
.
len
==
0
||
!
csv_mode
)
&&
c
==
'\\'
)
{
c
=
CopyGetChar
();
if
(
c
==
EOF
)
int
c2
;
if
(
csv_mode
)
c2
=
CopyPeekChar
();
else
c2
=
c
=
CopyGetChar
();
if
(
c2
==
EOF
)
{
result
=
true
;
if
(
csv_mode
)
CopyDonePeek
(
c2
,
true
);
break
;
}
if
(
c
==
'.'
)
if
(
c
2
==
'.'
)
{
if
(
csv_mode
)
CopyDonePeek
(
c2
,
true
);
/* allow keep calling GetChar() */
if
(
eol_type
==
EOL_CRNL
)
{
c
=
CopyGetChar
();
...
...
@@ -2140,6 +2227,10 @@ CopyReadLine(void)
result
=
true
;
/* report EOF */
break
;
}
if
(
csv_mode
)
CopyDonePeek
(
c2
,
false
);
/* not a dot, so put it back */
else
/* not EOF mark, so emit \ and following char literally */
appendStringInfoCharMacro
(
&
line_buf
,
'\\'
);
}
...
...
@@ -2369,34 +2460,6 @@ CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
for
(;;)
{
/* handle multiline quoted fields */
if
(
in_quote
&&
line_buf
.
cursor
>=
line_buf
.
len
)
{
bool
done
;
switch
(
eol_type
)
{
case
EOL_NL
:
appendStringInfoString
(
&
attribute_buf
,
"
\n
"
);
break
;
case
EOL_CR
:
appendStringInfoString
(
&
attribute_buf
,
"
\r
"
);
break
;
case
EOL_CRNL
:
appendStringInfoString
(
&
attribute_buf
,
"
\r\n
"
);
break
;
case
EOL_UNKNOWN
:
/* shouldn't happen - just keep going */
break
;
}
copy_lineno
++
;
done
=
CopyReadLine
();
if
(
done
&&
line_buf
.
len
==
0
)
break
;
start_cursor
=
line_buf
.
cursor
;
}
end_cursor
=
line_buf
.
cursor
;
if
(
line_buf
.
cursor
>=
line_buf
.
len
)
break
;
...
...
@@ -2629,25 +2692,6 @@ CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
!
use_quote
&&
(
c
=
*
test_string
)
!=
'\0'
;
test_string
+=
mblen
)
{
/*
* We don't know here what the surrounding line end characters
* might be. It might not even be under postgres' control. So
* we simply warn on ANY embedded line ending character.
*
* This warning will disappear when we make line parsing field-aware,
* so that we can reliably read in embedded line ending characters
* regardless of the file's line-end context.
*
*/
if
(
!
embedded_line_warning
&&
(
c
==
'\n'
||
c
==
'\r'
)
)
{
embedded_line_warning
=
true
;
elog
(
WARNING
,
"CSV fields with embedded linefeed or carriage return "
"characters might not be able to be reimported"
);
}
if
(
c
==
delimc
||
c
==
quotec
||
c
==
'\n'
||
c
==
'\r'
)
use_quote
=
true
;
if
(
!
same_encoding
)
...
...
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment