From a2a8c7a662ec96537b6d1faba0770c516b921911 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Tue, 4 Aug 2009 16:08:37 +0000 Subject: [PATCH] Support hex-string input and output for type BYTEA. Both hex format and the traditional "escape" format are automatically handled on input. The output format is selected by the new GUC variable bytea_output. As committed, bytea_output defaults to HEX, which is an *incompatible change*. We will keep it this way for awhile for testing purposes, but should consider whether to switch to the more backwards-compatible default of ESCAPE before 8.5 is released. Peter Eisentraut --- doc/src/sgml/config.sgml | 19 +++- doc/src/sgml/datatype.sgml | 92 +++++++++++++++--- src/backend/catalog/pg_largeobject.c | 4 +- src/backend/commands/trigger.c | 3 +- src/backend/optimizer/path/indxpath.c | 3 +- src/backend/utils/adt/encode.c | 10 +- src/backend/utils/adt/selfuncs.c | 3 +- src/backend/utils/adt/varlena.c | 74 +++++++++++---- src/backend/utils/misc/guc.c | 18 +++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/bin/pg_dump/pg_dump.c | 60 ++++-------- src/include/utils/builtins.h | 30 ++---- src/include/utils/bytea.h | 50 ++++++++++ src/interfaces/libpq/fe-exec.c | 60 +++++++++++- src/test/regress/expected/conversion.out | 2 + src/test/regress/expected/strings.out | 93 +++++++++++++++++++ src/test/regress/input/largeobject.source | 3 + src/test/regress/output/largeobject.source | 2 + src/test/regress/output/largeobject_1.source | 2 + src/test/regress/sql/conversion.sql | 3 + src/test/regress/sql/strings.sql | 21 +++++ 21 files changed, 442 insertions(+), 111 deletions(-) create mode 100644 src/include/utils/bytea.h diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a86ba6089a4..2b34921e70b 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.222 2009/07/16 20:55:44 tgl Exp $ --> +<!-- $PostgreSQL: 
pgsql/doc/src/sgml/config.sgml,v 1.223 2009/08/04 16:08:35 tgl Exp $ --> <chapter Id="runtime-config"> <title>Server Configuration</title> @@ -4060,6 +4060,23 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-bytea-output" xreflabel="bytea_output"> + <term><varname>bytea_output</varname> (<type>enum</type>)</term> + <indexterm> + <primary><varname>bytea_output</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Sets the output format for values of type <type>bytea</type>. + Valid values are <literal>hex</literal> (the default) + and <literal>escape</literal> (the traditional PostgreSQL + format). See <xref linkend="datatype-binary"> for more + information. The <type>bytea</type> type always + accepts both formats on input, regardless of this setting. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-xmlbinary" xreflabel="xmlbinary"> <term><varname>xmlbinary</varname> (<type>enum</type>)</term> <indexterm> diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 774e4dbfb04..abe747a6964 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.240 2009/07/08 17:21:55 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.241 2009/08/04 16:08:35 tgl Exp $ --> <chapter id="datatype"> <title id="datatype-title">Data Types</title> @@ -1177,7 +1177,7 @@ SELECT b, char_length(b) FROM test2; <para> A binary string is a sequence of octets (or bytes). Binary strings are distinguished from character strings in two - ways: First, binary strings specifically allow storing + ways. First, binary strings specifically allow storing octets of value zero and other <quote>non-printable</quote> octets (usually, octets outside the range 32 to 126). 
Character strings disallow zero octets, and also disallow any @@ -1191,13 +1191,82 @@ SELECT b, char_length(b) FROM test2; </para> <para> - When entering <type>bytea</type> values, octets of certain - values <emphasis>must</emphasis> be escaped (but all octet - values <emphasis>can</emphasis> be escaped) when used as part - of a string literal in an <acronym>SQL</acronym> statement. In + The <type>bytea</type> type supports two external formats for + input and output: <productname>PostgreSQL</productname>'s historical + <quote>escape</quote> format, and <quote>hex</quote> format. Both + of these are always accepted on input. The output format depends + on the configuration parameter <xref linkend="guc-bytea-output">; + the default is hex. (Note that the hex format was introduced in + <productname>PostgreSQL</productname> 8.5; earlier versions and some + tools don't understand it.) + </para> + + <para> + The <acronym>SQL</acronym> standard defines a different binary + string type, called <type>BLOB</type> or <type>BINARY LARGE + OBJECT</type>. The input format is different from + <type>bytea</type>, but the provided functions and operators are + mostly the same. + </para> + + <sect2> + <title><type>bytea</> hex format</title> + + <para> + The <quote>hex</> format encodes binary data as 2 hexadecimal digits + per byte, most significant nibble first. The entire string is + preceded by the sequence <literal>\x</literal> (to distinguish it + from the escape format). In some contexts, the initial backslash may + need to be escaped by doubling it, in the same cases in which backslashes + have to be doubled in escape format; details appear below. + The hexadecimal digits can + be either upper or lower case, and whitespace is permitted between + digit pairs (but not within a digit pair nor in the starting + <literal>\x</literal> sequence). 
+ The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster to convert than the escape format, so its use is preferred. + </para> + + <para> + Example: +<programlisting> +SELECT E'\\xDEADBEEF'; +</programlisting> + </para> + </sect2> + + <sect2> + <title><type>bytea</> escape format</title> + + <para> + The <quote>escape</quote> format is the traditional + <productname>PostgreSQL</productname> format for the <type>bytea</type> + type. It + takes the approach of representing a binary string as a sequence + of ASCII characters, while converting those bytes that cannot be + represented as an ASCII character into special escape sequences. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient. But in practice it is usually confusing because it + fuzzes up the distinction between binary strings and character + strings, and also the particular escape mechanism that was chosen is + somewhat unwieldy. So this format should probably be avoided + for most new applications. + </para> + + <para> + When entering <type>bytea</type> values in escape format, + octets of certain + values <emphasis>must</emphasis> be escaped, while all octet + values <emphasis>can</emphasis> be escaped. In general, to escape an octet, convert it into its three-digit octal value and precede it - by two backslashes. <xref linkend="datatype-binary-sqlesc"> + by a backslash (or two backslashes, if writing the value as a + literal using escape string syntax). + Backslash itself (octet value 92) can alternatively be represented by + double backslashes. + <xref linkend="datatype-binary-sqlesc"> shows the characters that must be escaped, and gives the alternative escape sequences where applicable. </para> @@ -1343,14 +1412,7 @@ SELECT b, char_length(b) FROM test2; have to escape line feeds and carriage returns if your interface automatically translates these.
</para> - - <para> - The <acronym>SQL</acronym> standard defines a different binary - string type, called <type>BLOB</type> or <type>BINARY LARGE - OBJECT</type>. The input format is different from - <type>bytea</type>, but the provided functions and operators are - mostly the same. - </para> + </sect2> </sect1> diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index 925d21387b8..313ccdd3f07 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.32 2009/01/01 17:23:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.33 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,7 +18,7 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/pg_largeobject.h" -#include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/tqual.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7bc82127893..c1f55cfcf93 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.251 2009/07/30 02:45:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.252 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index b3f96eb773c..3930acf05a7 100644 --- 
a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.240 2009/06/11 14:48:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 480b85cefcb..e581e3bc42d 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char *hextbl = "0123456789abcdef"; +static const char hextbl[] = "0123456789abcdef"; static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -122,7 +122,7 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -static unsigned +unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; @@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst) return len * 2; } -static char +static inline char get_hex(char c) { int res = -1; @@ -152,7 +152,7 @@ get_hex(char c) return (char) res; } -static unsigned +unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/selfuncs.c 
b/src/backend/utils/adt/selfuncs.c index 3d60885a795..e85ab06819c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,6 +109,7 @@ #include "parser/parse_coerce.h" #include "parser/parsetree.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/fmgroids.h" diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b9b54e6db63..c524454432e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,10 +24,14 @@ #include "parser/scansup.h" #include "regex/regex.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + typedef struct varlena unknown; typedef struct @@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS) char *inputText = PG_GETARG_CSTRING(0); char *tp; char *rp; - int byte; + int bc; bytea *result; - for (byte = 0, tp = inputText; *tp != '\0'; byte ++) + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + 
PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) { if (tp[0] != '\\') tp++; @@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS) else { /* - * one backslash, not followed by 0 or ### valid octal + * one backslash, not followed by another or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS) } } - byte +=VARHDRSZ; + bc += VARHDRSZ; - result = (bytea *) palloc(byte); - SET_VARSIZE(result, byte); + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); tp = inputText; rp = VARDATA(result); @@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS) (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { - byte = VAL(tp[1]); - byte <<=3; - byte +=VAL(tp[2]); - byte <<=3; - *rp++ = byte +VAL(tp[3]); + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); tp += 4; } @@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS) /* * byteaout - converts to printable representation of byte array * - * Non-printable characters are inserted as '\nnn' (octal) and '\' as - * '\\'. - * - * NULL vlena should be an error--returning string with NULL for now. + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. 
*/ Datum byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); char *result; - char *vp; char *rp; - int val; /* holds unprintable chars */ - int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; int len; + int i; len = 1; /* empty string has 1 char */ vp = VARDATA_ANY(vlena); @@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS) } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); @@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS) else *rp++ = *vp; } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 76d3ec9da65..264b45451a9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/guc_tables.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record, * NOTE! Option values may not contain double quotes! 
*/ +static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} +}; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) @@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] = BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL }, + { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e50d7a44f7b..41488e264f0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 360eaf1caf7..b46c068d4ec 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12,7 +12,7 @@ * by PostgreSQL * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.544 2009/08/02 22:14:52 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.545 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -11008,6 +11008,8 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) TableInfo *tbinfo = tginfo->tgtable; PQExpBuffer query; PQExpBuffer delqry; + char *tgargs; + size_t lentgargs; const char *p; int findx; @@ -11109,53 +11111,29 @@ dumpTrigger(Archive *fout, TriggerInfo 
*tginfo) appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(", fmtId(tginfo->tgfname)); - p = tginfo->tgargs; + tgargs = (char *) PQunescapeBytea(tginfo->tgargs, &lentgargs); + p = tgargs; for (findx = 0; findx < tginfo->tgnargs; findx++) { - const char *s = p; + /* find the embedded null that terminates this trigger argument */ + size_t tlen = strlen(p); - /* Set 'p' to end of arg string. marked by '\000' */ - for (;;) + if (p + tlen >= tgargs + lentgargs) { - p = strchr(p, '\\'); - if (p == NULL) - { - write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", - tginfo->tgargs, - tginfo->dobj.name, - tbinfo->dobj.name); - exit_nicely(); - } - p++; - if (*p == '\\') /* is it '\\'? */ - { - p++; - continue; - } - if (p[0] == '0' && p[1] == '0' && p[2] == '0') /* is it '\000'? */ - break; + /* hm, not found before end of bytea value... */ + write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", + tginfo->tgargs, + tginfo->dobj.name, + tbinfo->dobj.name); + exit_nicely(); } - p--; - - appendPQExpBufferChar(query, '\''); - while (s < p) - { - if (*s == '\'') - appendPQExpBufferChar(query, '\''); - /* - * bytea unconditionally doubles backslashes, so we suppress the - * doubling for standard_conforming_strings. - */ - if (fout->std_strings && *s == '\\' && s[1] == '\\') - s++; - appendPQExpBufferChar(query, *s++); - } - appendPQExpBufferChar(query, '\''); - appendPQExpBuffer(query, - (findx < tginfo->tgnargs - 1) ? 
", " : ""); - p = p + 4; + if (findx > 0) + appendPQExpBuffer(query, ", "); + appendStringLiteralAH(query, p, fout); + p += tlen + 1; } + free(tgargs); appendPQExpBuffer(query, ");\n"); if (tginfo->tgenabled != 't' && tginfo->tgenabled != 'O') diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 4b92cbcb608..b664799fc47 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.337 2009/08/03 21:11:39 joe Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.338 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,12 @@ extern Datum char_text(PG_FUNCTION_ARGS); extern Datum domain_in(PG_FUNCTION_ARGS); extern Datum domain_recv(PG_FUNCTION_ARGS); +/* encode.c */ +extern Datum binary_encode(PG_FUNCTION_ARGS); +extern Datum binary_decode(PG_FUNCTION_ARGS); +extern unsigned hex_encode(const char *src, unsigned len, char *dst); +extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* enum.c */ extern Datum enum_in(PG_FUNCTION_ARGS); extern Datum enum_out(PG_FUNCTION_ARGS); @@ -711,28 +717,6 @@ extern Datum unknownout(PG_FUNCTION_ARGS); extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); -extern Datum byteain(PG_FUNCTION_ARGS); -extern Datum byteaout(PG_FUNCTION_ARGS); -extern Datum bytearecv(PG_FUNCTION_ARGS); -extern Datum byteasend(PG_FUNCTION_ARGS); -extern Datum byteaoctetlen(PG_FUNCTION_ARGS); -extern Datum byteaGetByte(PG_FUNCTION_ARGS); -extern Datum byteaGetBit(PG_FUNCTION_ARGS); -extern Datum byteaSetByte(PG_FUNCTION_ARGS); -extern Datum byteaSetBit(PG_FUNCTION_ARGS); -extern Datum binary_encode(PG_FUNCTION_ARGS); -extern Datum binary_decode(PG_FUNCTION_ARGS); -extern 
Datum byteaeq(PG_FUNCTION_ARGS); -extern Datum byteane(PG_FUNCTION_ARGS); -extern Datum bytealt(PG_FUNCTION_ARGS); -extern Datum byteale(PG_FUNCTION_ARGS); -extern Datum byteagt(PG_FUNCTION_ARGS); -extern Datum byteage(PG_FUNCTION_ARGS); -extern Datum byteacmp(PG_FUNCTION_ARGS); -extern Datum byteacat(PG_FUNCTION_ARGS); -extern Datum byteapos(PG_FUNCTION_ARGS); -extern Datum bytea_substr(PG_FUNCTION_ARGS); -extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h new file mode 100644 index 00000000000..8750d6d0e9c --- /dev/null +++ b/src/include/utils/bytea.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * bytea.h + * Declarations for BYTEA data type support. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/utils/bytea.h,v 1.1 2009/08/04 16:08:36 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef BYTEA_H +#define BYTEA_H + +#include "fmgr.h" + + +typedef enum +{ + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX +} ByteaOutputType; + +extern int bytea_output; /* ByteaOutputType, but int for GUC enum */ + +/* functions are in utils/adt/varlena.c */ +extern Datum byteain(PG_FUNCTION_ARGS); +extern Datum byteaout(PG_FUNCTION_ARGS); +extern Datum bytearecv(PG_FUNCTION_ARGS); +extern Datum byteasend(PG_FUNCTION_ARGS); +extern Datum byteaoctetlen(PG_FUNCTION_ARGS); +extern Datum byteaGetByte(PG_FUNCTION_ARGS); +extern Datum byteaGetBit(PG_FUNCTION_ARGS); +extern Datum byteaSetByte(PG_FUNCTION_ARGS); +extern Datum byteaSetBit(PG_FUNCTION_ARGS); +extern Datum byteaeq(PG_FUNCTION_ARGS); +extern Datum byteane(PG_FUNCTION_ARGS); +extern Datum bytealt(PG_FUNCTION_ARGS); +extern Datum byteale(PG_FUNCTION_ARGS); 
+extern Datum byteagt(PG_FUNCTION_ARGS); +extern Datum byteage(PG_FUNCTION_ARGS); +extern Datum byteacmp(PG_FUNCTION_ARGS); +extern Datum byteacat(PG_FUNCTION_ARGS); +extern Datum byteapos(PG_FUNCTION_ARGS); +extern Datum bytea_substr(PG_FUNCTION_ARGS); +extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + +#endif /* BYTEA_H */ diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index c00f5eae6ab..f1318a4a942 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.203 2009/06/11 14:49:13 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.204 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3167,6 +3167,29 @@ PQescapeBytea(const unsigned char *from, size_t from_length, size_t *to_length) } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static inline char +get_hex(char c) +{ + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + + return (char) res; +} + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') @@ -3198,6 +3221,40 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') + { + const unsigned char *s; + 
unsigned char *p; + + buflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = (unsigned char *) malloc(buflen > 0 ? buflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, + v2; + + /* + * Bad input is silently ignored. Note that this includes + * whitespace between hex pairs, which is allowed by byteain. + */ + v1 = get_hex(*s++); + if (!*s || v1 == (char) -1) + continue; + v2 = get_hex(*s++); + if (v2 != (char) -1) + *p++ = (v1 << 4) | v2; + } + + buflen = p - buffer; + } + else + { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. @@ -3244,6 +3301,7 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) } } buflen = j; /* buflen is the length of the dequoted data */ + } /* Shrink the buffer to be no larger than necessary */ /* +1 avoids unportable behavior when buflen==0 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a45343..82eca262f09 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1,3 +1,5 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2ace63..392f48ef8c6 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -97,6 +97,99 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. 
RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba9261ac5..807cfd7cc46 100644 --- a/src/test/regress/input/largeobject.source +++ b/src/test/regress/input/largeobject.source @@ -2,6 +2,9 @@ -- Test large object support -- +-- ensure consistent test output regardless 
of the default bytea format +SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c913e..d7468bb5131 100644 --- a/src/test/regress/output/largeobject.source +++ b/src/test/regress/output/largeobject.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c2517..84e916fea45 100644 --- a/src/test/regress/output/largeobject_1.source +++ b/src/test/regress/output/largeobject_1.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178315e..be194eec1f1 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -1,3 +1,6 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1e62c..63df9402ed7 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -43,6 +43,27 @@ SELECT U&'wrong: +0061' UESCAPE '+'; RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; 
+SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types -- GitLab