diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a86ba6089a49395d072dc5fb99fddfd78569512a..2b34921e70b3eced925b605a35b9388574cb8938 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.222 2009/07/16 20:55:44 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.223 2009/08/04 16:08:35 tgl Exp $ --> <chapter Id="runtime-config"> <title>Server Configuration</title> @@ -4060,6 +4060,23 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-bytea-output" xreflabel="bytea_output"> + <term><varname>bytea_output</varname> (<type>enum</type>)</term> + <indexterm> + <primary><varname>bytea_output</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Sets the output format for values of type <type>bytea</type>. + Valid values are <literal>hex</literal> (the default) + and <literal>escape</literal> (the traditional PostgreSQL + format). See <xref linkend="datatype-binary"> for more + information. The <type>bytea</type> type always + accepts both formats on input, regardless of this setting. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-xmlbinary" xreflabel="xmlbinary"> <term><varname>xmlbinary</varname> (<type>enum</type>)</term> <indexterm> diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 774e4dbfb04fef9869459ed8bda661657608c56f..abe747a6964729f7ec4c063bf98d305d41f7240c 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.240 2009/07/08 17:21:55 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.241 2009/08/04 16:08:35 tgl Exp $ --> <chapter id="datatype"> <title id="datatype-title">Data Types</title> @@ -1177,7 +1177,7 @@ SELECT b, char_length(b) FROM test2; <para> A binary string is a sequence of octets (or bytes). 
Binary strings are distinguished from character strings in two - ways: First, binary strings specifically allow storing + ways. First, binary strings specifically allow storing octets of value zero and other <quote>non-printable</quote> octets (usually, octets outside the range 32 to 126). Character strings disallow zero octets, and also disallow any @@ -1191,13 +1191,82 @@ SELECT b, char_length(b) FROM test2; </para> <para> - When entering <type>bytea</type> values, octets of certain - values <emphasis>must</emphasis> be escaped (but all octet - values <emphasis>can</emphasis> be escaped) when used as part - of a string literal in an <acronym>SQL</acronym> statement. In + The <type>bytea</type> type supports two external formats for + input and output: <productname>PostgreSQL</productname>'s historical + <quote>escape</quote> format, and <quote>hex</quote> format. Both + of these are always accepted on input. The output format depends + on the configuration parameter <xref linkend="guc-bytea-output">; + the default is hex. (Note that the hex format was introduced in + <productname>PostgreSQL</productname> 8.5; earlier versions and some + tools don't understand it.) + </para> + + <para> + The <acronym>SQL</acronym> standard defines a different binary + string type, called <type>BLOB</type> or <type>BINARY LARGE + OBJECT</type>. The input format is different from + <type>bytea</type>, but the provided functions and operators are + mostly the same. + </para> + + <sect2> + <title><type>bytea</> hex format</title> + + <para> + The <quote>hex</> format encodes binary data as 2 hexadecimal digits + per byte, most significant nibble first. The entire string is + preceded by the sequence <literal>\x</literal> (to distinguish it + from the escape format). In some contexts, the initial backslash may + need to be escaped by doubling it, in the same cases in which backslashes + have to be doubled in escape format; details appear below. 
+ The hexadecimal digits can + be either upper or lower case, and whitespace is permitted between + digit pairs (but not within a digit pair nor in the starting + <literal>\x</literal> sequence). + The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster to convert than the escape format, so its use is preferred. + </para> + + <para> + Example: +<programlisting> +SELECT E'\\xDEADBEEF'; +</programlisting> + </para> + </sect2> + + <sect2> + <title><type>bytea</> escape format</title> + + <para> + The <quote>escape</quote> format is the traditional + <productname>PostgreSQL</productname> format for the <type>bytea</type> + type. It + takes the approach of representing a binary string as a sequence + of ASCII characters, while converting those bytes that cannot be + represented as an ASCII character into special escape sequences. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient. But in practice it is usually confusing because it + fuzzes up the distinction between binary strings and character + strings, and also the particular escape mechanism that was chosen is + somewhat unwieldy. So this format should probably be avoided + for most new applications. + </para> + + <para> + When entering <type>bytea</type> values in escape format, + octets of certain + values <emphasis>must</emphasis> be escaped, while all octet + values <emphasis>can</emphasis> be escaped. In general, to escape an octet, convert it into its three-digit octal value and precede it - by two backslashes. <xref linkend="datatype-binary-sqlesc"> + by a backslash (or two backslashes, if writing the value as a + literal using escape string syntax). + Backslash itself (octet value 92) can alternatively be represented by + double backslashes. 
+ <xref linkend="datatype-binary-sqlesc"> shows the characters that must be escaped, and gives the alternative escape sequences where applicable. </para> @@ -1343,14 +1412,7 @@ SELECT b, char_length(b) FROM test2; have to escape line feeds and carriage returns if your interface automatically translates these. </para> - - <para> - The <acronym>SQL</acronym> standard defines a different binary - string type, called <type>BLOB</type> or <type>BINARY LARGE - OBJECT</type>. The input format is different from - <type>bytea</type>, but the provided functions and operators are - mostly the same. - </para> + </sect2> </sect1> diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index 925d21387b88d68f83a7ca8c4ff748a00dfb857b..313ccdd3f07b4e8f1d978dcd9f2adc75c2ca6425 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.32 2009/01/01 17:23:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.33 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,7 +18,7 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/pg_largeobject.h" -#include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/tqual.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7bc82127893bd666ce085c269298d0b713a69bdd..c1f55cfcf931e33d2a46708828b0c79e97dfb97e 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.251 2009/07/30 02:45:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.252 2009/08/04 16:08:36 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index b3f96eb773c0484c1244147e7e991eb543b8b006..3930acf05a7245c042e4c67516041e773a868aa3 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.240 2009/06/11 14:48:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 480b85cefcbae4d197a0bfc6428f22bea035b051..e581e3bc42dfecd2f2940a4a7f6f1f9ea88a011e 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char *hextbl = "0123456789abcdef"; +static const char hextbl[] = "0123456789abcdef"; static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -122,7 +122,7 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, }; -static unsigned +unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; @@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst) return len * 2; } -static char +static inline char get_hex(char c) { int res = -1; @@ -152,7 +152,7 @@ get_hex(char c) return (char) res; } -static unsigned +unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 3d60885a79511149c6e3e11e879a879e5dd54a95..e85ab06819c80d7a7df2ee4382e4f87ec114afd7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,6 +109,7 @@ #include "parser/parse_coerce.h" #include "parser/parsetree.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/fmgroids.h" diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b9b54e6db636fedd93ea3ea65e5087323dd78a6c..c524454432e03b718c13895e9e558db195f252df 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,10 +24,14 @@ #include "parser/scansup.h" #include "regex/regex.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +/* GUC variable */ +int 
bytea_output = BYTEA_OUTPUT_HEX; + typedef struct varlena unknown; typedef struct @@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS) char *inputText = PG_GETARG_CSTRING(0); char *tp; char *rp; - int byte; + int bc; bytea *result; - for (byte = 0, tp = inputText; *tp != '\0'; byte ++) + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) { if (tp[0] != '\\') tp++; @@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS) else { /* - * one backslash, not followed by 0 or ### valid octal + * one backslash, not followed by another or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS) } } - byte +=VARHDRSZ; + bc += VARHDRSZ; - result = (bytea *) palloc(byte); - SET_VARSIZE(result, byte); + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); tp = inputText; rp = VARDATA(result); @@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS) (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { - byte = VAL(tp[1]); - byte <<=3; - byte +=VAL(tp[2]); - byte <<=3; - *rp++ = byte +VAL(tp[3]); + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); tp += 4; } @@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS) /* * byteaout - converts to printable representation of byte array * - * Non-printable characters are inserted as '\nnn' (octal) and '\' as - * '\\'. - * - * NULL vlena should be an error--returning string with NULL for now. + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. 
*/ Datum byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); char *result; - char *vp; char *rp; - int val; /* holds unprintable chars */ - int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; int len; + int i; len = 1; /* empty string has 1 char */ vp = VARDATA_ANY(vlena); @@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS) } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); @@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS) else *rp++ = *vp; } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 76d3ec9da65470c59ba5aeb7aeac158f9685bb7f..264b45451a966e2c959170f8303030813c3dde90 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/guc_tables.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record, * NOTE! Option values may not contain double quotes! 
*/ +static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} +}; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) @@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] = BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL }, + { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e50d7a44f7b839d101ed1384fb1f0ce4972de7d5..41488e264f06a11a7a757fb13c71a218c3c1a3e2 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 360eaf1caf72027ef0d7a0f1d49d13aec4efdb5f..b46c068d4ec75cb8ffd0157ccd2f92381aee7105 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12,7 +12,7 @@ * by PostgreSQL * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.544 2009/08/02 22:14:52 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.545 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -11008,6 +11008,8 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) TableInfo *tbinfo = tginfo->tgtable; PQExpBuffer query; PQExpBuffer delqry; + char 
*tgargs; + size_t lentgargs; const char *p; int findx; @@ -11109,53 +11111,29 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(", fmtId(tginfo->tgfname)); - p = tginfo->tgargs; + tgargs = (char *) PQunescapeBytea(tginfo->tgargs, &lentgargs); + p = tgargs; for (findx = 0; findx < tginfo->tgnargs; findx++) { - const char *s = p; + /* find the embedded null that terminates this trigger argument */ + size_t tlen = strlen(p); - /* Set 'p' to end of arg string. marked by '\000' */ - for (;;) + if (p + tlen >= tgargs + lentgargs) { - p = strchr(p, '\\'); - if (p == NULL) - { - write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", - tginfo->tgargs, - tginfo->dobj.name, - tbinfo->dobj.name); - exit_nicely(); - } - p++; - if (*p == '\\') /* is it '\\'? */ - { - p++; - continue; - } - if (p[0] == '0' && p[1] == '0' && p[2] == '0') /* is it '\000'? */ - break; + /* hm, not found before end of bytea value... */ + write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", + tginfo->tgargs, + tginfo->dobj.name, + tbinfo->dobj.name); + exit_nicely(); } - p--; - - appendPQExpBufferChar(query, '\''); - while (s < p) - { - if (*s == '\'') - appendPQExpBufferChar(query, '\''); - /* - * bytea unconditionally doubles backslashes, so we suppress the - * doubling for standard_conforming_strings. - */ - if (fout->std_strings && *s == '\\' && s[1] == '\\') - s++; - appendPQExpBufferChar(query, *s++); - } - appendPQExpBufferChar(query, '\''); - appendPQExpBuffer(query, - (findx < tginfo->tgnargs - 1) ? 
", " : ""); - p = p + 4; + if (findx > 0) + appendPQExpBuffer(query, ", "); + appendStringLiteralAH(query, p, fout); + p += tlen + 1; } + free(tgargs); appendPQExpBuffer(query, ");\n"); if (tginfo->tgenabled != 't' && tginfo->tgenabled != 'O') diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 4b92cbcb6084e3e7603b66f98a56beb9c4e14c38..b664799fc47dd8186833098abc20e716d36b18a0 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.337 2009/08/03 21:11:39 joe Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.338 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,12 @@ extern Datum char_text(PG_FUNCTION_ARGS); extern Datum domain_in(PG_FUNCTION_ARGS); extern Datum domain_recv(PG_FUNCTION_ARGS); +/* encode.c */ +extern Datum binary_encode(PG_FUNCTION_ARGS); +extern Datum binary_decode(PG_FUNCTION_ARGS); +extern unsigned hex_encode(const char *src, unsigned len, char *dst); +extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* enum.c */ extern Datum enum_in(PG_FUNCTION_ARGS); extern Datum enum_out(PG_FUNCTION_ARGS); @@ -711,28 +717,6 @@ extern Datum unknownout(PG_FUNCTION_ARGS); extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); -extern Datum byteain(PG_FUNCTION_ARGS); -extern Datum byteaout(PG_FUNCTION_ARGS); -extern Datum bytearecv(PG_FUNCTION_ARGS); -extern Datum byteasend(PG_FUNCTION_ARGS); -extern Datum byteaoctetlen(PG_FUNCTION_ARGS); -extern Datum byteaGetByte(PG_FUNCTION_ARGS); -extern Datum byteaGetBit(PG_FUNCTION_ARGS); -extern Datum byteaSetByte(PG_FUNCTION_ARGS); -extern Datum byteaSetBit(PG_FUNCTION_ARGS); -extern Datum binary_encode(PG_FUNCTION_ARGS); 
-extern Datum binary_decode(PG_FUNCTION_ARGS); -extern Datum byteaeq(PG_FUNCTION_ARGS); -extern Datum byteane(PG_FUNCTION_ARGS); -extern Datum bytealt(PG_FUNCTION_ARGS); -extern Datum byteale(PG_FUNCTION_ARGS); -extern Datum byteagt(PG_FUNCTION_ARGS); -extern Datum byteage(PG_FUNCTION_ARGS); -extern Datum byteacmp(PG_FUNCTION_ARGS); -extern Datum byteacat(PG_FUNCTION_ARGS); -extern Datum byteapos(PG_FUNCTION_ARGS); -extern Datum bytea_substr(PG_FUNCTION_ARGS); -extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h new file mode 100644 index 0000000000000000000000000000000000000000..8750d6d0e9c9338c69f3e0c7b5335aa38b80718f --- /dev/null +++ b/src/include/utils/bytea.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * bytea.h + * Declarations for BYTEA data type support. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/utils/bytea.h,v 1.1 2009/08/04 16:08:36 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef BYTEA_H +#define BYTEA_H + +#include "fmgr.h" + + +typedef enum +{ + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX +} ByteaOutputType; + +extern int bytea_output; /* ByteaOutputType, but int for GUC enum */ + +/* functions are in utils/adt/varlena.c */ +extern Datum byteain(PG_FUNCTION_ARGS); +extern Datum byteaout(PG_FUNCTION_ARGS); +extern Datum bytearecv(PG_FUNCTION_ARGS); +extern Datum byteasend(PG_FUNCTION_ARGS); +extern Datum byteaoctetlen(PG_FUNCTION_ARGS); +extern Datum byteaGetByte(PG_FUNCTION_ARGS); +extern Datum byteaGetBit(PG_FUNCTION_ARGS); +extern Datum byteaSetByte(PG_FUNCTION_ARGS); +extern Datum byteaSetBit(PG_FUNCTION_ARGS); +extern Datum byteaeq(PG_FUNCTION_ARGS); +extern Datum 
byteane(PG_FUNCTION_ARGS); +extern Datum bytealt(PG_FUNCTION_ARGS); +extern Datum byteale(PG_FUNCTION_ARGS); +extern Datum byteagt(PG_FUNCTION_ARGS); +extern Datum byteage(PG_FUNCTION_ARGS); +extern Datum byteacmp(PG_FUNCTION_ARGS); +extern Datum byteacat(PG_FUNCTION_ARGS); +extern Datum byteapos(PG_FUNCTION_ARGS); +extern Datum bytea_substr(PG_FUNCTION_ARGS); +extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + +#endif /* BYTEA_H */ diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index c00f5eae6ab1b005a737a05e5cda5abbe27604bc..f1318a4a942d1ffec46d7b5ffe3991d5f47a57f3 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.203 2009/06/11 14:49:13 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.204 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3167,6 +3167,29 @@ PQescapeBytea(const unsigned char *from, size_t from_length, size_t *to_length) } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static inline char +get_hex(char c) +{ + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + + return (char) res; +} + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') @@ -3198,6 +3221,40 @@ PQunescapeBytea(const 
unsigned char *strtext, size_t *retbuflen) strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') + { + const unsigned char *s; + unsigned char *p; + + buflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = (unsigned char *) malloc(buflen > 0 ? buflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, + v2; + + /* + * Bad input is silently ignored. Note that this includes + * whitespace between hex pairs, which is allowed by byteain. + */ + v1 = get_hex(*s++); + if (!*s || v1 == (char) -1) + continue; + v2 = get_hex(*s++); + if (v2 != (char) -1) + *p++ = (v1 << 4) | v2; + } + + buflen = p - buffer; + } + else + { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. @@ -3244,6 +3301,7 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) } } buflen = j; /* buflen is the length of the dequoted data */ + } /* Shrink the buffer to be no larger than necessary */ /* +1 avoids unportable behavior when buflen==0 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a45343eabf1196257ae7c4f427ea8bf3d29..82eca262f0921d2422bc32e89d327bc7f104f3c0 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1,3 +1,5 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2ace63a04ecfbb8408e9b4175f183b6cad1..392f48ef8c661b5a8a8134eb573798af0bea343c 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -97,6 +97,99 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. 
RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba9261ac5fe9426217da76d2b769103976e23b..807cfd7cc466acee701811178428e3e23c5e62e5 100644 --- a/src/test/regress/input/largeobject.source +++ b/src/test/regress/input/largeobject.source @@ -2,6 +2,9 @@ -- Test large 
object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c913e2ebd7bfade32b9546450280594eae..d7468bb5131ae7da865af751b1027258bbb7d6c7 100644 --- a/src/test/regress/output/largeobject.source +++ b/src/test/regress/output/largeobject.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c25171d6e2bc02b4d49012e990f1190a09..84e916fea451c5b545f5ba2043a57d93299c622c 100644 --- a/src/test/regress/output/largeobject_1.source +++ b/src/test/regress/output/largeobject_1.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178315e1282e2df49866b48f100df9a1e973..be194eec1f1265e58fd32bc7a14749d97ffe9390 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -1,3 +1,6 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1e62c65c5bbe4f2ab0f665d859d64a8a1d..63df9402ed70aae6eecc299727db181d001ba1d0 100644 --- a/src/test/regress/sql/strings.sql 
+++ b/src/test/regress/sql/strings.sql @@ -43,6 +43,27 @@ SELECT U&'wrong: +0061' UESCAPE '+'; RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types