diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml index ad556d01516238d9fe2cc7a62f834cb5021d42b4..71b4a8cd4063702a091e2df64a4625a86a0d914d 100644 --- a/doc/src/sgml/errcodes.sgml +++ b/doc/src/sgml/errcodes.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.12 2005/01/06 01:49:24 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.13 2005/06/26 19:16:04 tgl Exp $ --> <appendix id="errcodes-appendix"> <title><productname>PostgreSQL</productname> Error Codes</title> @@ -370,6 +370,11 @@ <entry>INVALID ESCAPE SEQUENCE</entry> </row> +<row> +<entry><literal>22P06</literal></entry> +<entry>NONSTANDARD USE OF ESCAPE CHARACTER</entry> +</row> + <row> <entry><literal>22010</literal></entry> <entry>INVALID INDICATOR PARAMETER VALUE</entry> diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 9c1b94e2be7660e2c4b160824d3aa1543905556c..a5bde7fc76e3cb990797a4fdda12462880b4ae5a 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/libpq.sgml,v 1.186 2005/06/21 04:02:29 tgl Exp $ +$PostgreSQL: pgsql/doc/src/sgml/libpq.sgml,v 1.187 2005/06/26 19:16:04 tgl Exp $ --> <chapter id="libpq"> @@ -286,7 +286,7 @@ PGconn *PQconnectdb(const char *conninfo); Kerberos service name to use when authenticating with Kerberos 4 or 5. This must match the service name specified in the server configuration for Kerberos authentication to succeed. (See also - <xref linkend="kerberos-auth">.) + <xref linkend="kerberos-auth">.) </para> </listitem> </varlistentry> @@ -888,10 +888,13 @@ Parameters reported as of the current release include <literal>is_superuser</>, <literal>session_authorization</>, <literal>DateStyle</>, -<literal>TimeZone</>, and -<literal>integer_datetimes</>. +<literal>TimeZone</>, +<literal>integer_datetimes</>, and +<literal>standard_compliant_strings</>. (<literal>server_encoding</>, <literal>TimeZone</>, and -<literal>integer_datetimes</> were not reported by releases before 8.0.) +<literal>integer_datetimes</> were not reported by releases before 8.0; +<literal>standard_compliant_strings</> was not reported by releases +before 8.1.) Note that <literal>server_version</>, <literal>server_encoding</> and @@ -913,6 +916,14 @@ see also <function>PQserverVersion</>, which returns the information in a numeric form that is much easier to compare against. </para> +<para> +If no value for <literal>standard_compliant_strings</> is reported, +applications may assume it is <literal>false</>, that is, backslashes +are treated as escapes in string literals. Also, the presence of this +parameter may be taken as an indication that the escape string syntax +(<literal>E'...'</>) is accepted. +</para> + <para> Although the returned pointer is declared <literal>const</>, it in fact points to mutable storage associated with the <literal>PGconn</> structure. diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cd6fa0e94f0e6fccec0af3cb6ecb062bec0e8255..7ebcfc63a34381e3874ee476664eabccd9e10c10 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/protocol.sgml,v 1.59 2005/06/22 15:19:43 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/protocol.sgml,v 1.60 2005/06/26 19:16:04 tgl Exp $ --> <chapter id="protocol"> <title>Frontend/Backend Protocol</title> @@ -1072,10 +1072,13 @@ <literal>is_superuser</>, <literal>session_authorization</>, <literal>DateStyle</>, - <literal>TimeZone</>, and - <literal>integer_datetimes</>. + <literal>TimeZone</>, + <literal>integer_datetimes</>, and + <literal>standard_compliant_strings</>. (<literal>server_encoding</>, <literal>TimeZone</>, and - <literal>integer_datetimes</> were not reported by releases before 8.0.) + <literal>integer_datetimes</> were not reported by releases before 8.0; + <literal>standard_compliant_strings</> was not reported by releases + before 8.1.) Note that <literal>server_version</>, <literal>server_encoding</> and diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 3841b0dee2d6ab3da034fdcfc87c139c0e5a3a54..c0b3e65ba78a6a39bad6c94c4fedef612b9110f4 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.331 2005/06/26 03:03:17 momjian Exp $ +$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.332 2005/06/26 19:16:04 tgl Exp $ --> <chapter Id="runtime"> @@ -3766,13 +3766,11 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' <listitem> <para> When <literal>on</>, a warning is issued if a backslash - (<literal>\</>) appears in a ordinary, non-escape syntax - (<literal>''</>) string. To log the statement that generated the - warning, set <varname>log_min_error_statement</> to - <literal>error</>. The default is off. + (<literal>\</>) appears in an ordinary string literal + (<literal>'...'</> syntax). The default is <literal>off</>. </para> <para> - Escape string syntax (<literal>E''</>) should be used for + Escape string syntax (<literal>E'...'</>) should be used for escapes, because in future versions of <productname>PostgreSQL</productname> ordinary strings will have the standard-compliant behavior of treating backslashes @@ -3988,22 +3986,7 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' </listitem> </varlistentry> - <varlistentry id="guc-escape-string-syntax" xreflabel="escape_string_syntax"> - <term><varname>escape_string_syntax</varname> (<type>boolean</type>)</term> - <indexterm><primary>strings</><secondary>escape</></> - <indexterm> - <primary><varname>escape_string_syntax</> configuration parameter</primary> - </indexterm> - <listitem> - <para> - Reports whether escape string syntax (<literal>E''</>) is - supported. This variable is used by applications that need to - determine if escape string syntax can be used in their code. - </para> - </listitem> - </varlistentry> - - <varlistentry id="guc-sql-standard-strings" xreflabel="standard_compliant_strings"> + <varlistentry id="guc-standard-compliant-strings" xreflabel="standard_compliant_strings"> <term><varname>standard_compliant_strings</varname> (<type>boolean</type>)</term> <indexterm><primary>strings</><secondary>escape</></> <indexterm> @@ -4011,10 +3994,16 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' </indexterm> <listitem> <para> - Reports whether ordinary, non-escape syntax strings - (<literal>''</>) treat backslashes literally, as specified in - the SQL standard. This variable is used by applications that - need to know how ordinary strings are processed`. + Reports whether ordinary string literals + (<literal>'...'</>) treat backslashes literally, as specified in + the SQL standard. The value is currently always <literal>false</>, + indicating that backslashes are treated as escapes. It is planned + that this will change to <literal>true</> in a future + <productname>PostgreSQL</productname> release when string literal + syntax changes to meet the standard. Applications may check this + parameter to determine how string literals will be processed. + The presence of this parameter can also be taken as an indication + that the escape string syntax (<literal>E'...'</>) is supported. </para> </listitem> </varlistentry> diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 3d8d457c561f5d1d99f683d98c184fb83d814e6c..0d3d7f19f1896d3dae07f4b7e8ebe28e1c0f007a 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.101 2005/06/26 03:03:21 momjian Exp $ +$PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.102 2005/06/26 19:16:05 tgl Exp $ --> <chapter id="sql-syntax"> @@ -249,7 +249,7 @@ UPDATE "my_table" SET "a" = 5; <productname>PostgreSQL</productname> also allows single quotes to be escaped with a backslash (<literal>\'</literal>). However, future versions of <productname>PostgreSQL</productname> will not - support this so applications using this should convert to the + allow this, so applications using backslashes should convert to the standard-compliant method outlined above. </para> @@ -276,8 +276,8 @@ UPDATE "my_table" SET "a" = 5; eventually treat backslashes as literal characters to be standard-compliant. The proper way to specify escape processing is to use the escape string syntax to indicate that escape - processing is desired. Escape string syntax is specified by placing - the the letter <literal>E</literal> (upper or lower case) before + processing is desired. Escape string syntax is specified by writing + the letter <literal>E</literal> (upper or lower case) just before the string, e.g. <literal>E'\041'</>. This method will work in all future versions of <productname>PostgreSQL</productname>. </para> diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 4c556a5fba29e0f992cde889bd3d6a43f6f70ba1..f9b0dbdb75b8670596883bb79f3b1ba65bc5be26 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.126 2005/06/26 03:03:38 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.127 2005/06/26 19:16:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,10 +48,19 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ -static char *dolqstart; /* current $foo$ quote start string */ -static bool warn_on_first_escape; +static char *dolqstart; /* current $foo$ quote start string */ + +/* + * GUC variable. This is a DIRECT violation of the warning given at the + * head of gram.y, ie flex/bison code must not depend on any GUC variables; + * as such, changing its value can induce very unintuitive behavior. + * But we shall have to live with it as a short-term thing until the switch + * to SQL-standard string syntax is complete. + */ bool escape_string_warning; +static bool warn_on_first_escape; + /* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal. Call startlit to reset buffer @@ -66,6 +75,7 @@ static int literalalloc; /* current allocated buffer size */ static void addlit(char *ytext, int yleng); static void addlitchar(unsigned char ychar); static char *litbufdup(void); +static int pg_err_position(void); static void check_escape_warning(void); /* @@ -188,9 +198,8 @@ xhinside [^']* /* National character */ xnstart [nN]{quote} -/* Quote string does not warn about escapes */ +/* Quoted string that allows backslash escapes */ xestart [eE]{quote} -xeinside [^']* /* Extended quote * xqdouble implements embedded quote, '''' @@ -446,17 +455,21 @@ other . { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\' in a normal string"), - errhint("Use '' to place quotes in strings, or use the escape string syntax (E'')."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\' in a string literal"), + errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else if (yytext[1] == '\\') { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\\\ in a normal string"), - errhint("Use the escape string syntax for backslashes, e.g. E'\\\\'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\\\ in a string literal"), + errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else check_escape_warning(); @@ -707,14 +720,20 @@ other . %% -void -yyerror(const char *message) +static int +pg_err_position(void) { const char *loc = token_start ? token_start : yytext; - int cursorpos; /* in multibyte encodings, return index in characters not bytes */ - cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; + return pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; +} + +void +yyerror(const char *message) +{ + const char *loc = token_start ? token_start : yytext; + int cursorpos = pg_err_position(); if (*loc == YY_END_OF_BUFFER_CHAR) { @@ -852,8 +871,9 @@ check_escape_warning(void) { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of escapes in an ordinary string"), - errhint("Use the escape string syntax for escapes, e.g. E'\\r\\n'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of escape in a string literal"), + errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), + errposition(pg_err_position()))); warn_on_first_escape = false; /* warn only once per string */ } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 1738604942615a179627518ee1c926bfa5753b85..0ab8e742336ae1eabdf6b16d313ea40a41856e09 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.269 2005/06/26 03:03:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.270 2005/06/26 19:16:06 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -190,7 +190,6 @@ static int max_index_keys; static int max_identifier_length; static int block_size; static bool integer_datetimes; -static bool escape_string_syntax; static bool standard_compliant_strings; /* should be static, but commands/variable.c needs to get at it */ @@ -877,26 +876,16 @@ static struct config_bool ConfigureNamesBool[] = { {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, - gettext_noop("Warn about backslash escapes in ordinary, non-escape-syntax strings."), + gettext_noop("Warn about backslash escapes in ordinary string literals."), NULL }, &escape_string_warning, false, NULL, NULL }, - { - {"escape_string_syntax", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("Escape string syntax (E'') is supported."), - NULL, - GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE - }, - &escape_string_syntax, - true, NULL, NULL - }, - { {"standard_compliant_strings", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("'' strings treat backslashes literally."), + gettext_noop("'...' strings treat backslashes literally."), NULL, GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index a093b3703f5d6ad31f73088d24a995ecb0269904..f62fe6224f9d740e88afab2f6f73d5f79d24f99f 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -33,7 +33,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.14 2005/06/02 17:45:19 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.15 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -235,17 +235,18 @@ quotefail {quote}{whitespace}*"-" xbstart [bB]{quote} xbinside [^']* -/* Hexadecimal number - */ +/* Hexadecimal number */ xhstart [xX]{quote} xhinside [^']* -/* National character - */ +/* National character */ xnstart [nN]{quote} +/* Quoted string that allows backslash escapes */ +xestart [eE]{quote} + /* Extended quote - * xqdouble implements embedded quote + * xqdouble implements embedded quote, '''' */ xqstart {quote} xqdouble {quote}{quote} @@ -450,6 +451,10 @@ other . BEGIN(xq); ECHO; } +{xestart} { + BEGIN(xq); + ECHO; + } <xq>{quotestop} | <xq>{quotefail} { yyless(1); diff --git a/src/include/utils/errcodes.h b/src/include/utils/errcodes.h index 646193def499a1e6d672512e1c8334dbdfbb5ff6..636c3f7a379e1f6984f3f41d7b9af0d6e5425daa 100644 --- a/src/include/utils/errcodes.h +++ b/src/include/utils/errcodes.h @@ -11,7 +11,7 @@ * * Copyright (c) 2003-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.17 2005/01/01 20:44:30 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.18 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,6 +126,7 @@ #define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9') #define ERRCODE_INVALID_ESCAPE_OCTET MAKE_SQLSTATE('2','2', '0','0','D') #define ERRCODE_INVALID_ESCAPE_SEQUENCE MAKE_SQLSTATE('2','2', '0','2','5') +#define ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', 'P','0','6') #define ERRCODE_INVALID_INDICATOR_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','1','0') #define ERRCODE_INVALID_LIMIT_VALUE MAKE_SQLSTATE('2','2', '0','2','0') #define ERRCODE_INVALID_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','2','3') diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l index 680a58fc018bd33d1bb37759639fb661e620ba76..e69c8f17b02a60c98d4ef9012edc4aeb8c49f935 100644 --- a/src/pl/plpgsql/src/scan.l +++ b/src/pl/plpgsql/src/scan.l @@ -4,7 +4,7 @@ * procedural language * * IDENTIFICATION - * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.41 2005/06/22 01:35:02 neilc Exp $ + * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.42 2005/06/26 19:16:07 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -291,6 +291,12 @@ dump { return O_DUMP; } start_charpos = yytext; BEGIN(IN_STRING); } +[eE]' { + /* for now, treat the same as a regular literal */ + start_lineno = plpgsql_scanner_lineno(); + start_charpos = yytext; + BEGIN(IN_STRING); + } <IN_STRING>\\. { } <IN_STRING>\\ { /* can only happen with \ at EOF */ } <IN_STRING>'' { } @@ -563,18 +569,41 @@ plpgsql_get_string_value(void) memcpy(result, yytext + dolqlen, len); result[len] = '\0'; } + else if (*yytext == 'E' || *yytext == 'e') + { + /* Token is an E'...' string */ + result = (char *) palloc(yyleng + 1); /* more than enough room */ + len = 0; + for (cp = yytext + 2; *cp; cp++) + { + if (*cp == '\'') + { + if (cp[1] == '\'') + result[len++] = *cp++; + /* else it must be string end quote */ + } + else if (*cp == '\\') + { + if (cp[1] != '\0') /* just a paranoid check */ + result[len++] = *(++cp); + } + else + result[len++] = *cp; + } + result[len] = '\0'; + } else { /* Token is a '...' string */ result = (char *) palloc(yyleng + 1); /* more than enough room */ len = 0; - for (cp = yytext; *cp; cp++) + for (cp = yytext + 1; *cp; cp++) { if (*cp == '\'') { if (cp[1] == '\'') result[len++] = *cp++; - /* else it must be string start or end quote */ + /* else it must be string end quote */ } else if (*cp == '\\') {