diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 99cfde85f2c4486321689962827ac3b9d707404a..1696d61c257ec7c595bc19949f0352d395a9eef4 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ -<!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.17 2001/11/28 20:49:09 petere Exp $ --> +<!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.18 2001/11/29 21:02:41 tgl Exp $ --> <chapter id="arrays"> <title>Arrays</title> @@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000; </para> </note> + <formalpara> + <title>Quoting array elements.</title> + <para> + As shown above, when writing an array literal value you may write double + quotes around any individual array + element. You <emphasis>must</> do so if the element value would otherwise + confuse the array-value parser. For example, elements containing curly + braces, commas, double quotes, backslashes, or white space must be + double-quoted. To put a double quote or backslash in an array element + value, precede it with a backslash. + </para> + </formalpara> + + <tip> + <para> + Remember that what you write in an SQL query will first be interpreted + as a string literal, and then as an array. This doubles the number of + backslashes you need. For example, to insert a <type>text</> array + value containing a backslash and a double quote, you'd need to write +<programlisting> +INSERT ... VALUES ('{"\\\\","\\""}'); +</programlisting> + The string-literal processor removes one level of backslashes, so that + what arrives at the array-value parser looks like <literal>{"\\","\""}</>. + In turn, the strings fed to the <type>text</> datatype's input routine + become <literal>\</> and <literal>"</> respectively. (If we were working + with a datatype whose input routine also treated backslashes specially, + <type>bytea</> for example, we might need as many as eight backslashes + in the query to get one backslash into the stored array element.) + </para> + </tip> + </chapter> diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index f66878471448e2dbefe98b62450bfb8a839e3bde..0cfa371c998338a8fabbe2d78036c36d8f8ffbb6 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.71 2001/10/25 05:49:43 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.72 2001/11/29 21:02:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim) * If element type is pass-by-ref, the Datums point to palloc'd values. * *nbytes is set to the amount of data space needed for the array, * including alignment padding but not including array header overhead. + * CAUTION: the contents of "arrayStr" may be modified! *--------------------------------------------------------------------------- */ static Datum * @@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS) char *p, *tmp, *retval, - **values, - delim[2]; + **values; + bool *needquotes; int nitems, overall_length, i, j, k, -#ifndef TCL_ARRAYS - l, -#endif indx[MAXDIM]; int ndim, *dim; @@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS) system_cache_lookup(element_type, false, &typlen, &typbyval, &typdelim, &typelem, &typoutput, &typalign); fmgr_info(typoutput, &outputproc); - sprintf(delim, "%c", typdelim); ndim = ARR_NDIM(v); dim = ARR_DIMS(v); nitems = ArrayGetNItems(ndim, dim); if (nitems == 0) { - retval = (char *) palloc(3); - retval[0] = '{'; - retval[1] = '}'; - retval[2] = '\0'; + retval = pstrdup("{}"); PG_RETURN_CSTRING(retval); } + /* + * Convert all values to string form, count total space needed + * (including any overhead such as escaping backslashes), + * and detect whether each item needs double quotes. + */ + values = (char **) palloc(nitems * sizeof(char *)); + needquotes = (bool *) palloc(nitems * sizeof(bool)); p = ARR_DATA_PTR(v); overall_length = 1; /* [TRH] don't forget to count \0 at end. */ - values = (char **) palloc(nitems * sizeof(char *)); for (i = 0; i < nitems; i++) { Datum itemvalue; + bool nq; itemvalue = fetch_att(p, typbyval, typlen); values[i] = DatumGetCString(FunctionCall3(&outputproc, @@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS) else p += INTALIGN(*(int32 *) p); - /* - * For the pair of double quotes - */ - if (!typbyval) - overall_length += 2; - + /* count data plus backslashes; detect chars needing quotes */ + nq = (values[i][0] == '\0'); /* force quotes for empty string */ for (tmp = values[i]; *tmp; tmp++) { + char ch = *tmp; + overall_length += 1; + if (ch == '"' || ch == '\\') + { + nq = true; #ifndef TCL_ARRAYS - if (*tmp == '"') overall_length += 1; #endif + } + else if (ch == '{' || ch == '}' || ch == typdelim || + isspace((unsigned char) ch)) + nq = true; } + + needquotes[i] = nq; + + /* Count the pair of double quotes, if needed */ + if (nq) + overall_length += 2; + + /* and the comma */ overall_length += 1; } @@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS) */ for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k); - p = (char *) palloc(overall_length + 2 * j); - retval = p; + retval = (char *) palloc(overall_length + 2 * j); + p = retval; + +#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p)) +#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0') - strcpy(p, "{"); + APPENDCHAR('{'); for (i = 0; i < ndim; indx[i++] = 0); j = 0; k = 0; do { for (i = j; i < ndim - 1; i++) - strcat(p, "{"); + APPENDCHAR('{'); - /* - * Surround anything that is not passed by value in double quotes. - * See above for more details. - */ - if (!typbyval) + if (needquotes[k]) { - strcat(p, "\""); + APPENDCHAR('"'); #ifndef TCL_ARRAYS - l = strlen(p); for (tmp = values[k]; *tmp; tmp++) { - if (*tmp == '"') - p[l++] = '\\'; - p[l++] = *tmp; + char ch = *tmp; + + if (ch == '"' || ch == '\\') + *p++ = '\\'; + *p++ = ch; } - p[l] = '\0'; + *p = '\0'; #else - strcat(p, values[k]); + APPENDSTR(values[k]); #endif - strcat(p, "\""); + APPENDCHAR('"'); } else - strcat(p, values[k]); + APPENDSTR(values[k]); pfree(values[k++]); for (i = ndim - 1; i >= 0; i--) @@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS) indx[i] = (indx[i] + 1) % dim[i]; if (indx[i]) { - strcat(p, delim); + APPENDCHAR(typdelim); break; } else - strcat(p, "}"); + APPENDCHAR('}'); } j = i; } while (j != -1); +#undef APPENDSTR +#undef APPENDCHAR + pfree(values); + pfree(needquotes); + PG_RETURN_CSTRING(retval); } diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 74c1008d907582c812530250cb9b8a46104cc467..f729fe1ed432c1d69da27e3950222b1b0b2150e3 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -28,11 +28,11 @@ INSERT INTO arrtest (a, b[2][2][1], c, d, e, f, g) INSERT INTO arrtest (a, b[1][2][2], c, d[2][1]) VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}'); SELECT * FROM arrtest; - a | b | c | d | e | f | g --------------+-----------------+---------------+-------------------+---------------+-------------------+----------------- - {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {} - {11,12,23} | {{3,4},{4,5}} | {"foobar"} | {{"elt1","elt2"}} | {"3.4","6.7"} | {"abc ","abcde"} | {"abc","abcde"} - {} | {3,4} | {"foo","bar"} | {"bar","foo"} | | | + a | b | c | d | e | f | g +-------------+-----------------+-----------+---------------+-----------+-----------------+------------- + {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {} + {11,12,23} | {{3,4},{4,5}} | {foobar} | {{elt1,elt2}} | {3.4,6.7} | {"abc ",abcde} | {abc,abcde} + {} | {3,4} | {foo,bar} | {bar,foo} | | | (3 rows) SELECT arrtest.a[1], @@ -62,11 +62,11 @@ SELECT a[1:3], c[1:2], d[1:1][1:2] FROM arrtest; - a | b | c | d -------------+-----------------+---------------+------------------- - {1,2,3} | {{{0,0},{1,2}}} | | - {11,12,23} | | {"foobar"} | {{"elt1","elt2"}} - | | {"foo","bar"} | + a | b | c | d +------------+-----------------+-----------+--------------- + {1,2,3} | {{{0,0},{1,2}}} | | + {11,12,23} | | {foobar} | {{elt1,elt2}} + | | {foo,bar} | (3 rows) SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c @@ -98,11 +98,11 @@ UPDATE arrtest SET c[2:2] = '{"new_word"}' WHERE array_dims(c) is not null; SELECT a,b,c FROM arrtest; - a | b | c ----------------+-----------------------+----------------------- + a | b | c +---------------+-----------------------+------------------- {16,25,3,4,5} | {{{113,142},{1,147}}} | {} - {} | {3,4} | {"foo","new_word"} - {16,25,23} | {{3,4},{4,5}} | {"foobar","new_word"} + {} | {3,4} | {foo,new_word} + {16,25,23} | {{3,4},{4,5}} | {foobar,new_word} (3 rows) SELECT a[1:3], @@ -110,10 +110,10 @@ SELECT a[1:3], c[1:2], d[1:1][2:2] FROM arrtest; - a | b | c | d -------------+-----------------------+-----------------------+------------ - {16,25,3} | {{{113,142},{1,147}}} | | - | | {"foo","new_word"} | - {16,25,23} | | {"foobar","new_word"} | {{"elt2"}} + a | b | c | d +------------+-----------------------+-------------------+---------- + {16,25,3} | {{{113,142},{1,147}}} | | + | | {foo,new_word} | + {16,25,23} | | {foobar,new_word} | {{elt2}} (3 rows)