diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index ae2d74e8672094676f72d054f56f19e890bd21d4..e5ca426158ec33aacce9fea053b1c92acc2625b8 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.36 2004/08/05 03:29:11 joe Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/array.sgml,v 1.37 2004/08/08 05:01:51 joe Exp $ --> <sect1 id="arrays"> <title>Arrays</title> @@ -95,10 +95,12 @@ CREATE TABLE tictactoe ( </synopsis> where <replaceable>delim</replaceable> is the delimiter character for the type, as recorded in its <literal>pg_type</literal> entry. - (For all built-in types, this is the comma character - <quote><literal>,</literal></>.) Each - <replaceable>val</replaceable> is either a constant of the array - element type, or a subarray. An example of an array constant is + Among the standard data types provided in the + <productname>PostgreSQL</productname> distribution, type + <literal>box</> uses a semicolon (<literal>;</>) but all the others + use comma (<literal>,</>). Each <replaceable>val</replaceable> is + either a constant of the array element type, or a subarray. An example + of an array constant is <programlisting> '{{1,2,3},{4,5,6},{7,8,9}}' </programlisting> @@ -161,7 +163,7 @@ SELECT * FROM sal_emp; </para> <para> - The <literal>ARRAY</literal> expression syntax may also be used: + The <literal>ARRAY</> constructor syntax may also be used: <programlisting> INSERT INTO sal_emp VALUES ('Bill', @@ -176,8 +178,8 @@ INSERT INTO sal_emp Notice that the array elements are ordinary SQL constants or expressions; for instance, string literals are single quoted, instead of double quoted as they would be in an array literal. The <literal>ARRAY</> - expression syntax is discussed in more detail in <xref - linkend="sql-syntax-array-constructors">. + constructor syntax is discussed in more detail in + <xref linkend="sql-syntax-array-constructors">. 
</para> </sect2> @@ -524,10 +526,17 @@ SELECT * FROM sal_emp WHERE 10000 = ALL (pay_by_quarter); use comma.) In a multidimensional array, each dimension (row, plane, cube, etc.) gets its own level of curly braces, and delimiters must be written between adjacent curly-braced entities of the same level. - You may write whitespace before a left brace, after a right - brace, or before any individual item string. Whitespace after an item - is not ignored, however: after skipping leading whitespace, everything - up to the next right brace or delimiter is taken as the item value. + </para> + + <para> + The array output routine will put double quotes around element values + if they are empty strings or contain curly braces, delimiter characters, + double quotes, backslashes, or white space. Double quotes and backslashes + embedded in element values will be backslash-escaped. For numeric + data types it is safe to assume that double quotes will never appear, but + for textual data types one should be prepared to cope with either presence + or absence of quotes. (This is a change in behavior from pre-7.2 + <productname>PostgreSQL</productname> releases.) </para> <para> @@ -573,26 +582,22 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 <para> As shown previously, when writing an array value you may write double - quotes around any individual array - element. You <emphasis>must</> do so if the element value would otherwise - confuse the array-value parser. For example, elements containing curly - braces, commas (or whatever the delimiter character is), double quotes, - backslashes, or leading white space must be double-quoted. To put a double - quote or backslash in a quoted array element value, precede it with a - backslash. - Alternatively, you can use backslash-escaping to protect all data characters - that would otherwise be taken as array syntax or ignorable white space. + quotes around any individual array element. 
You <emphasis>must</> do so + if the element value would otherwise confuse the array-value parser. + For example, elements containing curly braces, commas (or whatever the + delimiter character is), double quotes, backslashes, or leading white + space must be double-quoted. To put a double quote or backslash in a + quoted array element value, precede it with a backslash. Alternatively, + you can use backslash-escaping to protect all data characters that would + otherwise be taken as array syntax. </para> <para> - The array output routine will put double quotes around element values - if they are empty strings or contain curly braces, delimiter characters, - double quotes, backslashes, or white space. Double quotes and backslashes - embedded in element values will be backslash-escaped. For numeric - data types it is safe to assume that double quotes will never appear, but - for textual data types one should be prepared to cope with either presence - or absence of quotes. (This is a change in behavior from pre-7.2 - <productname>PostgreSQL</productname> releases.) + You may write whitespace before a left brace or after a right + brace. You may also write whitespace before or after any individual item + string. In all of these cases the whitespace will be ignored. However, + whitespace within double-quoted elements, or surrounded on both sides by + non-whitespace characters of an element, is not ignored. </para> <note> @@ -616,10 +621,11 @@ INSERT ... VALUES ('{"\\\\","\\""}'); <tip> <para> - The <literal>ARRAY</> constructor syntax is often easier to work with - than the array-literal syntax when writing array values in SQL commands. - In <literal>ARRAY</>, individual element values are written the same way - they would be written when not members of an array. + The <literal>ARRAY</> constructor syntax (see + <xref linkend="sql-syntax-array-constructors">) is often easier to work + with than the array-literal syntax when writing array values in SQL + commands. 
In <literal>ARRAY</>, individual element values are written the + same way they would be written when not members of an array. </para> </tip> </sect2> diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index a8b1b636e0885986ade57f0e36d2e8f46f83f1ff..8e46c2fde105ba7b356941aa654886531f56e2eb 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.106 2004/08/05 03:29:37 joe Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.107 2004/08/08 05:01:55 joe Exp $ * *------------------------------------------------------------------------- */ @@ -351,18 +351,32 @@ array_in(PG_FUNCTION_ARGS) * The syntax for array input is C-like nested curly braces *----------------------------------------------------------------------------- */ +typedef enum +{ + ARRAY_NO_LEVEL, + ARRAY_LEVEL_STARTED, + ARRAY_ELEM_STARTED, + ARRAY_ELEM_COMPLETED, + ARRAY_QUOTED_ELEM_STARTED, + ARRAY_QUOTED_ELEM_COMPLETED, + ARRAY_ELEM_DELIMITED, + ARRAY_LEVEL_COMPLETED, + ARRAY_LEVEL_DELIMITED +} ArrayParseState; + static int ArrayCount(char *str, int *dim, char typdelim) { - int nest_level = 0, - i; - int ndim = 1, - temp[MAXDIM], - nelems[MAXDIM], - nelems_last[MAXDIM]; - bool scanning_string = false; - bool eoArray = false; - char *ptr; + int nest_level = 0, + i; + int ndim = 1, + temp[MAXDIM], + nelems[MAXDIM], + nelems_last[MAXDIM]; + bool scanning_string = false; + bool eoArray = false; + char *ptr; + ArrayParseState parse_state = ARRAY_NO_LEVEL; for (i = 0; i < MAXDIM; ++i) { @@ -370,6 +384,7 @@ ArrayCount(char *str, int *dim, char typdelim) nelems_last[i] = nelems[i] = 1; } + /* special case for an empty array */ if (strncmp(str, "{}", 2) == 0) return 0; @@ -389,6 +404,20 @@ ArrayCount(char *str, int *dim, char typdelim) errmsg("malformed array literal: \"%s\"", str))); break; case '\\': + /* + * An escape must be 
after a level start, after an + * element start, or after an element delimiter. In any + * case we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + if (parse_state != ARRAY_QUOTED_ELEM_STARTED) + parse_state = ARRAY_ELEM_STARTED; /* skip the escaped character */ if (*(ptr + 1)) ptr++; @@ -398,11 +427,38 @@ ArrayCount(char *str, int *dim, char typdelim) errmsg("malformed array literal: \"%s\"", str))); break; case '\"': + /* + * A quote must be after a level start, after a quoted + * element start, or after an element delimiter. In any + * case we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); scanning_string = !scanning_string; + if (scanning_string) + parse_state = ARRAY_QUOTED_ELEM_STARTED; + else + parse_state = ARRAY_QUOTED_ELEM_COMPLETED; break; case '{': if (!scanning_string) { + /* + * A left brace can occur if no nesting has + * occurred yet, after a level start, or + * after a level delimiter. 
+ */ + if (parse_state != ARRAY_NO_LEVEL && + parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_STARTED; if (nest_level >= MAXDIM) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), @@ -417,6 +473,19 @@ ArrayCount(char *str, int *dim, char typdelim) case '}': if (!scanning_string) { + /* + * A right brace can occur after an element start, + * an element completion, a quoted element completion, + * or a level completion. + */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_COMPLETED && + parse_state != ARRAY_QUOTED_ELEM_COMPLETED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_COMPLETED; if (nest_level == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -445,10 +514,45 @@ ArrayCount(char *str, int *dim, char typdelim) } break; default: - if (*ptr == typdelim && !scanning_string) + if (!scanning_string) { - itemdone = true; - nelems[nest_level - 1]++; + if (*ptr == typdelim) + { + /* + * Delimiters can occur after an element start, + * an element completion, a quoted element + * completion, or a level completion. 
+ */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_COMPLETED && + parse_state != ARRAY_QUOTED_ELEM_COMPLETED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + if (parse_state == ARRAY_LEVEL_COMPLETED) + parse_state = ARRAY_LEVEL_DELIMITED; + else + parse_state = ARRAY_ELEM_DELIMITED; + itemdone = true; + nelems[nest_level - 1]++; + } + else if (!isspace(*ptr)) + { + /* + * Other non-space characters must be after a level + * start, after an element start, or after an element + * delimiter. In any case we now must be past an + * element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_ELEM_STARTED; + } } break; } @@ -511,12 +615,15 @@ ReadArrayStr(char *arrayStr, while (!eoArray) { bool itemdone = false; + bool itemquoted = false; int i = -1; char *itemstart; + char *eptr; /* skip leading whitespace */ while (isspace((unsigned char) *ptr)) ptr++; + itemstart = ptr; while (!itemdone) @@ -547,11 +654,15 @@ ReadArrayStr(char *arrayStr, char *cptr; scanning_string = !scanning_string; - /* Crunch the string on top of the quote. */ - for (cptr = ptr; *cptr != '\0'; cptr++) - *cptr = *(cptr + 1); - /* Back up to not miss following character. */ - ptr--; + if (scanning_string) + { + itemquoted = true; + /* Crunch the string on top of the first quote. */ + for (cptr = ptr; *cptr != '\0'; cptr++) + *cptr = *(cptr + 1); + /* Back up to not miss following character. 
*/ + ptr--; + } break; } case '{': @@ -615,6 +726,25 @@ ReadArrayStr(char *arrayStr, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", arrayStr))); + /* + * skip trailing whitespace + */ + eptr = ptr - 1; + if (!itemquoted) + { + /* skip to last non-NUL, non-space character */ + while ((*eptr == '\0') || (isspace((unsigned char) *eptr))) + eptr--; + *(++eptr) = '\0'; + } + else + { + /* skip to last quote character */ + while (*eptr != '"') + eptr--; + *eptr = '\0'; + } + values[i] = FunctionCall3(inputproc, CStringGetDatum(itemstart), ObjectIdGetDatum(typioparam),