diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index e79c2946d0c650b854233ec96051462cb1c6a21f..a7a7c2b3ade869a273d992655c175a20e98d8352 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -43,8 +43,6 @@ typedef struct /* state of JSON lexer */ char *token_start; /* start of current token within input */ char *token_terminator; /* end of previous or current token */ JsonValueType token_type; /* type of current token, once it's known */ - int line_number; /* current line number (counting from 1) */ - char *line_start; /* start of current line within input (BROKEN!!) */ } JsonLexContext; typedef enum /* states of JSON parser */ @@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex); static void json_lex_number(JsonLexContext *lex, char *s); static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex); static void report_invalid_token(JsonLexContext *lex); +static int report_json_context(JsonLexContext *lex); static char *extract_mb_char(char *s); static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds); @@ -185,8 +184,6 @@ json_validate_cstring(char *input) /* Set up lexing context. */ lex.input = input; lex.token_terminator = lex.input; - lex.line_number = 1; - lex.line_start = input; /* Set up parse stack. */ stacksize = 32; @@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex) /* Skip leading whitespace. */ s = lex->token_terminator; while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') - { - if (*s == '\n') - lex->line_number++; s++; - } lex->token_start = s; /* Determine token type. */ @@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex) { /* End of string. */ lex->token_start = NULL; - lex->token_terminator = NULL; + lex->token_terminator = s; } else { @@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex) /* * We got some sort of unexpected punctuation or an otherwise * unexpected character, so just complain about that one - * character. + * character. 
(It can't be multibyte because the above loop + * will advance over any multibyte characters.) */ lex->token_terminator = s + 1; report_invalid_token(lex); @@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex) lex->token_terminator = s; report_invalid_token(lex); } + /* Since *s isn't printable, exclude it from the context string */ + lex->token_terminator = s; ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Character with value \"0x%02x\" must be escaped.", - lex->line_number, (unsigned char) *s))); + errdetail("Character with value 0x%02x must be escaped.", + (unsigned char) *s), + report_json_context(lex))); } else if (*s == '\\') { @@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex) for (i = 1; i <= 4; i++) { - if (s[i] == '\0') + s++; + if (*s == '\0') { - lex->token_terminator = s + i; + lex->token_terminator = s; report_invalid_token(lex); } - else if (s[i] >= '0' && s[i] <= '9') - ch = (ch * 16) + (s[i] - '0'); - else if (s[i] >= 'a' && s[i] <= 'f') - ch = (ch * 16) + (s[i] - 'a') + 10; - else if (s[i] >= 'A' && s[i] <= 'F') - ch = (ch * 16) + (s[i] - 'A') + 10; + else if (*s >= '0' && *s <= '9') + ch = (ch * 16) + (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + ch = (ch * 16) + (*s - 'a') + 10; + else if (*s >= 'A' && *s <= 'F') + ch = (ch * 16) + (*s - 'A') + 10; else { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: \"\\u\" must be followed by four hexadecimal digits.", - lex->line_number))); + errdetail("\"\\u\" must be followed by four hexadecimal digits."), + report_json_context(lex))); } } - - /* Account for the four additional bytes we just parsed. */ - s += 4; } else if (strchr("\"\\/bfnrt", *s) == NULL) { /* Not a valid string escape, so error out. 
*/ + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Invalid escape \"\\%s\".", - lex->line_number, extract_mb_char(s)))); + errdetail("Escape sequence \"\\%s\" is invalid.", + extract_mb_char(s)), + report_json_context(lex))); } } } @@ -599,68 +597,108 @@ json_lex_number(JsonLexContext *lex, char *s) /* * Report a parse error. + * + * lex->token_start and lex->token_terminator must identify the current token. */ static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex) { - char *detail = NULL; - char *token = NULL; + char *token; int toklen; /* Handle case where the input ended prematurely. */ if (lex->token_start == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - errdetail("The input string ended unexpectedly."))); + errmsg("invalid input syntax for type json"), + errdetail("The input string ended unexpectedly."), + report_json_context(lex))); - /* Separate out the offending token. */ + /* Separate out the current token. */ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); token[toklen] = '\0'; - /* Select correct detail message. */ + /* Complain, with the appropriate detail message. 
*/ if (stack == NULL) - detail = "line %d: Expected end of input, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected end of input, but found \"%s\".", + token), + report_json_context(lex))); else { switch (stack->state) { case JSON_PARSE_VALUE: - detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected JSON value, but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_START: - detail = "line %d: Expected array element or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected array element or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_NEXT: - detail = "line %d: Expected \",\" or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_START: - detail = "line %d: Expected string or \"}\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_LABEL: - detail = "line %d: Expected \":\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \":\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_NEXT: - detail = "line %d: Expected \",\" or \"}\", 
but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_COMMA: - detail = "line %d: Expected string, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string, but found \"%s\".", + token), + report_json_context(lex))); break; + default: + elog(ERROR, "unexpected json parse state: %d", + (int) stack->state); } } - - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - detail ? errdetail(detail, lex->line_number, token) : 0)); } /* * Report an invalid input token. + * + * lex->token_start and lex->token_terminator must identify the token. */ static void report_invalid_token(JsonLexContext *lex) @@ -668,6 +706,7 @@ report_invalid_token(JsonLexContext *lex) char *token; int toklen; + /* Separate out the offending token. */ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); @@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Token \"%s\" is invalid.", - lex->line_number, token))); + errdetail("Token \"%s\" is invalid.", token), + report_json_context(lex))); +} + +/* + * Report a CONTEXT line for bogus JSON input. + * + * lex->token_terminator must be set to identify the spot where we detected + * the error. Note that lex->token_start might be NULL, in case we detected the + * error at EOF. + * + * The return value isn't meaningful, but we make it non-void so that this + * can be invoked inside ereport(). 
+ */ +static int +report_json_context(JsonLexContext *lex) +{ + const char *context_start; + const char *context_end; + const char *line_start; + int line_number; + char *ctxt; + int ctxtlen; + const char *prefix; + const char *suffix; + + /* Choose boundaries for the part of the input we will display */ + context_start = lex->input; + context_end = lex->token_terminator; + line_start = context_start; + line_number = 1; + for (;;) + { + /* Always advance over newlines (context_end test is just paranoia) */ + if (*context_start == '\n' && context_start < context_end) + { + context_start++; + line_start = context_start; + line_number++; + continue; + } + /* Otherwise, done as soon as we are close enough to context_end */ + if (context_end - context_start < 50) + break; + /* Advance to next multibyte character */ + if (IS_HIGHBIT_SET(*context_start)) + context_start += pg_mblen(context_start); + else + context_start++; + } + + /* + * We add "..." to indicate that the excerpt doesn't start at the + * beginning of the line ... but if we're within 3 characters of the + * beginning of the line, we might as well just show the whole line. + */ + if (context_start - line_start <= 3) + context_start = line_start; + + /* Get a null-terminated copy of the data to present */ + ctxtlen = context_end - context_start; + ctxt = palloc(ctxtlen + 1); + memcpy(ctxt, context_start, ctxtlen); + ctxt[ctxtlen] = '\0'; + + /* + * Show the context, prefixing "..." if not starting at start of line, and + * suffixing "..." if not ending at end of line. + */ + prefix = (context_start > line_start) ? "..." : ""; + suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." 
: ""; + + return errcontext("JSON data, line %d: %s%s%s", + line_number, prefix, ctxt, suffix); } /* diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 4b1ad89de6e6b73bdfcd3fb6fb2ef4dbbab0a3d5..2dfe7bb0eec87c2793340fed1a1b4ce1a9604e98 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -9,7 +9,8 @@ SELECT $$''$$::json; -- ERROR, single quotes are not allowed ERROR: invalid input syntax for type json LINE 1: SELECT $$''$$::json; ^ -DETAIL: line 1: Token "'" is invalid. +DETAIL: Token "'" is invalid. +CONTEXT: JSON data, line 1: '... SELECT '"abc"'::json; -- OK json ------- @@ -20,13 +21,15 @@ SELECT '"abc'::json; -- ERROR, quotes not closed ERROR: invalid input syntax for type json LINE 1: SELECT '"abc'::json; ^ -DETAIL: line 1: Token ""abc" is invalid. +DETAIL: Token ""abc" is invalid. +CONTEXT: JSON data, line 1: "abc SELECT '"abc def"'::json; -- ERROR, unescaped newline in string constant ERROR: invalid input syntax for type json LINE 1: SELECT '"abc ^ -DETAIL: line 1: Character with value "0x0a" must be escaped. +DETAIL: Character with value 0x0a must be escaped. +CONTEXT: JSON data, line 1: "abc SELECT '"\n\"\\"'::json; -- OK, legal escapes json ---------- @@ -37,22 +40,26 @@ SELECT '"\v"'::json; -- ERROR, not a valid JSON escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\v"'::json; ^ -DETAIL: line 1: Invalid escape "\v". +DETAIL: Escape sequence "\v" is invalid. +CONTEXT: JSON data, line 1: "\v... SELECT '"\u"'::json; -- ERROR, incomplete escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\u"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u" SELECT '"\u00"'::json; -- ERROR, incomplete escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\u00"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. 
+DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u00" SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit ERROR: invalid input syntax for type json LINE 1: SELECT '"\u000g"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u000g... SELECT '"\u0000"'::json; -- OK, legal escape json ---------- @@ -82,7 +89,8 @@ SELECT '01'::json; -- ERROR, not valid according to JSON spec ERROR: invalid input syntax for type json LINE 1: SELECT '01'::json; ^ -DETAIL: line 1: Token "01" is invalid. +DETAIL: Token "01" is invalid. +CONTEXT: JSON data, line 1: 01 SELECT '0.1'::json; -- OK json ------ @@ -111,17 +119,20 @@ SELECT '1f2'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '1f2'::json; ^ -DETAIL: line 1: Token "1f2" is invalid. +DETAIL: Token "1f2" is invalid. +CONTEXT: JSON data, line 1: 1f2 SELECT '0.x1'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '0.x1'::json; ^ -DETAIL: line 1: Token "0.x1" is invalid. +DETAIL: Token "0.x1" is invalid. +CONTEXT: JSON data, line 1: 0.x1 SELECT '1.3ex100'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '1.3ex100'::json; ^ -DETAIL: line 1: Token "1.3ex100" is invalid. +DETAIL: Token "1.3ex100" is invalid. +CONTEXT: JSON data, line 1: 1.3ex100 -- Arrays. SELECT '[]'::json; -- OK json @@ -142,20 +153,23 @@ SELECT '[1,2]'::json; -- OK (1 row) SELECT '[1,2,]'::json; -- ERROR, trailing comma -ERROR: invalid input syntax for type json: "[1,2,]" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,2,]'::json; ^ -DETAIL: line 1: Expected string, number, object, array, true, false, or null, but found "]". +DETAIL: Expected JSON value, but found "]". 
+CONTEXT: JSON data, line 1: [1,2,] SELECT '[1,2'::json; -- ERROR, no closing bracket -ERROR: invalid input syntax for type json: "[1,2" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,2'::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,2 SELECT '[1,[2]'::json; -- ERROR, no closing bracket -ERROR: invalid input syntax for type json: "[1,[2]" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,[2]'::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,[2] -- Objects. SELECT '{}'::json; -- OK json @@ -164,10 +178,11 @@ SELECT '{}'::json; -- OK (1 row) SELECT '{"abc"}'::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "{"abc"}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"}'::json; ^ -DETAIL: line 1: Expected ":", but found "}". +DETAIL: Expected ":", but found "}". +CONTEXT: JSON data, line 1: {"abc"} SELECT '{"abc":1}'::json; -- OK json ----------- @@ -175,25 +190,29 @@ SELECT '{"abc":1}'::json; -- OK (1 row) SELECT '{1:"abc"}'::json; -- ERROR, keys must be strings -ERROR: invalid input syntax for type json: "{1:"abc"}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{1:"abc"}'::json; ^ -DETAIL: line 1: Expected string or "}", but found "1". +DETAIL: Expected string or "}", but found "1". +CONTEXT: JSON data, line 1: {1... SELECT '{"abc",1}'::json; -- ERROR, wrong separator -ERROR: invalid input syntax for type json: "{"abc",1}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc",1}'::json; ^ -DETAIL: line 1: Expected ":", but found ",". +DETAIL: Expected ":", but found ",". +CONTEXT: JSON data, line 1: {"abc",... SELECT '{"abc"=1}'::json; -- ERROR, totally wrong separator ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"=1}'::json; ^ -DETAIL: line 1: Token "=" is invalid. +DETAIL: Token "=" is invalid. +CONTEXT: JSON data, line 1: {"abc"=... 
SELECT '{"abc"::1}'::json; -- ERROR, another wrong separator -ERROR: invalid input syntax for type json: "{"abc"::1}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"::1}'::json; ^ -DETAIL: line 1: Expected string, number, object, array, true, false, or null, but found ":". +DETAIL: Expected JSON value, but found ":". +CONTEXT: JSON data, line 1: {"abc"::... SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK json --------------------------------------------------------- @@ -201,15 +220,17 @@ SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK (1 row) SELECT '{"abc":1:2}'::json; -- ERROR, colon in wrong spot -ERROR: invalid input syntax for type json: "{"abc":1:2}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc":1:2}'::json; ^ -DETAIL: line 1: Expected "," or "}", but found ":". +DETAIL: Expected "," or "}", but found ":". +CONTEXT: JSON data, line 1: {"abc":1:... SELECT '{"abc":1,3}'::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "{"abc":1,3}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc":1,3}'::json; ^ -DETAIL: line 1: Expected string, but found "3". +DETAIL: Expected string, but found "3". +CONTEXT: JSON data, line 1: {"abc":1,3... -- Miscellaneous stuff. SELECT 'true'::json; -- OK json @@ -236,35 +257,41 @@ SELECT ' true '::json; -- OK, even with extra whitespace (1 row) SELECT 'true false'::json; -- ERROR, too many values -ERROR: invalid input syntax for type json: "true false" +ERROR: invalid input syntax for type json LINE 1: SELECT 'true false'::json; ^ -DETAIL: line 1: Expected end of input, but found "false". +DETAIL: Expected end of input, but found "false". 
+CONTEXT: JSON data, line 1: true false SELECT 'true, false'::json; -- ERROR, too many values -ERROR: invalid input syntax for type json: "true, false" +ERROR: invalid input syntax for type json LINE 1: SELECT 'true, false'::json; ^ -DETAIL: line 1: Expected end of input, but found ",". +DETAIL: Expected end of input, but found ",". +CONTEXT: JSON data, line 1: true,... SELECT 'truf'::json; -- ERROR, not a keyword ERROR: invalid input syntax for type json LINE 1: SELECT 'truf'::json; ^ -DETAIL: line 1: Token "truf" is invalid. +DETAIL: Token "truf" is invalid. +CONTEXT: JSON data, line 1: truf SELECT 'trues'::json; -- ERROR, not a keyword ERROR: invalid input syntax for type json LINE 1: SELECT 'trues'::json; ^ -DETAIL: line 1: Token "trues" is invalid. +DETAIL: Token "trues" is invalid. +CONTEXT: JSON data, line 1: trues SELECT ''::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "" +ERROR: invalid input syntax for type json LINE 1: SELECT ''::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: SELECT ' '::json; -- ERROR, no value -ERROR: invalid input syntax for type json: " " +ERROR: invalid input syntax for type json LINE 1: SELECT ' '::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: --constructors -- array_to_json SELECT array_to_json(array(select 1 as a));