diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index ab929eed64720308428a971193373308268170b9..597ba4f7137fcb35687cd15bfe275e788ce3374a 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $ * *------------------------------------------------------------------------- */ @@ -489,6 +489,122 @@ xml_init(void) } +/* + * SQL/XML allows storing "XML documents" or "XML content". "XML + * documents" are specified by the XML specification and are parsed + * easily by libxml. "XML content" is specified by SQL/XML as the + * production "XMLDecl? content". But libxml can only parse the + * "content" part, so we have to parse the XML declaration ourselves + * to complete this. + */ + +#define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED +#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++ + +static int +parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone) +{ + const xmlChar *p; + const xmlChar *save_p; + + p = str; + + if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0) + goto finished; + + p += 5; + + /* version */ + CHECK_XML_SPACE(p); + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0) + return XML_ERR_VERSION_MISSING; + p += 7; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_VERSION_MISSING; + p += 1; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0) + return XML_ERR_VERSION_MISSING; + p += 5; + + /* encoding */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0) + { + CHECK_XML_SPACE(save_p); + p += 8; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_MISSING_ENCODING; + p += 1; + SKIP_XML_SPACE(p); + + if (*p == '\'' || *p == '"') + { + const xmlChar *q; + + q = xmlStrchr(p + 1, *p); + if (!q) + return XML_ERR_MISSING_ENCODING; + + *encoding = xmlStrndup(p + 1, q - p - 1); + p = q + 1; + } + else + return XML_ERR_MISSING_ENCODING; + } + else + { + p = save_p; + *encoding = NULL; + } + + /* standalone */ + save_p = p; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0) + { + CHECK_XML_SPACE(save_p); + p += 10; + SKIP_XML_SPACE(p); + if (*p != '=') + return XML_ERR_STANDALONE_VALUE; + p += 1; + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0) + { + *standalone = 1; + p += 5; + } + else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0) + { + *standalone = 0; + p += 4; + } + else + return XML_ERR_STANDALONE_VALUE; + } + else + { + p = save_p; + *standalone = -1; + } + + SKIP_XML_SPACE(p); + if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0) + return XML_ERR_XMLDECL_NOT_FINISHED; + p += 2; + +finished: + if (len) + *len = (p - str); + return XML_ERR_OK; +} + + /* * Convert a C string to XML internal representation * @@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace) } else { + size_t count; + xmlChar *encoding = NULL; + int standalone = -1; + doc = xmlNewDoc(NULL); - /* - * FIXME: An XMLDecl is supposed to be accepted before the - * content, but libxml doesn't allow this. Parse that - * ourselves? - */ + res_code = parse_xml_decl(string, &count, &encoding, &standalone); /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */ - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL); + if (res_code == 0) + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL); if (res_code != 0) xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, "invalid XML content", res_code); + + doc->encoding = encoding; + doc->standalone = standalone; } /* TODO encoding issues