From a0b7b717a4324f573d3a7651a06037557066eb77 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Fri, 13 Aug 2010 18:36:26 +0000 Subject: [PATCH] Add xml_is_well_formed, xml_is_well_formed_document, xml_is_well_formed_content functions to the core XML code. Per discussion, the former depends on XMLOPTION while the others do not. These supersede a version previously offered by contrib/xml2. Mike Fowler, reviewed by Pavel Stehule --- contrib/xml2/pgxml.sql.in | 10 +--- contrib/xml2/uninstall_pgxml.sql | 4 +- contrib/xml2/xpath.c | 11 +++- doc/src/sgml/func.sgml | 80 +++++++++++++++++++++++++- src/backend/utils/adt/xml.c | 72 +++++++++++++++++++++++- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_proc.h | 8 ++- src/include/utils/xml.h | 5 +- src/test/regress/expected/xml.out | 87 +++++++++++++++++++++++++++++ src/test/regress/expected/xml_1.out | 59 +++++++++++++++++++ src/test/regress/sql/xml.sql | 21 +++++++ 11 files changed, 343 insertions(+), 18 deletions(-) diff --git a/contrib/xml2/pgxml.sql.in b/contrib/xml2/pgxml.sql.in index 98d8f81b575..0a52561135d 100644 --- a/contrib/xml2/pgxml.sql.in +++ b/contrib/xml2/pgxml.sql.in @@ -1,18 +1,14 @@ -/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.12 2010/03/01 18:07:59 tgl Exp $ */ +/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.13 2010/08/13 18:36:23 tgl Exp $ */ -- Adjust this setting to control where the objects get created. 
SET search_path = public; --SQL for XML parser -CREATE OR REPLACE FUNCTION xml_is_well_formed(text) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -- deprecated old name for xml_is_well_formed CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool -AS 'MODULE_PATHNAME', 'xml_is_well_formed' -LANGUAGE C STRICT IMMUTABLE; +AS 'xml_is_well_formed' +LANGUAGE INTERNAL STRICT STABLE; CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text AS 'MODULE_PATHNAME' diff --git a/contrib/xml2/uninstall_pgxml.sql b/contrib/xml2/uninstall_pgxml.sql index 09441ef01fa..016658dc7f1 100644 --- a/contrib/xml2/uninstall_pgxml.sql +++ b/contrib/xml2/uninstall_pgxml.sql @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.4 2007/11/13 04:24:29 momjian Exp $ */ +/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.5 2010/08/13 18:36:23 tgl Exp $ */ -- Adjust this setting to control where the objects get dropped. SET search_path = public; @@ -29,5 +29,3 @@ DROP FUNCTION xml_encode_special_chars(text); -- deprecated old name for xml_is_well_formed DROP FUNCTION xml_valid(text); - -DROP FUNCTION xml_is_well_formed(text); diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index dbf0b76f927..8ee949ce4ef 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.30 2010/07/06 19:18:55 momjian Exp $ + * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.31 2010/08/13 18:36:23 tgl Exp $ * * Parser interface for DOM-based parser (libxml) rather than * stream-based SAX-type parser @@ -71,7 +71,14 @@ pgxml_parser_init(void) } -/* Returns true if document is well-formed */ +/* + * Returns true if document is well-formed + * + * Note: this has been superseded by a core function. 
We still have to + * have it in the contrib module so that existing SQL-level references + * to the function won't fail; but in normal usage with up-to-date SQL + * definitions for the contrib module, this won't be called. + */ PG_FUNCTION_INFO_V1(xml_is_well_formed); diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index de6ba616504..562ba485d23 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.526 2010/08/10 21:51:00 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.527 2010/08/13 18:36:23 tgl Exp $ --> <chapter id="functions"> <title>Functions and Operators</title> @@ -8625,6 +8625,84 @@ SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Tor supports XPath, which is a subset of XQuery. </para> </sect3> + + <sect3> + <title>xml_is_well_formed</title> + + <indexterm> + <primary>xml_is_well_formed</primary> + </indexterm> + + <indexterm> + <primary>xml_is_well_formed_document</primary> + </indexterm> + + <indexterm> + <primary>xml_is_well_formed_content</primary> + </indexterm> + +<synopsis> +<function>xml_is_well_formed</function>(<replaceable>text</replaceable>) +<function>xml_is_well_formed_document</function>(<replaceable>text</replaceable>) +<function>xml_is_well_formed_content</function>(<replaceable>text</replaceable>) +</synopsis> + + <para> + These functions check whether a <type>text</> string is well-formed XML, + returning a boolean result. + <function>xml_is_well_formed_document</function> checks for a well-formed + document, while <function>xml_is_well_formed_content</function> checks + for well-formed content. <function>xml_is_well_formed</function> does + the former if the <xref linkend="guc-xmloption"> configuration + parameter is set to <literal>DOCUMENT</>, or the latter if it is set to + <literal>CONTENT</>. 
This means that + <function>xml_is_well_formed</function> is useful for seeing whether + a simple cast to type <type>xml</> will succeed, whereas the other two + functions are useful for seeing whether the corresponding variants of + <function>XMLPARSE</> will succeed. + </para> + + <para> + Examples: + +<screen><![CDATA[ +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('<>'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<abc/>'); + xml_is_well_formed +-------------------- + t +(1 row) + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>'); + xml_is_well_formed_document +----------------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>'); + xml_is_well_formed_document +----------------------------- + f +(1 row) +]]></screen> + + The last example shows that the checks include whether + namespaces are correctly matched. 
+    </para>
+   </sect3>
   </sect2>
 
   <sect2 id="functions-xml-processing">
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 520668cf400..756390530a1 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.100 2010/08/08 19:15:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.101 2010/08/13 18:36:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3565,3 +3565,91 @@ xpath_exists(PG_FUNCTION_ARGS)
 	return 0;
 #endif
 }
+
+/*
+ * Functions for checking well-formed-ness
+ */
+
+#ifdef USE_LIBXML
+/*
+ * wellformed_xml --- report whether "data" parses as XML
+ *
+ * Parses the text with xml_parse() in the mode selected by xmloption_arg.
+ * Returns true if parsing succeeds.  Any error thrown during parsing is
+ * caught and discarded, and false is returned instead.  A document
+ * returned by the parser is freed before returning.
+ */
+static bool
+wellformed_xml(text *data, XmlOptionType xmloption_arg)
+{
+	bool		result;
+	/* volatile because it is set inside PG_TRY and read after PG_END_TRY */
+	volatile xmlDocPtr doc = NULL;
+
+	/* We want to catch any exceptions and return false */
+	PG_TRY();
+	{
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
+		result = true;
+	}
+	PG_CATCH();
+	{
+		FlushErrorState();
+		result = false;
+	}
+	PG_END_TRY();
+
+	if (doc)
+		xmlFreeDoc(doc);
+
+	return result;
+}
+#endif
+
+/*
+ * xml_is_well_formed --- check in the mode given by the xmloption variable
+ */
+Datum
+xml_is_well_formed(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+
+	PG_RETURN_BOOL(wellformed_xml(data, xmloption));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * xml_is_well_formed_document --- always check with XMLOPTION_DOCUMENT
+ */
+Datum
+xml_is_well_formed_document(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+
+	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * xml_is_well_formed_content --- always check with XMLOPTION_CONTENT
+ */
+Datum
+xml_is_well_formed_content(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+
+	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
diff --git a/src/include/catalog/catversion.h 
b/src/include/catalog/catversion.h index a7739db82d7..db5f3c67b3a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.594 2010/08/10 21:51:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.595 2010/08/13 18:36:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201008101 +#define CATALOG_VERSION_NO 201008131 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0ba9435b0af..7531b7ab5ec 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.578 2010/08/10 21:51:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.579 2010/08/13 18:36:25 tgl Exp $ * * NOTES * The script catalog/genbki.pl reads this file and generates .bki @@ -4423,6 +4423,12 @@ DATA(insert OID = 3049 ( xpath_exists PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 DESCR("test XML value against XPath expression, with namespace support"); DATA(insert OID = 3050 ( xpath_exists PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); DESCR("test XML value against XPath expression"); +DATA(insert OID = 3051 ( xml_is_well_formed PGNSP PGUID 12 1 0 0 f f f t f s 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML"); +DATA(insert OID = 3052 ( 
xml_is_well_formed_document PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_document _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML document"); +DATA(insert OID = 3053 ( xml_is_well_formed_content PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML content"); /* uuid */ DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ )); diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index 807bb08485f..96029c2ebd9 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.33 2010/08/08 19:15:27 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.34 2010/08/13 18:36:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,6 +39,9 @@ extern Datum xmlvalidate(PG_FUNCTION_ARGS); extern Datum xpath(PG_FUNCTION_ARGS); extern Datum xpath_exists(PG_FUNCTION_ARGS); extern Datum xmlexists(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed_document(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed_content(PG_FUNCTION_ARGS); extern Datum table_to_xml(PG_FUNCTION_ARGS); extern Datum query_to_xml(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 435331dcc37..eaa5a74ef07 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -599,3 +599,90 @@ SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data); 2 (1 row) +-- Test xml_is_well_formed and variants +SELECT 
xml_is_well_formed_document('<foo>bar</foo>'); + xml_is_well_formed_document +----------------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('abc'); + xml_is_well_formed_document +----------------------------- + f +(1 row) + +SELECT xml_is_well_formed_content('<foo>bar</foo>'); + xml_is_well_formed_content +---------------------------- + t +(1 row) + +SELECT xml_is_well_formed_content('abc'); + xml_is_well_formed_content +---------------------------- + t +(1 row) + +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<>'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<abc/>'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('<foo>bar</foo>'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('<foo>bar</foo'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<foo><bar>baz</foo>'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>'); + xml_is_well_formed +-------------------- + t +(1 row) + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + t +(1 row) + diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 2ce543aeaa0..711b4358a25 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -573,3 +573,62 @@ SELECT COUNT(id) FROM 
xmltest, query WHERE xmlexists(expr PASSING BY REF data); 0 (1 row) +-- Test xml_is_well_formed and variants +SELECT xml_is_well_formed_document('<foo>bar</foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_document('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_content('<foo>bar</foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_content('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<abc/>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<foo>bar</foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. 
+SELECT xml_is_well_formed('<foo>bar</foo'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<foo><bar>baz</foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. 
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0e8c0fb2273..717a1e7170e 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -190,3 +190,24 @@ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:nam CREATE TABLE query ( expr TEXT ); INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']'); SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data); + +-- Test xml_is_well_formed and variants + +SELECT xml_is_well_formed_document('<foo>bar</foo>'); +SELECT xml_is_well_formed_document('abc'); +SELECT xml_is_well_formed_content('<foo>bar</foo>'); +SELECT xml_is_well_formed_content('abc'); + +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); +SELECT xml_is_well_formed('<>'); +SELECT xml_is_well_formed('<abc/>'); +SELECT xml_is_well_formed('<foo>bar</foo>'); +SELECT xml_is_well_formed('<foo>bar</foo'); +SELECT xml_is_well_formed('<foo><bar>baz</foo>'); +SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>'); +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>'); +SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>'); + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); -- GitLab