From a0b7b717a4324f573d3a7651a06037557066eb77 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 13 Aug 2010 18:36:26 +0000
Subject: [PATCH] Add xml_is_well_formed, xml_is_well_formed_document,
 xml_is_well_formed_content functions to the core XML code.  Per discussion,
 the first depends on XMLOPTION while the others do not.  These supersede a
 version previously offered by contrib/xml2.

Mike Fowler, reviewed by Pavel Stehule
---
 contrib/xml2/pgxml.sql.in           | 10 +---
 contrib/xml2/uninstall_pgxml.sql    |  4 +-
 contrib/xml2/xpath.c                | 11 +++-
 doc/src/sgml/func.sgml              | 80 +++++++++++++++++++++++++-
 src/backend/utils/adt/xml.c         | 72 +++++++++++++++++++++++-
 src/include/catalog/catversion.h    |  4 +-
 src/include/catalog/pg_proc.h       |  8 ++-
 src/include/utils/xml.h             |  5 +-
 src/test/regress/expected/xml.out   | 87 +++++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out | 59 +++++++++++++++++++
 src/test/regress/sql/xml.sql        | 21 +++++++
 11 files changed, 343 insertions(+), 18 deletions(-)

diff --git a/contrib/xml2/pgxml.sql.in b/contrib/xml2/pgxml.sql.in
index 98d8f81b575..0a52561135d 100644
--- a/contrib/xml2/pgxml.sql.in
+++ b/contrib/xml2/pgxml.sql.in
@@ -1,18 +1,14 @@
-/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.12 2010/03/01 18:07:59 tgl Exp $ */
+/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.13 2010/08/13 18:36:23 tgl Exp $ */
 
 -- Adjust this setting to control where the objects get created.
 SET search_path = public;
 
 --SQL for XML parser
 
-CREATE OR REPLACE FUNCTION xml_is_well_formed(text) RETURNS bool
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT IMMUTABLE;
-
 -- deprecated old name for xml_is_well_formed
 CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool
-AS 'MODULE_PATHNAME', 'xml_is_well_formed'
-LANGUAGE C STRICT IMMUTABLE;
+AS 'xml_is_well_formed'
+LANGUAGE INTERNAL STRICT STABLE;
 
 CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text
 AS 'MODULE_PATHNAME'
diff --git a/contrib/xml2/uninstall_pgxml.sql b/contrib/xml2/uninstall_pgxml.sql
index 09441ef01fa..016658dc7f1 100644
--- a/contrib/xml2/uninstall_pgxml.sql
+++ b/contrib/xml2/uninstall_pgxml.sql
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.4 2007/11/13 04:24:29 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.5 2010/08/13 18:36:23 tgl Exp $ */
 
 -- Adjust this setting to control where the objects get dropped.
 SET search_path = public;
@@ -29,5 +29,3 @@ DROP FUNCTION xml_encode_special_chars(text);
 
 -- deprecated old name for xml_is_well_formed
 DROP FUNCTION xml_valid(text);
-
-DROP FUNCTION xml_is_well_formed(text);
diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c
index dbf0b76f927..8ee949ce4ef 100644
--- a/contrib/xml2/xpath.c
+++ b/contrib/xml2/xpath.c
@@ -1,5 +1,5 @@
 /*
- * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.30 2010/07/06 19:18:55 momjian Exp $
+ * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.31 2010/08/13 18:36:23 tgl Exp $
  *
  * Parser interface for DOM-based parser (libxml) rather than
  * stream-based SAX-type parser
@@ -71,7 +71,14 @@ pgxml_parser_init(void)
 }
 
 
-/* Returns true if document is well-formed */
+/*
+ * Returns true if document is well-formed
+ *
+ * Note: this has been superseded by a core function.  We still have to
+ * have it in the contrib module so that existing SQL-level references
+ * to the function won't fail; but in normal usage with up-to-date SQL
+ * definitions for the contrib module, this won't be called.
+ */
 
 PG_FUNCTION_INFO_V1(xml_is_well_formed);
 
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index de6ba616504..562ba485d23 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.526 2010/08/10 21:51:00 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.527 2010/08/13 18:36:23 tgl Exp $ -->
 
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -8625,6 +8625,84 @@ SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Tor
      supports XPath, which is a subset of XQuery.
     </para>
    </sect3>
+
+   <sect3>
+    <title>xml_is_well_formed</title>
+
+    <indexterm>
+     <primary>xml_is_well_formed</primary>
+    </indexterm>
+
+    <indexterm>
+     <primary>xml_is_well_formed_document</primary>
+    </indexterm>
+
+    <indexterm>
+     <primary>xml_is_well_formed_content</primary>
+    </indexterm>
+
+<synopsis>
+<function>xml_is_well_formed</function>(<replaceable>text</replaceable>)
+<function>xml_is_well_formed_document</function>(<replaceable>text</replaceable>)
+<function>xml_is_well_formed_content</function>(<replaceable>text</replaceable>)
+</synopsis>
+
+    <para>
+     These functions check whether a <type>text</> string is well-formed XML,
+     returning a boolean result.
+     <function>xml_is_well_formed_document</function> checks for a well-formed
+     document, while <function>xml_is_well_formed_content</function> checks
+     for well-formed content.  <function>xml_is_well_formed</function> does
+     the former if the <xref linkend="guc-xmloption"> configuration
+     parameter is set to <literal>DOCUMENT</>, or the latter if it is set to
+     <literal>CONTENT</>.  This means that
+     <function>xml_is_well_formed</function> is useful for seeing whether
+     a simple cast to type <type>xml</> will succeed, whereas the other two
+     functions are useful for seeing whether the corresponding variants of
+     <function>XMLPARSE</> will succeed.
+    </para>
+
+    <para>
+     Examples:
+
+<screen><![CDATA[
+SET xmloption TO DOCUMENT;
+SELECT xml_is_well_formed('<>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<abc/>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SET xmloption TO CONTENT;
+SELECT xml_is_well_formed('abc');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed_document('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ xml_is_well_formed_document 
+-----------------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed_document('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ xml_is_well_formed_document 
+-----------------------------
+ f
+(1 row)
+]]></screen>
+
+     The last example shows that the checks include whether
+     namespaces are correctly matched.
+    </para>
+   </sect3>
   </sect2>
 
   <sect2 id="functions-xml-processing">
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 520668cf400..756390530a1 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.100 2010/08/08 19:15:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.101 2010/08/13 18:36:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3565,3 +3565,73 @@ xpath_exists(PG_FUNCTION_ARGS)
 	return 0;
 #endif
 }
+
+/*
+ * Functions for checking well-formed-ness
+ */
+
+#ifdef USE_LIBXML
+static bool
+wellformed_xml(text *data, XmlOptionType xmloption_arg)
+{
+	bool		result;
+	volatile xmlDocPtr doc = NULL;	/* volatile: assigned inside PG_TRY */
+
+	/* Any error raised by xml_parse means "not well-formed": trap it */
+	PG_TRY();
+	{
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
+		result = true;
+	}
+	PG_CATCH();
+	{
+		FlushErrorState();
+		result = false;
+	}
+	PG_END_TRY();
+
+	if (doc)
+		xmlFreeDoc(doc);
+
+	return result;
+}
+#endif
+
+Datum
+xml_is_well_formed(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+	/* Document-vs-content checking follows the current xmloption setting */
+	PG_RETURN_BOOL(wellformed_xml(data, xmloption));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+Datum
+xml_is_well_formed_document(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+	/* Always check as a document, independent of the xmloption setting */
+	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+Datum
+xml_is_well_formed_content(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *data = PG_GETARG_TEXT_P(0);
+	/* Always check as content, independent of the xmloption setting */
+	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index a7739db82d7..db5f3c67b3a 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.594 2010/08/10 21:51:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.595 2010/08/13 18:36:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201008101
+#define CATALOG_VERSION_NO	201008131
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0ba9435b0af..7531b7ab5ec 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.578 2010/08/10 21:51:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.579 2010/08/13 18:36:25 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.pl reads this file and generates .bki
@@ -4423,6 +4423,12 @@ DATA(insert OID = 3049 (  xpath_exists	 PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16
 DESCR("test XML value against XPath expression, with namespace support");
 DATA(insert OID = 3050 (  xpath_exists	 PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
 DESCR("test XML value against XPath expression");
+DATA(insert OID = 3051 (  xml_is_well_formed			 PGNSP PGUID 12 1 0 0 f f f t f s 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed _null_ _null_ _null_ ));
+DESCR("determine if a string is well formed XML");
+DATA(insert OID = 3052 (  xml_is_well_formed_document	 PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_document _null_ _null_ _null_ ));
+DESCR("determine if a string is well formed XML document");
+DATA(insert OID = 3053 (  xml_is_well_formed_content	 PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ ));
+DESCR("determine if a string is well formed XML content");
 
 /* uuid */
 DATA(insert OID = 2952 (  uuid_in		   PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 807bb08485f..96029c2ebd9 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.33 2010/08/08 19:15:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.34 2010/08/13 18:36:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,6 +39,9 @@ extern Datum xmlvalidate(PG_FUNCTION_ARGS);
 extern Datum xpath(PG_FUNCTION_ARGS);
 extern Datum xpath_exists(PG_FUNCTION_ARGS);
 extern Datum xmlexists(PG_FUNCTION_ARGS);
+extern Datum xml_is_well_formed(PG_FUNCTION_ARGS);
+extern Datum xml_is_well_formed_document(PG_FUNCTION_ARGS);
+extern Datum xml_is_well_formed_content(PG_FUNCTION_ARGS);
 
 extern Datum table_to_xml(PG_FUNCTION_ARGS);
 extern Datum query_to_xml(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 435331dcc37..eaa5a74ef07 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -599,3 +599,90 @@ SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
      2
 (1 row)
 
+-- Test xml_is_well_formed and variants
+SELECT xml_is_well_formed_document('<foo>bar</foo>');
+ xml_is_well_formed_document 
+-----------------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed_document('abc');
+ xml_is_well_formed_document 
+-----------------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed_content('<foo>bar</foo>');
+ xml_is_well_formed_content 
+----------------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed_content('abc');
+ xml_is_well_formed_content 
+----------------------------
+ t
+(1 row)
+
+SET xmloption TO DOCUMENT;
+SELECT xml_is_well_formed('abc');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<abc/>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed('<foo>bar</foo>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed('<foo>bar</foo');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SET xmloption TO CONTENT;
+SELECT xml_is_well_formed('abc');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 2ce543aeaa0..711b4358a25 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -573,3 +573,62 @@ SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
      0
 (1 row)
 
+-- Test xml_is_well_formed and variants
+SELECT xml_is_well_formed_document('<foo>bar</foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed_document('abc');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed_content('<foo>bar</foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed_content('abc');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SET xmloption TO DOCUMENT;
+SELECT xml_is_well_formed('abc');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<abc/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<foo>bar</foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<foo>bar</foo');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SET xmloption TO CONTENT;
+SELECT xml_is_well_formed('abc');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 0e8c0fb2273..717a1e7170e 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -190,3 +190,24 @@ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:nam
 CREATE TABLE query ( expr TEXT );
 INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']');
 SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
+
+-- Test xml_is_well_formed and variants
+
+SELECT xml_is_well_formed_document('<foo>bar</foo>');
+SELECT xml_is_well_formed_document('abc');
+SELECT xml_is_well_formed_content('<foo>bar</foo>');
+SELECT xml_is_well_formed_content('abc');
+
+SET xmloption TO DOCUMENT;
+SELECT xml_is_well_formed('abc');
+SELECT xml_is_well_formed('<>');
+SELECT xml_is_well_formed('<abc/>');
+SELECT xml_is_well_formed('<foo>bar</foo>');
+SELECT xml_is_well_formed('<foo>bar</foo');
+SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+
+SET xmloption TO CONTENT;
+SELECT xml_is_well_formed('abc');
-- 
GitLab