From 48ba3f5711aab5bd8f009aa9da8687bc465cdc21 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Mon, 29 Jan 2007 13:24:30 +0000
Subject: [PATCH] Update documentation on XML functions

---
 doc/src/sgml/datatype.sgml |  82 +-------
 doc/src/sgml/func.sgml     | 412 ++++++++++++++++++++++++++++++++-----
 2 files changed, 367 insertions(+), 127 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index f0ba6c32c71..10d5a34cf96 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.186 2007/01/25 11:53:50 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.187 2007/01/29 13:24:30 petere Exp $ -->
 
  <chapter id="datatype">
   <title id="datatype-title">Data Types</title>
@@ -3538,86 +3538,6 @@ SET xmloption TO { DOCUMENT | CONTENT };
     processed in UTF-8, computations will be most efficient if the
     server encoding is also UTF-8.
    </para>
-
-   <para>
-    <acronym>XML</> (Extensible Markup Language) support is not
-    just the existance of an <type>xml</type> data type, but a
-    variety of features supported by a database system. These
-    capabilities include import/export, indexing, searching,
-    transforming, and <acronym>XML</> to <acronym>SQL</> mapping.
-    <productname>PostgreSQL</> supports some but not all of these
-    <acronym>XML</> capabilities.  For an overview of <acronym>XML</>
-    use in databases, see <ulink
-    url="http://www.rpbourret.com/xml/XMLAndDatabases.htm"></>.
-   </para>
-
-   <variablelist>
-   <varlistentry>
-    <term>Import/Export</term>
-    <listitem>
-
-     <para>
-      There is no facility for mapping <acronym>XML</> to relational
-      tables. An external tool must be used for this. One simple way to
-      export <acronym>XML</> is to use <application>psql</> in
-      <acronym>HTML</> mode (<literal>\pset format html</>), and convert
-      the <acronym>XHTML</> output to XML using an external tool.
-     </para>
-    </listitem>
-   </varlistentry>
-
-   <varlistentry>
-    <term>Indexing</term>
-    <listitem>
-
-     <para>
-      <filename>/contrib/xml2</> functions can be used in expression
-      indexes to index specific <acronym>XML</> fields. To index the
-      full contents of <acronym>XML</> documents, the full-text indexing
-      tool <filename>/contrib/tsearch2</> can be used. Of course,
-      Tsearch2 indexes have no <acronym>XML</> awareness so additional
-      <filename>/contrib/xml2</> checks should be added to queries.
-     </para>
-    </listitem>
-   </varlistentry>
-
-   <varlistentry>
-    <term>Searching</term>
-    <listitem>
-
-     <para>
-      XPath searches are implemented using <filename>/contrib/xml2</>.
-      It processes <acronym>XML</> text documents and returns results
-      based on the requested query.
-     </para>
-    </listitem>
-   </varlistentry>
-
-   <varlistentry>
-    <term>Transforming</term>
-    <listitem>
-
-     <para>
-      <filename>/contrib/xml2</> supports <acronym>XSLT</> (Extensible
-      Stylesheet Language Transformation).
-     </para>
-    </listitem>
-   </varlistentry>
-
-   <varlistentry>
-    <term>XML to SQL Mapping</term>
-    <listitem>
-
-     <para>
-      This involves converting <acronym>XML</> data to and from
-      relational structures. <productname>PostgreSQL</> has no internal
-      support for such mapping, and relies on external tools to do such
-      conversions.
-     </para>
-    </listitem>
-   </varlistentry>
-   </variablelist>
-
   </sect1>
 
  </chapter>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f433f9221c7..4b03b4df148 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.352 2007/01/20 21:59:34 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.353 2007/01/29 13:24:30 petere Exp $ -->
 
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -10752,65 +10752,215 @@ SELECT (pg_stat_file('filename')).modification;
 
   </sect1>
 
-  <sect1 id="functions-xml">
-   <title>XML Functions</title>
+ <sect1 id="functions-xml">
+  <title>XML Functions</title>
+
+  <para>
+   The functions and function-like expressions described in this
+   section operate on values of type <type>xml</type>.  Check <xref
+   linkend="datatype-xml"> for information about the <type>xml</type>
+   type.  The function-like expressions <function>xmlparse</function>
+   and <function>xmlserialize</function> for converting to and from
+   type <type>xml</type> are not repeated here.
+  </para>
+
+  <sect2>
+   <title>Producing XML Content</title>
 
    <para>
-    The functions and function-like expressions described in this
-    section operate on values of type <type>xml</type>.
+    A set of functions and function-like expressions are available for
+    producing XML content from SQL data.  As such, they are
+    particularly suitable for formatting query results into XML
+    documents for processing in client applications.
    </para>
 
-   <sect2>
+   <sect3>
     <title><literal>xmlcomment</literal></title>
- 
-   <indexterm>
-    <primary>xmlcomment</primary>
-   </indexterm>
- 
- <synopsis>
- <function>xmlcomment</function>(<replaceable>text</replaceable>)
- </synopsis>
- 
+
+    <indexterm>
+     <primary>xmlcomment</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcomment</function>(<replaceable>text</replaceable>)
+</synopsis>
+
     <para>
-     Creates an XML comment.
+     The function <function>xmlcomment</function> creates an XML value
+     containing an XML comment with the specified text as content.
+     The text may not contain <literal>--</literal> or end with a
+     <literal>-</literal> so that the resulting construct is a valid
+     XML comment.  If the argument is null, the result is null.
     </para>
-    </sect2>
- 
-   <sect2>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT xmlcomment('hello');
+
+  xmlcomment
+--------------
+ <!--hello-->
+]]></screen>
+    </para>
+   </sect3>
+
+   <sect3>
     <title><literal>xmlconcat</literal></title>
- 
-   <indexterm>
-    <primary>xmlconcat</primary>
-   </indexterm>
- 
+
+    <indexterm>
+     <primary>xmlconcat</primary>
+    </indexterm>
+
  <synopsis>
- <function>xmlconcat</function>(<replaceable>xml</replaceable><optional>, xml, ...</optional>)
+ <function>xmlconcat</function>(<replaceable>xml</replaceable><optional>, ...</optional>)
  </synopsis>
  
     <para>
-     Combines a list of individual XML values to create a
-     single value containing an XML forest.
+     The function <function>xmlconcat</function> concatenates a list
+     of individual XML values to create a single value containing an
+     XML content fragment.  Null values are omitted; the result is
+     only null if there are no nonnull arguments.
     </para>
-    </sect2>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT xmlconcat('<abc/>', '<bar>foo</bar>');
+
+      xmlconcat
+----------------------
+ <abc/><bar>foo</bar>
+]]></screen>
+    </para>
+
+    <para>
+     XML declarations, if present, are combined as follows.  If all
+     argument values have the same XML version declaration, that
+     version is used in the result, else no version is used.  If all
+     argument values have the standalone declaration value
+     <quote>yes</quote>, then that value is used in the result.  If
+     all argument values have a standalone declaration value and at
+     least one is <quote>no</quote>, then that is used in the result.
+     Else the result will have no standalone declaration.  If the
+     result is determined to require a standalone declaration but no
+     version declaration, a version declaration with version 1.0 will
+     be used because XML requires an XML declaration to contain a
+     version declaration.  Encoding declarations are ignored and
+     removed in all cases.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT xmlconcat('<?xml version="1.1"?><foo/>', '<?xml version="1.1" standalone="no"?><bar/>');
+
+             xmlconcat
+-----------------------------------
+ <?xml version="1.1"?><foo/><bar/>
+]]></screen>
+    </para>
+   </sect3>
  
-   <sect2>
+   <sect3>
     <title><literal>xmlelement</literal></title>
  
    <indexterm>
     <primary>xmlelement</primary>
    </indexterm>
  
- <synopsis>
- <function>xmlelement</function>(name <replaceable>name</replaceable><optional>, xmlattribytes(<replaceable>value</replaceable> <optional>AS <replaceable>label</replaceable></optional><optional>, ... </optional>)</optional>
- <optional><replaceable>, content, ...</replaceable></optional>)
+<synopsis>
+ <function>xmlelement</function>(name <replaceable>name</replaceable> <optional>, xmlattributes(<replaceable>value</replaceable> <optional>AS <replaceable>attname</replaceable></optional> <optional>, ... </optional>)</optional> <optional><replaceable>, content, ...</replaceable></optional>)
  </synopsis>
  
     <para>
-     Creates an XML element, allowing the name to be specified.
+     The <function>xmlelement</function> expression produces an XML
+     element with the given name, attributes, and content.
+    </para>
+
+    <para>
+     Examples:
+<screen><![CDATA[
+SELECT xmlelement(name foo);
+
+ xmlelement
+------------
+ <foo/>
+
+SELECT xmlelement(name foo, xmlattributes('xyz' as bar));
+
+    xmlelement
+------------------
+ <foo bar="xyz"/>
+
+SELECT xmlelement(name foo, xmlattributes(current_date as bar), 'cont', 'ent');
+
+             xmlelement
+-------------------------------------
+ <foo bar="2007-01-26">content</foo>
+]]></screen>
+    </para>
+
+    <para>
+     Element and attribute names that are not valid XML names are
+     escaped by replacing the offending characters by the sequence
+     <literal>_x<replaceable>HHHH</replaceable>_</literal>, where
+     <replaceable>HHHH</replaceable> is the character's Unicode
+     codepoint in hexadecimal notation.  For example:
+<screen><![CDATA[
+SELECT xmlelement(name "foo$bar", xmlattributes('xyz' as "a&b"));
+
+            xmlelement
+----------------------------------
+ <foo_x0024_bar a_x0026_b="xyz"/>
+]]></screen>
     </para>
-    </sect2>
+
+    <para>
+     An explicit attribute name need not be specified if the attribute
+     value is a column reference, in which case the column's name will
+     be used as attribute name by default.  In any other case, the
+     attribute must be given an explicit name.  So this example is
+     valid:
+<screen>
+CREATE TABLE test (a xml, b xml);
+SELECT xmlelement(name test, xmlattributes(a, b)) FROM test;
+</screen>
+     But these are not:
+<screen>
+SELECT xmlelement(name test, xmlattributes('constant'), a, b) FROM test;
+SELECT xmlelement(name test, xmlattributes(func(a, b))) FROM test;
+</screen>
+    </para>
+
+    <para>
+     Element content, if specified, will be formatted according to
+     data type.  If the content is itself of type <type>xml</type>,
+     complex XML documents can be constructed.  For example:
+<screen><![CDATA[
+SELECT xmlelement(name foo, xmlattributes('xyz' as bar),
+                            xmlelement(name abc),
+                            xmlcomment('test'),
+                            xmlelement(name xyz));
+
+                  xmlelement
+----------------------------------------------
+ <foo bar="xyz"><abc/><!--test--><xyz/></foo>
+]]></screen>
+
+     Content of other types will be formatted into valid XML character
+     data.  This means in particular that the characters &lt;, &gt;,
+     and &amp; will be converted to entities.  Binary data (data type
+     <type>bytea</type>) will be represented in base64 or hex
+     encoding, depending on the setting of the configuration parameter
+     <xref linkend="guc-xmlbinary">.  The particular behavior for
+     individual data types is expected to evolve in order to align the
+     SQL and PostgreSQL data types with the XML Schema specification,
+     at which point a more precise description will appear.
+    </para>
+   </sect3>
  
-   <sect2>
+   <sect3>
     <title><literal>xmlforest</literal></title>
  
    <indexterm>
@@ -10818,16 +10968,54 @@ SELECT (pg_stat_file('filename')).modification;
    </indexterm>
  
  <synopsis>
- <function>xmlforest</function>(<replaceable>value</replaceable> <optional>AS <replaceable>label</replaceable></optional><optional>, ...</optional>)
+ <function>xmlforest</function>(<replaceable>content</replaceable> <optional>AS <replaceable>name</replaceable></optional> <optional>, ...</optional>)
  </synopsis>
  
     <para>
-     Creates XML elements from columns, using the name of each
-     column as the name of the corresponding element.
+     The <function>xmlforest</function> expression produces an XML
+     forest (sequence) of elements using the given names and content.
+    </para>
+
+    <para>
+     Examples:
+<screen><![CDATA[
+SELECT xmlforest('abc' AS foo, 123 AS bar);
+
+          xmlforest
+------------------------------
+ <foo>abc</foo><bar>123</bar>
+
+
+SELECT xmlforest(table_name, column_name) FROM information_schema.columns WHERE table_schema = 'pg_catalog';
+
+                                         xmlforest
+-------------------------------------------------------------------------------------------
+ <table_name>pg_authid</table_name><column_name>rolname</column_name>
+ <table_name>pg_authid</table_name><column_name>rolsuper</column_name>
+ ...
+]]></screen>
+
+     As seen in the second example, the element name can be omitted if
+     the content value is a column reference, in which case the column
+     name is used by default.  Otherwise, a name must be specified.
+    </para>
+
+    <para>
+     Element names that are not valid XML names are escaped as shown
+     for <function>xmlelement</function> above.  Similarly, content
+     data is escaped to make valid XML content, unless it is already
+     of type <type>xml</type>.
+    </para>
+
+    <para>
+     Note that XML forests are not valid XML documents if they consist
+     of more than one element.  So it might be useful to wrap
+     <function>xmlforest</function> expressions in
+     <function>xmlelement</function>.
     </para>
-    </sect2>
+   </sect3>
  
-   <sect2>
+   <sect3>
     <title><literal>xmlpi</literal></title>
  
    <indexterm>
@@ -10839,11 +11027,24 @@ SELECT (pg_stat_file('filename')).modification;
  </synopsis>
  
     <para>
-     Creates an XML processing instruction.
+     The <function>xmlpi</function> expression creates an XML
+     processing instruction.  The content, if present, must not
+     contain the character sequence <literal>?&gt;</literal>.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT xmlpi(name php, 'echo "hello world";');
+
+            xmlpi
+-----------------------------
+ <?php echo "hello world";?>
+]]></screen>
     </para>
-    </sect2>
+   </sect3>
  
-   <sect2>
+   <sect3>
     <title><literal>xmlroot</literal></title>
  
    <indexterm>
@@ -10851,13 +11052,132 @@ SELECT (pg_stat_file('filename')).modification;
    </indexterm>
  
  <synopsis>
- <function>xmlroot</function>(<replaceable>xml</replaceable>, version <replaceable>text</replaceable> <optional>, standalone yes|no|no value</optional>)
+ <function>xmlroot</function>(<replaceable>xml</replaceable>, version <replaceable>text</replaceable>|no value <optional>, standalone yes|no|no value</optional>)
  </synopsis>
  
     <para>
-     Creates the root node of an XML document.
+     The <function>xmlroot</function> expression alters the properties
+     of the root node of an XML value.  If a version is specified,
+     this replaces the value in the version declaration; if a
+     standalone value is specified, this replaces the value in the
+     standalone declaration.
+    </para>
+
+    <para>
+<screen><![CDATA[
+SELECT xmlroot(xmlparse(document '<?xml version="1.1"?><content>abc</content>'), version '1.0', standalone yes);
+
+                xmlroot
+----------------------------------------
+ <?xml version="1.0" standalone="yes"?>
+ <content>abc</content>
+]]></screen>
+    </para>
+   </sect3>
+
+   <sect3>
+    <title>XML Predicates</title>
+
+    <indexterm>
+     <primary>IS DOCUMENT</primary>
+    </indexterm>
+
+<synopsis>
+<replaceable>xml</replaceable> IS DOCUMENT
+</synopsis>
+
+    <para>
+     The expression <literal>IS DOCUMENT</literal> returns true if the
+     argument XML value is a proper XML document, false if it is not
+     (that is, it is a content fragment), or null if the argument is
+     null.  See <xref linkend="datatype-xml"> about the difference
+     between documents and content fragments.
     </para>
+   </sect3>
+  </sect2>
+
+  <sect2>
+   <title>Processing XML</title>
+
+   <para>
+    <acronym>XML</> support is not just the existence of an
+    <type>xml</type> data type, but a variety of features supported by
+    a database system.  These capabilities include import/export,
+    indexing, searching, transforming, and <acronym>XML</> to
+    <acronym>SQL</> mapping.  <productname>PostgreSQL</> supports some
+    but not all of these <acronym>XML</> capabilities.  For an
+    overview of <acronym>XML</> use in databases, see <ulink
+    url="http://www.rpbourret.com/xml/XMLAndDatabases.htm"></>.
+   </para>
+
+   <variablelist>
+   <varlistentry>
+    <term>Import/Export</term>
+    <listitem>
+
+     <para>
+      There is no facility for mapping <acronym>XML</> to relational
+      tables.  An external tool must be used for this.  One simple way
+      to export <acronym>XML</> is to use <application>psql</> in
+      <acronym>HTML</> mode (<literal>\pset format html</>), and
+      convert the <acronym>XHTML</> output to XML using an external
+      tool.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>Indexing</term>
+    <listitem>
+
+     <para>
+      <filename>contrib/xml2/</> functions can be used in expression
+      indexes to index specific <acronym>XML</> fields.  To index the
+      full contents of <acronym>XML</> documents, the full-text
+      indexing tool <filename>contrib/tsearch2/</> can be used.  Of
+      course, Tsearch2 indexes have no <acronym>XML</> awareness so
+      additional <filename>contrib/xml2/</> checks should be added to
+      queries.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>Searching</term>
+    <listitem>
+
+     <para>
+      XPath searches are implemented using <filename>contrib/xml2/</>.
+      It processes <acronym>XML</> text documents and returns results
+      based on the requested query.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>Transforming</term>
+    <listitem>
+
+     <para>
+      <filename>contrib/xml2/</> supports <acronym>XSLT</> (Extensible
+      Stylesheet Language Transformation).
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>XML to SQL Mapping</term>
+    <listitem>
+
+     <para>
+      This involves converting <acronym>XML</> data to and from
+      relational structures. <productname>PostgreSQL</> has no
+      internal support for such mapping, and relies on external tools
+      to do such conversions.
+     </para>
+    </listitem>
+   </varlistentry>
+   </variablelist>
   </sect2>
- 
  </sect1>
 </chapter>
-- 
GitLab