From 48ba3f5711aab5bd8f009aa9da8687bc465cdc21 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <peter_e@gmx.net> Date: Mon, 29 Jan 2007 13:24:30 +0000 Subject: [PATCH] Update documentation on XML functions --- doc/src/sgml/datatype.sgml | 82 +------- doc/src/sgml/func.sgml | 412 ++++++++++++++++++++++++++++++++----- 2 files changed, 367 insertions(+), 127 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index f0ba6c32c71..10d5a34cf96 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.186 2007/01/25 11:53:50 petere Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.187 2007/01/29 13:24:30 petere Exp $ --> <chapter id="datatype"> <title id="datatype-title">Data Types</title> @@ -3538,86 +3538,6 @@ SET xmloption TO { DOCUMENT | CONTENT }; processed in UTF-8, computations will be most efficient if the server encoding is also UTF-8. </para> - - <para> - <acronym>XML</> (Extensible Markup Language) support is not - just the existance of an <type>xml</type> data type, but a - variety of features supported by a database system. These - capabilities include import/export, indexing, searching, - transforming, and <acronym>XML</> to <acronym>SQL</> mapping. - <productname>PostgreSQL</> supports some but not all of these - <acronym>XML</> capabilities. For an overview of <acronym>XML</> - use in databases, see <ulink - url="http://www.rpbourret.com/xml/XMLAndDatabases.htm"></>. - </para> - - <variablelist> - <varlistentry> - <term>Import/Export</term> - <listitem> - - <para> - There is no facility for mapping <acronym>XML</> to relational - tables. An external tool must be used for this. One simple way to - export <acronym>XML</> is to use <application>psql</> in - <acronym>HTML</> mode (<literal>\pset format html</>), and convert - the <acronym>XHTML</> output to XML using an external tool. 
- </para> - </listitem> - </varlistentry> - - <varlistentry> - <term>Indexing</term> - <listitem> - - <para> - <filename>/contrib/xml2</> functions can be used in expression - indexes to index specific <acronym>XML</> fields. To index the - full contents of <acronym>XML</> documents, the full-text indexing - tool <filename>/contrib/tsearch2</> can be used. Of course, - Tsearch2 indexes have no <acronym>XML</> awareness so additional - <filename>/contrib/xml2</> checks should be added to queries. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term>Searching</term> - <listitem> - - <para> - XPath searches are implemented using <filename>/contrib/xml2</>. - It processes <acronym>XML</> text documents and returns results - based on the requested query. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term>Transforming</term> - <listitem> - - <para> - <filename>/contrib/xml2</> supports <acronym>XSLT</> (Extensible - Stylesheet Language Transformation). - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term>XML to SQL Mapping</term> - <listitem> - - <para> - This involves converting <acronym>XML</> data to and from - relational structures. <productname>PostgreSQL</> has no internal - support for such mapping, and relies on external tools to do such - conversions. 
- </para> - </listitem> - </varlistentry> - </variablelist> - </sect1> </chapter> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index f433f9221c7..4b03b4df148 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.352 2007/01/20 21:59:34 neilc Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.353 2007/01/29 13:24:30 petere Exp $ --> <chapter id="functions"> <title>Functions and Operators</title> @@ -10752,65 +10752,215 @@ SELECT (pg_stat_file('filename')).modification; </sect1> - <sect1 id="functions-xml"> - <title>XML Functions</title> + <sect1 id="functions-xml"> + <title>XML Functions</title> + + <para> + The functions and function-like expressions described in this + section operate on values of type <type>xml</type>. Check <xref + linkend="datatype-xml"> for information about the <type>xml</type> + type. The function-like expressions <function>xmlparse</function> + and <function>xmlserialize</function> for converting to and from + type <type>xml</type> are not repeated here. + </para> + + <sect2> + <title>Producing XML Content</title> <para> - The functions and function-like expressions described in this - section operate on values of type <type>xml</type>. + A set of functions and function-like expressions are available for + producing XML content from SQL data. As such, they are + particularly suitable for formatting query results into XML + documents for processing in client applications. </para> - <sect2> + <sect3> <title><literal>xmlcomment</literal></title> - - <indexterm> - <primary>xmlcomment</primary> - </indexterm> - - <synopsis> - <function>xmlcomment</function>(<replaceable>text</replaceable>) - </synopsis> - + + <indexterm> + <primary>xmlcomment</primary> + </indexterm> + +<synopsis> +<function>xmlcomment</function>(<replaceable>text</replaceable>) +</synopsis> + <para> - Creates an XML comment. 
+ The function <function>xmlcomment</function> creates an XML value + containing an XML comment with the specified text as content. + The text may not contain <literal>--</literal> or end with a + <literal>-</literal> so that the resulting construct is a valid + XML comment. If the argument is null, the result is null. </para> - </sect2> - - <sect2> + + <para> + Example: +<screen><![CDATA[ +SELECT xmlcomment('hello'); + + xmlcomment +-------------- + <!--hello--> +]]></screen> + </para> + </sect3> + + <sect3> <title><literal>xmlconcat</literal></title> - - <indexterm> - <primary>xmlconcat</primary> - </indexterm> - + + <indexterm> + <primary>xmlconcat</primary> + </indexterm> + <synopsis> - <function>xmlconcat</function>(<replaceable>xml</replaceable><optional>, xml, ...</optional>) + <function>xmlconcat</function>(<replaceable>xml</replaceable><optional>, ...</optional>) </synopsis> <para> - Combines a list of individual XML values to create a - single value containing an XML forest. + The function <function>xmlconcat</function> concatenates a list + of individual XML values to create a single value containing an + XML content fragment. Null values are omitted; the result is + only null if there are no nonnull arguments. </para> - </sect2> + + <para> + Example: +<screen><![CDATA[ +SELECT xmlconcat('<abc/>', '<bar>foo</bar>'); + + xmlconcat +---------------------- + <abc/><bar>foo</bar> +]]></screen> + </para> + + <para> + XML declarations, if present, are combined as follows. If all + argument values have the same XML version declaration, that + version is used in the result, else no version is used. If all + argument values have the standalone declaration value + <quote>yes</quote>, then that value is used in the result. If + all argument values have a standalone declaration value and at + least one is <quote>no</quote>, then that is used in the result. + Else the result will have no standalone declaration. 
If the + result is determined to require a standalone declaration but no + version declaration, a version declaration with version 1.0 will + be used because XML requires an XML declaration to contain a + version declaration. Encoding declarations are ignored and + removed in all cases. + </para> + + <para> + Example: +<screen><![CDATA[ +SELECT xmlconcat('<?xml version="1.1"?><foo/>', '<?xml version="1.1" standalone="no"?><bar/>'); + + xmlconcat +----------------------------------- + <?xml version="1.1"?><foo/><bar/> +]]></screen> + </para> + </sect3> - <sect2> + <sect3> <title><literal>xmlelement</literal></title> <indexterm> <primary>xmlelement</primary> </indexterm> - <synopsis> - <function>xmlelement</function>(name <replaceable>name</replaceable><optional>, xmlattribytes(<replaceable>value</replaceable> <optional>AS <replaceable>label</replaceable></optional><optional>, ... </optional>)</optional> - <optional><replaceable>, content, ...</replaceable></optional>) +<synopsis> + <function>xmlelement</function>(name <replaceable>name</replaceable> <optional>, xmlattributes(<replaceable>value</replaceable> <optional>AS <replaceable>attname</replaceable></optional> <optional>, ... </optional>)</optional> <optional><replaceable>, content, ...</replaceable></optional>) </synopsis> <para> - Creates an XML element, allowing the name to be specified. + The <function>xmlelement</function> expression produces an XML + element with the given name, attributes, and content. 
+ </para> + + <para> + Examples: +<screen><![CDATA[ +SELECT xmlelement(name foo); + + xmlelement +------------ + <foo/> + +SELECT xmlelement(name foo, xmlattributes('xyz' as bar)); + + xmlelement +------------------ + <foo bar="xyz"/> + +SELECT xmlelement(name foo, xmlattributes(current_date as bar), 'cont', 'ent'); + + xmlelement +------------------------------------- + <foo bar="2007-01-26">content</foo> +]]></screen> + </para> + + <para> + Element and attribute names that are not valid XML names are + escaped by replacing the offending characters by the sequence + <literal>_x<replaceable>HHHH</replaceable>_</literal>, where + <replaceable>HHHH</replaceable> is the character's Unicode + codepoint in hexadecimal notation. For example: +<screen><![CDATA[ +SELECT xmlelement(name "foo$bar", xmlattributes('xyz' as "a&b")); + + xmlelement +---------------------------------- + <foo_x0024_bar a_x0026_b="xyz"/> +]]></screen> </para> - </sect2> + + <para> + An explicit attribute name need not be specified if the attribute + value is a column reference, in which case the column's name will + be used as attribute name by default. In any other case, the + attribute must be given an explicit name. So this example is + valid: +<screen> +CREATE TABLE test (a xml, b xml); +SELECT xmlelement(name test, xmlattributes(a, b)) FROM test; +</screen> + But these are not: +<screen> +SELECT xmlelement(name test, xmlattributes('constant'), a, b) FROM test; +SELECT xmlelement(name test, xmlattributes(func(a, b))) FROM test; +</screen> + </para> + + <para> + Element content, if specified, will be formatted according to + data type. If the content is itself of type <type>xml</type>, + complex XML documents can be constructed. 
For example: +<screen><![CDATA[ +SELECT xmlelement(name foo, xmlattributes('xyz' as bar), + xmlelement(name abc), + xmlcomment('test'), + xmlelement(name xyz)); + + xmlelement +---------------------------------------------- + <foo bar="xyz"><abc/><!--test--><xyz/></foo> +]]></screen> + + Content of other types will be formatted into valid XML character + data. This means in particular that the characters &lt;, &gt;, + and &amp; will be converted to entities. Binary data (data type + <type>bytea</type>) will be represented in base64 or hex + encoding, depending on the setting of the configuration parameter + <xref linkend="guc-xmlbinary">. The particular behavior for + individual data types is expected to evolve in order to align the + SQL and PostgreSQL data types with the XML Schema specification, + at which point a more precise description will appear. + </para> + </sect3> - <sect2> + <sect3> <title><literal>xmlforest</literal></title> <indexterm> @@ -10818,16 +10968,54 @@ SELECT (pg_stat_file('filename')).modification; </indexterm> <synopsis> - <function>xmlforest</function>(<replaceable>value</replaceable> <optional>AS <replaceable>label</replaceable></optional><optional>, ...</optional>) + <function>xmlforest</function>(<replaceable>content</replaceable> <optional>AS <replaceable>name</replaceable></optional> <optional>, ...</optional>) </synopsis> <para> - Creates XML elements from columns, using the name of each - column as the name of the corresponding element. + The <function>xmlforest</function> expression produces an XML + forest (sequence) of elements using the given names and content. 
+ </para> + + <para> + Examples: +<screen><![CDATA[ +SELECT xmlforest('abc' AS foo, 123 AS bar); + + xmlforest +------------------------------ + <foo>abc</foo><bar>123</bar> + + +SELECT xmlforest(table_name, column_name) FROM information_schema.columns WHERE table_schema = 'pg_catalog'; + + xmlforest +------------------------------------------------------------------------------------------- + <table_name>pg_authid</table_name><column_name>rolname</column_name> + <table_name>pg_authid</table_name><column_name>rolsuper</column_name> + ... +]]></screen> + + As seen in the second example, the element name can be omitted if + the content value is a column reference, in which case the column + name is used by default. Otherwise, a name must be specified. + </para> + + <para> + Element names that are not valid XML names are escaped as shown + for <function>xmlelement</function> above. Similarly, content + data is escaped to make valid XML content, unless it is already + of type <type>xml</type>. + </para> + + <para> + Note that XML forests are not valid XML documents if they consist + of more than one element. So it might be useful to wrap + <function>xmlforest</function> expressions in + <function>xmlelement</function>. </para> - </sect2> + </sect3> - <sect2> + <sect3> <title><literal>xmlpi</literal></title> <indexterm> @@ -10839,11 +11027,24 @@ SELECT (pg_stat_file('filename')).modification; </synopsis> <para> - Creates an XML processing instruction. + The <function>xmlpi</function> expression creates an XML + processing instruction. The content, if present, must not + contain the character sequence <literal>?&gt;</literal>. 
+ </para> + + <para> + Example: +<screen><![CDATA[ +SELECT xmlpi(name php, 'echo "hello world";'); + + xmlpi +----------------------------- + <?php echo "hello world";?> +]]></screen> </para> - </sect2> + </sect3> - <sect2> + <sect3> <title><literal>xmlroot</literal></title> <indexterm> @@ -10851,13 +11052,132 @@ SELECT (pg_stat_file('filename')).modification; </indexterm> <synopsis> - <function>xmlroot</function>(<replaceable>xml</replaceable>, version <replaceable>text</replaceable> <optional>, standalone yes|no|no value</optional>) + <function>xmlroot</function>(<replaceable>xml</replaceable>, version <replaceable>text</replaceable>|no value <optional>, standalone yes|no|no value</optional>) </synopsis> <para> - Creates the root node of an XML document. + The <function>xmlroot</function> expression alters the properties + of the root node of an XML value. If a version is specified, + this replaces the value in the version declaration; if a + standalone value is specified, this replaces the value in the + standalone declaration. + </para> + + <para> +<screen><![CDATA[ +SELECT xmlroot(xmlparse(document '<?xml version="1.1"?><content>abc</content>'), version '1.0', standalone yes); + + xmlroot +---------------------------------------- + <?xml version="1.0" standalone="yes"?> + <content>abc</content> +]]></screen> + </para> + </sect3> + + <sect3> + <title>XML Predicates</title> + + <indexterm> + <primary>IS DOCUMENT</primary> + </indexterm> + +<synopsis> +<replaceable>xml</replaceable> IS DOCUMENT +</synopsis> + + <para> + The expression <literal>IS DOCUMENT</literal> returns true if the + argument XML value is a proper XML document, false if it is not + (that is, it is a content fragment), or null if the argument is + null. See <xref linkend="datatype-xml"> about the difference + between documents and content fragments. 
</para> + </sect3> + </sect2> + + <sect2> + <title>Processing XML</title> + + <para> + <acronym>XML</> support is not just the existence of an + <type>xml</type> data type, but a variety of features supported by + a database system. These capabilities include import/export, + indexing, searching, transforming, and <acronym>XML</> to + <acronym>SQL</> mapping. <productname>PostgreSQL</> supports some + but not all of these <acronym>XML</> capabilities. For an + overview of <acronym>XML</> use in databases, see <ulink + url="http://www.rpbourret.com/xml/XMLAndDatabases.htm"></>. + </para> + + <variablelist> + <varlistentry> + <term>Import/Export</term> + <listitem> + + <para> + There is no facility for mapping <acronym>XML</> to relational + tables. An external tool must be used for this. One simple way + to export <acronym>XML</> is to use <application>psql</> in + <acronym>HTML</> mode (<literal>\pset format html</>), and + convert the <acronym>XHTML</> output to XML using an external + tool. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>Indexing</term> + <listitem> + + <para> + <filename>contrib/xml2/</> functions can be used in expression + indexes to index specific <acronym>XML</> fields. To index the + full contents of <acronym>XML</> documents, the full-text + indexing tool <filename>contrib/tsearch2/</> can be used. Of + course, Tsearch2 indexes have no <acronym>XML</> awareness so + additional <filename>contrib/xml2/</> checks should be added to + queries. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>Searching</term> + <listitem> + + <para> + XPath searches are implemented using <filename>contrib/xml2/</>. + It processes <acronym>XML</> text documents and returns results + based on the requested query. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>Transforming</term> + <listitem> + + <para> + <filename>contrib/xml2/</> supports <acronym>XSLT</> (Extensible + Stylesheet Language Transformation). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>XML to SQL Mapping</term> + <listitem> + + <para> + This involves converting <acronym>XML</> data to and from + relational structures. <productname>PostgreSQL</> has no + internal support for such mapping, and relies on external tools + to do such conversions. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> - </sect1> </chapter> -- GitLab