/*-------------------------------------------------------------------------
 *
 * xml.c
 *	  XML data type support.
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.87 2009/05/12 20:17:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/*
 * Generally, XML type support is only available when libxml use was
 * configured during the build.  But even if that is not done, the
 * type and all the functions are available, but most of them will
 * fail.  For one thing, this avoids having to manage variant catalog
 * installations.  But it also has nice effects such as that you can
 * dump a database containing XML type data even if the server is not
 * linked with libxml.  Thus, make sure xml_out() works even if nothing
 * else does.
 */

/*
 * Notes on memory management:
 *
 * Via callbacks, libxml is told to use palloc and friends for memory
 * management, within a context that we reset at transaction end (and also at
 * subtransaction abort) to prevent memory leaks.  Resetting at transaction or
 * subtransaction abort is necessary since we might have thrown a longjmp
 * while some data structures were not linked from anywhere persistent.
 * Resetting at transaction commit might not be necessary, but seems a good
 * idea to forestall long-term leaks.
 *
 * Sometimes libxml allocates global structures in the hope that it can reuse
 * them later on.  Therefore, before resetting LibxmlContext, we must tell
 * libxml to discard any global data it has.  The libxml API documentation is
 * not very good about specifying this, but for now we assume that
 * xmlCleanupParser() will get rid of anything we need to worry about.
 *
 * We use palloc --- which will throw a longjmp on error --- for allocation
 * callbacks that officially should act like malloc, ie, return NULL on
 * out-of-memory.  This is a bit risky since there is a chance of leaving
 * persistent libxml data structures in an inconsistent partially-constructed
 * state, perhaps leading to crash in xmlCleanupParser().  However, as of
 * early 2008 it is *known* that libxml can crash on out-of-memory due to
 * inadequate checks for NULL returns, so this behavior seems the lesser
 * of two evils.
 */

#include "postgres.h"

#ifdef USE_LIBXML
#include <libxml/chvalid.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#endif   /* USE_LIBXML */

#include "libpq/pqformat.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/memutils.h"
#ifdef USE_LIBXML

/* libxml error-message buffer and the memory context handed to libxml */
static StringInfo xml_err_buf = NULL;
static MemoryContext LibxmlContext = NULL;

/* libxml setup and memory-management callbacks */
static void xml_init(void);
static void xml_memory_init(void);
static void xml_memory_cleanup(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);

/* error reporting */
static void xml_ereport(int level, int sqlcode, const char *msg);
static void xml_errorHandler(void *ctxt, const char *msg,...);
static void xml_ereport_by_code(int level, int sqlcode,
					const char *msg, int errcode);

/* parsing and conversion helpers */
static xmlChar *xml_text2xmlChar(text *in);
static int parse_xml_decl(const xmlChar * str, size_t *lenp,
			   xmlChar ** version, xmlChar ** encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar * version,
			   pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
		  bool preserve_whitespace, xmlChar * encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
#endif   /* USE_LIBXML */

/* SQL-to-XML mapping helpers */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
					  const char *xmlschema, bool nulls, bool tableforest,
					  const char *targetns, bool top_level);
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
						 bool nulls, bool tableforest, const char *targetns);
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
								  List *relid_list, bool nulls,
								  bool tableforest, const char *targetns);
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
								   bool nulls, bool tableforest,
								   const char *targetns);
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
						  char *tablename, bool nulls, bool tableforest,
						  const char *targetns, bool top_level);

/*
 * Report that the requested operation needs libxml.  Used on the code
 * paths that are compiled when USE_LIBXML is not defined.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))

/* from SQL/XML:2003 section 4.7 */
#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"


#ifdef USE_LIBXML

/*
 * Look up the encoding ID for an encoding name given as an xmlChar string.
 *
 * Returns the value produced by pg_char_to_encoding(); if that lookup
 * yields a negative result, an ERROR naming the bad encoding is raised
 * instead of returning.
 */
static int
xmlChar_to_encoding(xmlChar * encoding_name)
{
	int			encoding = pg_char_to_encoding((char *) encoding_name);

	if (encoding < 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding name \"%s\"",
						(char *) encoding_name)));
	return encoding;
}
#endif


/*
 * xml_in uses a plain C string to VARDATA conversion, so for the time being
 * we use the conversion function for the text datatype.
 *
 * This is only acceptable so long as xmltype and text use the same
 * representation.
 */
Datum
xml_in(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	char	   *s = PG_GETARG_CSTRING(0);
	xmltype    *vardata;
	xmlDocPtr	doc;

	/* The stored form is simply the input string as a text-style varlena */
	vardata = (xmltype *) cstring_to_text(s);

	/*
	 * Parse the data to check if it is well-formed XML data.  Assume that
	 * ERROR occurred if parsing failed.
	 */
	doc = xml_parse(vardata, xmloption, true, NULL);

	/* The parse tree was only needed for validation; release it now */
	xmlFreeDoc(doc);

	PG_RETURN_XML_P(vardata);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}


/*
 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
 * time being we use the conversion function for the text datatype.
 *
 * This is only acceptable so long as xmltype and text use the same
 * representation.
 */
xml_out_internal(xmltype *x, pg_enc target_encoding)
	char	   *str = text_to_cstring((text *) x);
Bruce Momjian's avatar
Bruce Momjian committed
	xmlChar    *version;
Tom Lane's avatar
Tom Lane committed
	if ((res_code = parse_xml_decl((xmlChar *) str,
								   &len, &version, NULL, &standalone)) == 0)
		if (!print_xml_decl(&buf, version, target_encoding, standalone))
Bruce Momjian's avatar
Bruce Momjian committed
			 * If we are not going to produce an XML declaration, eat a single
			 * newline in the original string to prevent empty first lines in
			 * the output.
			 */
			if (*(str + len) == '\n')
				len += 1;
		}
		appendStringInfoString(&buf, str + len);

		return buf.data;
	}

	xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
						"could not parse XML declaration in stored value",
						res_code);
Bruce Momjian's avatar
Bruce Momjian committed
	xmltype    *x = PG_GETARG_XML_P(0);
Bruce Momjian's avatar
Bruce Momjian committed
	 * xml_out removes the encoding property in all cases.	This is because we
	 * cannot control from here whether the datum will be converted to a
	 * different client encoding, so we'd do more harm than good by including
	 * it.
	 */
	PG_RETURN_CSTRING(xml_out_internal(x, 0));
Datum
xml_recv(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
Bruce Momjian's avatar
Bruce Momjian committed
	xmltype    *result;
Bruce Momjian's avatar
Bruce Momjian committed
	xmlChar    *encoding = NULL;
Bruce Momjian's avatar
Bruce Momjian committed
	 * Read the data in raw format. We don't know yet what the encoding is, as
	 * that information is embedded in the xml declaration; so we have to
	 * parse that before converting to server encoding.
	 */
	nbytes = buf->len - buf->cursor;
	str = (char *) pq_getmsgbytes(buf, nbytes);
	/*
	 * We need a null-terminated string to pass to parse_xml_decl().  Rather
	 * than make a separate copy, make the temporary result one byte bigger
	 * than it needs to be.
	 */
	result = palloc(nbytes + 1 + VARHDRSZ);
	SET_VARSIZE(result, nbytes + VARHDRSZ);
	memcpy(VARDATA(result), str, nbytes);
	str = VARDATA(result);
	str[nbytes] = '\0';

	parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
Bruce Momjian's avatar
Bruce Momjian committed
	 * Parse the data to check if it is well-formed XML data.  Assume that
	 * xml_parse will throw ERROR if not.
	doc = xml_parse(result, xmloption, true, encoding);
	/* Now that we know what we're dealing with, convert to server encoding */
	newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
												nbytes,
Bruce Momjian's avatar
Bruce Momjian committed
											  xmlChar_to_encoding(encoding) :
												GetDatabaseEncoding());

	if (newstr != str)
	{
		result = (xmltype *) cstring_to_text(newstr);
	PG_RETURN_XML_P(result);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}


Datum
xml_send(PG_FUNCTION_ARGS)
{
Bruce Momjian's avatar
Bruce Momjian committed
	xmltype    *x = PG_GETARG_XML_P(0);
	StringInfoData buf;
Bruce Momjian's avatar
Bruce Momjian committed
	 * xml_out_internal doesn't convert the encoding, it just prints the right
	 * declaration. pq_sendtext will do the conversion.
	 */
	outval = xml_out_internal(x, pg_get_client_encoding());

	pq_begintypsend(&buf);
	pq_sendtext(&buf, outval, strlen(outval));
	pfree(outval);
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}


#ifdef USE_LIBXML
/* Append the payload bytes of text value t (header excluded) to str. */
static void
appendStringInfoText(StringInfo str, const text *t)
{
	int			payload_len = VARSIZE(t) - VARHDRSZ;

	appendBinaryStringInfo(str, VARDATA(t), payload_len);
}


/* Build an xmltype value from the first buf->len bytes of buf->data. */
static xmltype *
stringinfo_to_xmltype(StringInfo buf)
{
	return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
}


/* Build an xmltype value from a null-terminated C string. */
static xmltype *
cstring_to_xmltype(const char *string)
{
	return (xmltype *) cstring_to_text(string);
}


/* Build an xmltype value from the current contents of a libxml buffer. */
static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)
{
	return (xmltype *) cstring_to_text_with_len((char *) xmlBufferContent(buf),
												xmlBufferLength(buf));
}
#endif


/*
 * Wrap the text argument in an XML comment ("<!--" ... "-->").
 *
 * The argument is rejected with ERRCODE_INVALID_XML_COMMENT if it contains
 * "--" anywhere or ends with "-".
 */
Datum
xmlcomment(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	text	   *arg = PG_GETARG_TEXT_P(0);
	char	   *argdata = VARDATA(arg);
	int			len = VARSIZE(arg) - VARHDRSZ;
	StringInfoData buf;
	int			i;

	/* check for "--" in string or "-" at the end */
	for (i = 1; i < len; i++)
	{
		if (argdata[i] == '-' && argdata[i - 1] == '-')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_XML_COMMENT),
					 errmsg("invalid XML comment")));
	}
	if (len > 0 && argdata[len - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_XML_COMMENT),
				 errmsg("invalid XML comment")));

	initStringInfo(&buf);
	appendStringInfo(&buf, "<!--");
	appendStringInfoText(&buf, arg);
	appendStringInfo(&buf, "-->");

	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}



/*
 * Concatenate a list of xml values into one xml value.
 *
 * For each argument the XML declaration is parsed with parse_xml_decl() and
 * only the text following it (str + len) is appended to the result.  While
 * doing so we track:
 *	- standalone: stays 1 only if no argument says otherwise; drops to 0 when
 *	  an argument reports 0, and to -1 when an argument reports a negative
 *	  value;
 *	- version: the first version seen is adopted; a missing or differing
 *	  version in any argument sets global_version_no_value.
 * A declaration is prepended to the result if the version is usable or the
 * combined standalone value is >= 0.
 *
 * TODO: xmlconcat needs to merge the notations and unparsed entities
 * of the argument values.  Not very important in practice, though.
 */
xmltype *
xmlconcat(List *args)
{
#ifdef USE_LIBXML
	int			global_standalone = 1;
	xmlChar    *global_version = NULL;
	bool		global_version_no_value = false;
	StringInfoData buf;
	ListCell   *v;

	initStringInfo(&buf);
	foreach(v, args)
	{
		xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
		size_t		len;
		xmlChar    *version;
		int			standalone;
		char	   *str;

		len = VARSIZE(x) - VARHDRSZ;
		str = text_to_cstring((text *) x);

		parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);

		if (standalone == 0 && global_standalone == 1)
			global_standalone = 0;
		if (standalone < 0)
			global_standalone = -1;

		if (!version)
			global_version_no_value = true;
		else if (!global_version)
			global_version = version;
		else if (xmlStrcmp(version, global_version) != 0)
			global_version_no_value = true;

		/* append everything after the argument's own declaration */
		appendStringInfoString(&buf, str + len);
		pfree(str);
	}

	if (!global_version_no_value || global_standalone >= 0)
	{
		StringInfoData buf2;

		initStringInfo(&buf2);

		print_xml_decl(&buf2,
					   (!global_version_no_value) ? global_version : NULL,
					   0,
					   global_standalone);

		/* declaration first, then the concatenated content */
		appendStringInfoString(&buf2, buf.data);
		buf = buf2;
	}

	return stringinfo_to_xmltype(&buf);
#else
	NO_XML_SUPPORT();
	return NULL;
#endif
}


/*
 * XMLAGG support
 */
/*
 * Two-argument, NULL-tolerant form of xmlconcat(): if one argument is NULL
 * the other is returned unchanged, if both are NULL the result is NULL, and
 * otherwise the two values are concatenated with xmlconcat().
 */
Datum
xmlconcat2(PG_FUNCTION_ARGS)
{
	if (PG_ARGISNULL(0))
	{
		if (PG_ARGISNULL(1))
			PG_RETURN_NULL();
		else
			PG_RETURN_XML_P(PG_GETARG_XML_P(1));
	}
	else if (PG_ARGISNULL(1))
		PG_RETURN_XML_P(PG_GETARG_XML_P(0));
	else
		PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
											 PG_GETARG_XML_P(1))));
}


/*
 * Convert a text value to xml by parsing it with xmlparse() under the
 * current xmloption setting, preserving whitespace.
 *
 * NOTE(review): the function header was missing from this copy of the file;
 * the name texttoxml is restored from upstream PostgreSQL -- confirm.
 */
Datum
texttoxml(PG_FUNCTION_ARGS)
{
	text	   *data = PG_GETARG_TEXT_P(0);

	PG_RETURN_XML_P(xmlparse(data, xmloption, true));
}


Datum
xmltotext(PG_FUNCTION_ARGS)
{
Bruce Momjian's avatar
Bruce Momjian committed
	xmltype    *data = PG_GETARG_XML_P(0);
	/* It's actually binary compatible. */
Tom Lane's avatar
Tom Lane committed
	PG_RETURN_TEXT_P((text *) data);
xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
{
	if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
		ereport(ERROR,
				(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
				 errmsg("not an XML document")));

	/* It's actually binary compatible, save for the above check. */
	return (text *) data;
xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
Bruce Momjian's avatar
Bruce Momjian committed
	XmlExpr    *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
	xmltype    *result;
	List	   *named_arg_strings;
	List	   *arg_strings;
	int			i;
	ListCell   *arg;
	ListCell   *narg;
	xmlBufferPtr buf;
	xmlTextWriterPtr writer;

Bruce Momjian's avatar
Bruce Momjian committed
	 * We first evaluate all the arguments, then start up libxml and create
	 * the result.	This avoids issues if one of the arguments involves a call
	 * to some other function or subsystem that wants to use libxml on its own
	 * terms.
Bruce Momjian's avatar
Bruce Momjian committed
		ExprState  *e = (ExprState *) lfirst(arg);

		value = ExecEvalExpr(e, econtext, &isnull, NULL);
			str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr));
		named_arg_strings = lappend(named_arg_strings, str);
Bruce Momjian's avatar
Bruce Momjian committed
		ExprState  *e = (ExprState *) lfirst(arg);

		value = ExecEvalExpr(e, econtext, &isnull, NULL);
		/* here we can just forget NULL elements immediately */
		{
			str = map_sql_value_to_xml_value(value,
											 exprType((Node *) e->expr));
			arg_strings = lappend(arg_strings, str);
		}
	}

	/* now safe to run libxml */
	xml_init();

	buf = xmlBufferCreate();
	writer = xmlNewTextWriterMemory(buf, 0);

	xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);

	forboth(arg, named_arg_strings, narg, xexpr->arg_names)
	{
Bruce Momjian's avatar
Bruce Momjian committed
		char	   *str = (char *) lfirst(arg);
		char	   *argname = strVal(lfirst(narg));

		if (str)
			xmlTextWriterWriteAttribute(writer,
										(xmlChar *) argname,
										(xmlChar *) str);
	}

	foreach(arg, arg_strings)
	{
Bruce Momjian's avatar
Bruce Momjian committed
		char	   *str = (char *) lfirst(arg);

		xmlTextWriterWriteRaw(writer, (xmlChar *) str);
	}

	xmlTextWriterEndElement(writer);
	xmlFreeTextWriter(writer);

	result = xmlBuffer_to_xmltype(buf);
	xmlBufferFree(buf);
xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
	doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL);
xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
Bruce Momjian's avatar
Bruce Momjian committed
	xmltype    *result;
	if (pg_strcasecmp(target, "xml") == 0)
Bruce Momjian's avatar
Bruce Momjian committed
				(errcode(ERRCODE_SYNTAX_ERROR), /* really */
				 errmsg("invalid XML processing instruction"),
				 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
Bruce Momjian's avatar
Bruce Momjian committed
	 * Following the SQL standard, the null check comes after the syntax check
	 * above.
	 */
	*result_is_null = arg_is_null;
	if (*result_is_null)
Bruce Momjian's avatar
Bruce Momjian committed
		return NULL;
	appendStringInfo(&buf, "<?%s", target);

	if (arg != NULL)
Bruce Momjian's avatar
Bruce Momjian committed
		char	   *string;
		if (strstr(string, "?>") != NULL)
Bruce Momjian's avatar
Bruce Momjian committed
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
					 errmsg("invalid XML processing instruction"),
			errdetail("XML processing instruction cannot contain \"?>\".")));
		appendStringInfoChar(&buf, ' ');
		appendStringInfoString(&buf, string + strspn(string, " "));
	result = stringinfo_to_xmltype(&buf);
	pfree(buf.data);
	return result;
xmlroot(xmltype *data, text *version, int standalone)
	char	   *str;
	size_t		len;
Bruce Momjian's avatar
Bruce Momjian committed
	xmlChar    *orig_version;
	int			orig_standalone;
	StringInfoData buf;
	len = VARSIZE(data) - VARHDRSZ;
	str = text_to_cstring((text *) data);

	parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
		orig_version = xml_text2xmlChar(version);
		orig_version = NULL;
		case XML_STANDALONE_YES:
			orig_standalone = 1;
			break;
		case XML_STANDALONE_NO:
			orig_standalone = 0;
		case XML_STANDALONE_NO_VALUE:
			orig_standalone = -1;
		case XML_STANDALONE_OMITTED:
			/* leave original value */
	initStringInfo(&buf);
	print_xml_decl(&buf, orig_version, 0, orig_standalone);
	appendStringInfoString(&buf, str + len);
	return stringinfo_to_xmltype(&buf);
#endif
}


/*
 * Validate document (given as string) against DTD (given as external link)
 *
 * This has been removed because it is a security hole: unprivileged users
 * should not be able to use Postgres to fetch arbitrary external files,
 * which unfortunately is exactly what libxml is willing to do with the DTD
 * parameter.
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("xmlvalidate is not implemented")));
{
#ifdef USE_LIBXML
	bool		result;
	xmlDocPtr	doc = NULL;
	MemoryContext ccxt = CurrentMemoryContext;

	PG_TRY();
	{
		doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL);
Bruce Momjian's avatar
Bruce Momjian committed
		ErrorData  *errdata;
		MemoryContext ecxt;

		ecxt = MemoryContextSwitchTo(ccxt);
		errdata = CopyErrorData();
		if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
		{
			FlushErrorState();
			result = false;
		}
		else
		{
			MemoryContextSwitchTo(ecxt);
			PG_RE_THROW();
		}
	}
	PG_END_TRY();

	if (doc)
		xmlFreeDoc(doc);

	return result;
Bruce Momjian's avatar
Bruce Momjian committed
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return false;
Bruce Momjian's avatar
Bruce Momjian committed
#endif   /* not USE_LIBXML */
/*
 * End-of-transaction hook for the xml module.  It is invoked on
 * subtransaction abort as well (the notes at the top of the file explain
 * why).  In libxml-enabled builds it calls xml_memory_cleanup(); otherwise
 * it does nothing.
 */
void
AtEOXact_xml(void)
{
#if defined(USE_LIBXML)
	xml_memory_cleanup();
#endif   /* USE_LIBXML */
}


 * Set up for use of libxml --- this should be called by each function that
 * is about to use libxml facilities.
 *
 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
 * check)
		/* Stuff we need do only once per session */
Bruce Momjian's avatar
Bruce Momjian committed
		 * Currently, we have no pure UTF-8 support for internals -- check if
		 * we can work.
		 */
		if (sizeof(char) != sizeof(xmlChar))
			ereport(ERROR,
					(errmsg("could not initialize XML library"),
					 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
							   (int) sizeof(char), (int) sizeof(xmlChar))));

		/* create error buffer in permanent context */
		oldcontext = MemoryContextSwitchTo(TopMemoryContext);
		xml_err_buf = makeStringInfo();
		MemoryContextSwitchTo(oldcontext);

		/* Now that xml_err_buf exists, safe to call xml_errorHandler */
		xmlSetGenericErrorFunc(NULL, xml_errorHandler);

		/* Set up memory allocation our way, too */

		/* Check library compatibility */
		LIBXML_TEST_VERSION;

		first_time = false;
	}
	else
	{
		/* Reset pre-existing buffer to empty */
		/*
		 * We re-establish the callback functions every time.  This makes it
		 * safe for other subsystems (PL/Perl, say) to also use libxml with
		 * their own callbacks ... so long as they likewise set up the
Bruce Momjian's avatar
Bruce Momjian committed
		 * callbacks on every use.	It's cheap enough to not be worth worrying
		 * about, anyway.
		 */
		xmlSetGenericErrorFunc(NULL, xml_errorHandler);
/*
 * SQL/XML allows storing "XML documents" or "XML content".  "XML
 * documents" are specified by the XML specification and are parsed
 * easily by libxml.  "XML content" is specified by SQL/XML as the
 * production "XMLDecl? content".  But libxml can only parse the
 * "content" part, so we have to parse the XML declaration ourselves
 * to complete this.
 */

/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is an
 * XML blank character.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of XML blank characters.  Wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))


/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
static xmlChar *
xml_pnstrdup(const xmlChar *str, size_t len)
{
	xmlChar	   *result;

	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
	memcpy(result, str, len * sizeof(xmlChar));
	result[len] = 0;
	return result;
}

/*
 * str is the null-terminated input string.  Remaining arguments are
 * output arguments; each can be NULL if value is not wanted.
 * version and encoding are returned as locally-palloc'd strings.
 * Result is 0 if OK, an error code if not.
 */
Bruce Momjian's avatar
Bruce Momjian committed
parse_xml_decl(const xmlChar * str, size_t *lenp,
			   xmlChar ** version, xmlChar ** encoding, int *standalone)
Tom Lane's avatar
Tom Lane committed
	xml_init();
	/* Initialize output arguments to "not present" */
	if (version)
		*version = NULL;
	if (encoding)
		*encoding = NULL;
	if (standalone)
		*standalone = -1;

Tom Lane's avatar
Tom Lane committed
	p = str;

Bruce Momjian's avatar
Bruce Momjian committed
	if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
	/* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
Bruce Momjian's avatar
Bruce Momjian committed
	utf8len = strlen((const char *) (p + 5));
	utf8char = xmlGetUTF8Char(p + 5, &utf8len);
	if (PG_XMLISNAMECHAR(utf8char))
	p += 5;

	/* version */
	CHECK_XML_SPACE(p);
	SKIP_XML_SPACE(p);
Bruce Momjian's avatar
Bruce Momjian committed
	if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
		return XML_ERR_VERSION_MISSING;
	p += 7;
	SKIP_XML_SPACE(p);
	if (*p != '=')
		return XML_ERR_VERSION_MISSING;
	p += 1;
	SKIP_XML_SPACE(p);

	if (*p == '\'' || *p == '"')
	{
		const xmlChar *q;

		q = xmlStrchr(p + 1, *p);
		if (!q)
			return XML_ERR_VERSION_MISSING;

		if (version)
			*version = xml_pnstrdup(p + 1, q - p - 1);
		return XML_ERR_VERSION_MISSING;

	/* encoding */
	save_p = p;
	SKIP_XML_SPACE(p);
Bruce Momjian's avatar
Bruce Momjian committed
	if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)