From 4e8b5cd94bd24e8162ef4fd45d0ce621481844da Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Tue, 23 Jan 2007 23:39:16 +0000
Subject: [PATCH] Simplify handling of XML error messages: Just use the string
 provided by libxml as the detail message.

As per <http://archives.postgresql.org/pgsql-hackers/2006-12/msg01087.php>.

For converting error codes to messages, we only need to cover those codes
that we raise ourselves now.
---
 src/backend/utils/adt/xml.c       | 273 +++++++-----------------------
 src/test/regress/expected/xml.out |  16 +-
 2 files changed, 74 insertions(+), 215 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index fdf7f8e0a91..3b283d247f0 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.20 2007/01/20 09:27:19 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.21 2007/01/23 23:39:16 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,8 +51,6 @@
 
 #ifdef USE_LIBXML
 
-#define PG_XML_DEFAULT_URI "dummy.xml"
-
 static StringInfo xml_err_buf = NULL;
 
 static void 	xml_init(void);
@@ -63,7 +61,7 @@ static void 	xml_pfree(void *ptr);
 static char    *xml_pstrdup(const char *string);
 #endif
 static void 	xml_ereport(int level, int sqlcode,
-							const char *msg, void *ctxt);
+							const char *msg);
 static void 	xml_errorHandler(void *ctxt, const char *msg, ...);
 static void 	xml_ereport_by_code(int level, int sqlcode,
 									const char *msg, int errcode);
@@ -667,14 +665,14 @@ xmlvalidate(PG_FUNCTION_ARGS)
 		ctxt = xmlNewParserCtxt();
 		if (ctxt == NULL)
 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not allocate parser context", ctxt);
+						"could not allocate parser context");
 
 		doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
 								VARSIZE(data) - VARHDRSZ,
-								PG_XML_DEFAULT_URI, NULL, 0);
+								NULL, NULL, 0);
 		if (doc == NULL)
 			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
-						"could not parse XML data", ctxt);
+						"could not parse XML data");
 
 #if 0
 		uri = xmlCreateURI();
@@ -683,21 +681,21 @@ xmlvalidate(PG_FUNCTION_ARGS)
 		uri = xmlParseURI(dtdOrUri);
 		if (uri == NULL)
 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
-						"not implemented yet... (TODO)", ctxt);
+						"not implemented yet... (TODO)");
 		else
 #endif
 			dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
 
 		if (dtd == NULL)
 			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
-						"could not load DTD", ctxt);
+						"could not load DTD");
 
 		if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
 			result = true;
 
 		if (!result)
 			xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT,
-						"validation against DTD failed", ctxt);
+						"validation against DTD failed");
 
 #if 0
 		if (uri)
@@ -977,7 +975,6 @@ finished:
  * Convert a C string to XML internal representation
  *
  * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
- * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
  */
 static xmlDocPtr
 xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding)
@@ -1006,7 +1003,7 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
 		ctxt = xmlNewParserCtxt();
 		if (ctxt == NULL)
 			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not allocate parser context", ctxt);
+						"could not allocate parser context");
 
 		if (is_document)
 		{
@@ -1018,13 +1015,13 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
 			 * SQL/XML:10.16.7.e)
 			 */
 			doc = xmlCtxtReadDoc(ctxt, utf8string,
-								 PG_XML_DEFAULT_URI,
+								 NULL,
 								 "UTF-8",
-									XML_PARSE_NOENT | XML_PARSE_DTDATTR
-									| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+								 XML_PARSE_NOENT | XML_PARSE_DTDATTR
+								 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
 			if (doc == NULL)
 				xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
-							"invalid XML document", ctxt);
+							"invalid XML document");
 		}
 		else
 		{
@@ -1036,12 +1033,14 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
 			doc = xmlNewDoc(NULL);
 
 			res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone);
-
-			if (res_code == 0)
-				res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
 			if (res_code != 0)
 				xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
-									"invalid XML content", res_code);
+									"invalid XML content: invalid XML declaration", res_code);
+
+			res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
+			if (res_code != 0)
+				xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT,
+							"invalid XML content");
 
 			doc->version = xmlStrdup(version);
 			doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
@@ -1120,49 +1119,45 @@ xml_pstrdup(const char *string)
 
 
 /*
- * Wrapper for "ereport" function.
- * Adds detail - libxml's native error message, if any.
+ * Wrapper for "ereport" function for XML-related errors.  The "msg"
+ * is the SQL-level message; some can be adopted from the SQL/XML
+ * standard.  This function adds libxml's native error messages, if
+ * any, as detail.
  */
 static void
 xml_ereport(int level, int sqlcode,
-			const char *msg, void *ctxt)
+			const char *msg)
 {
-	xmlErrorPtr libxmlErr = NULL;
+	char *detail;
 
 	if (xml_err_buf->len > 0)
 	{
-		ereport(DEBUG1,
-				(errmsg("%s", xml_err_buf->data)));
+		detail = pstrdup(xml_err_buf->data);
 		xml_err_buf->data[0] = '\0';
 		xml_err_buf->len = 0;
 	}
+	else
+		detail = NULL;
 
-	if (ctxt != NULL)
-		libxmlErr = xmlCtxtGetLastError(ctxt);
-
-	if (libxmlErr == NULL)
+	/* libxml error messages end in '\n'; get rid of it */
+	if (detail)
 	{
+		size_t len;
+
+		len = strlen(detail);
+		if (len > 0 && detail[len-1] == '\n')
+			detail[len-1] = '\0';
+
 		ereport(level,
 				(errcode(sqlcode),
-				 errmsg("%s", msg)));
+				 errmsg("%s", msg),
+				 errdetail("%s", detail)));
 	}
 	else
 	{
-		/* as usual, libxml error message contains '\n'; get rid of it */
-		char *xmlErrDetail;
-		int xmlErrLen, i;
-
-		xmlErrDetail = pstrdup(libxmlErr->message);
-		xmlErrLen = strlen(xmlErrDetail);
-		for (i = 0; i < xmlErrLen; i++)
-		{
-			if (xmlErrDetail[i] == '\n')
-				xmlErrDetail[i] = '.';
-		}
 		ereport(level,
 				(errcode(sqlcode),
-				 errmsg("%s", msg),
-				 errdetail("%s", xmlErrDetail)));
+				 errmsg("%s", msg)));
 	}
 }
 
@@ -1194,8 +1189,11 @@ xml_errorHandler(void *ctxt, const char *msg,...)
 
 
 /*
- * Return error message by libxml error code
- * TODO make them closer to recommendations from Postgres manual
+ * Wrapper for "ereport" function for XML-related errors.  The "msg"
+ * is the SQL-level message; some can be adopted from the SQL/XML
+ * standard.  This function uses "code" to create a textual detail
+ * message.  At the moment, we only need to cover those codes that we
+ * may raise in this file.
  */
 static void
 xml_ereport_by_code(int level, int sqlcode,
@@ -1203,173 +1201,26 @@ xml_ereport_by_code(int level, int sqlcode,
 {
     const char *det;
 
-	if (xml_err_buf->len > 0)
-	{
-		ereport(DEBUG1,
-				(errmsg("%s", xml_err_buf->data)));
-		xml_err_buf->data[0] = '\0';
-		xml_err_buf->len = 0;
-	}
-
     switch (code)
 	{
-        case XML_ERR_INTERNAL_ERROR:
-            det = "libxml internal error";
-            break;
-        case XML_ERR_ENTITY_LOOP:
-            det = "Detected an entity reference loop";
-            break;
-        case XML_ERR_ENTITY_NOT_STARTED:
-            det = "EntityValue: \" or ' expected";
-            break;
-        case XML_ERR_ENTITY_NOT_FINISHED:
-            det = "EntityValue: \" or ' expected";
-            break;
-        case XML_ERR_ATTRIBUTE_NOT_STARTED:
-            det = "AttValue: \" or ' expected";
-            break;
-        case XML_ERR_LT_IN_ATTRIBUTE:
-            det = "Unescaped '<' not allowed in attributes values";
-            break;
-        case XML_ERR_LITERAL_NOT_STARTED:
-            det = "SystemLiteral \" or ' expected";
-            break;
-        case XML_ERR_LITERAL_NOT_FINISHED:
-            det = "Unfinished System or Public ID \" or ' expected";
-            break;
-        case XML_ERR_MISPLACED_CDATA_END:
-            det = "Sequence ']]>' not allowed in content";
-            break;
-        case XML_ERR_URI_REQUIRED:
-            det = "SYSTEM or PUBLIC, the URI is missing";
-            break;
-        case XML_ERR_PUBID_REQUIRED:
-            det = "PUBLIC, the Public Identifier is missing";
-            break;
-        case XML_ERR_HYPHEN_IN_COMMENT:
-            det = "Comment must not contain '--' (double-hyphen)";
-            break;
-        case XML_ERR_PI_NOT_STARTED:
-            det = "xmlParsePI : no target name";
-            break;
-        case XML_ERR_RESERVED_XML_NAME:
-            det = "Invalid PI name";
-            break;
-        case XML_ERR_NOTATION_NOT_STARTED:
-            det = "NOTATION: Name expected here";
-            break;
-        case XML_ERR_NOTATION_NOT_FINISHED:
-            det = "'>' required to close NOTATION declaration";
-            break;
-        case XML_ERR_VALUE_REQUIRED:
-            det = "Entity value required";
-            break;
-        case XML_ERR_URI_FRAGMENT:
-            det = "Fragment not allowed";
-            break;
-        case XML_ERR_ATTLIST_NOT_STARTED:
-            det = "'(' required to start ATTLIST enumeration";
-            break;
-        case XML_ERR_NMTOKEN_REQUIRED:
-            det = "NmToken expected in ATTLIST enumeration";
-            break;
-        case XML_ERR_ATTLIST_NOT_FINISHED:
-            det = "')' required to finish ATTLIST enumeration";
-            break;
-        case XML_ERR_MIXED_NOT_STARTED:
-            det = "MixedContentDecl : '|' or ')*' expected";
-            break;
-        case XML_ERR_PCDATA_REQUIRED:
-            det = "MixedContentDecl : '#PCDATA' expected";
-            break;
-        case XML_ERR_ELEMCONTENT_NOT_STARTED:
-            det = "ContentDecl : Name or '(' expected";
-            break;
-        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
-            det = "ContentDecl : ',' '|' or ')' expected";
-            break;
-        case XML_ERR_PEREF_IN_INT_SUBSET:
-            det = "PEReference: forbidden within markup decl in internal subset";
-            break;
-        case XML_ERR_GT_REQUIRED:
-            det = "Expected '>'";
-            break;
-        case XML_ERR_CONDSEC_INVALID:
-            det = "XML conditional section '[' expected";
-            break;
-        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
-            det = "Content error in the external subset";
-            break;
-        case XML_ERR_CONDSEC_INVALID_KEYWORD:
-            det = "conditional section INCLUDE or IGNORE keyword expected";
-            break;
-        case XML_ERR_CONDSEC_NOT_FINISHED:
-            det = "XML conditional section not closed";
-            break;
-        case XML_ERR_XMLDECL_NOT_STARTED:
-            det = "Text declaration '<?xml' required";
-            break;
-        case XML_ERR_XMLDECL_NOT_FINISHED:
-            det = "parsing XML declaration: '?>' expected";
-            break;
-        case XML_ERR_EXT_ENTITY_STANDALONE:
-            det = "external parsed entities cannot be standalone";
-            break;
-        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
-            det = "EntityRef: expecting ';'";
-            break;
-        case XML_ERR_DOCTYPE_NOT_FINISHED:
-            det = "DOCTYPE improperly terminated";
-            break;
-        case XML_ERR_LTSLASH_REQUIRED:
-            det = "EndTag: '</' not found";
-            break;
-        case XML_ERR_EQUAL_REQUIRED:
-            det = "Expected '='";
-            break;
-        case XML_ERR_STRING_NOT_CLOSED:
-            det = "String not closed expecting \" or '";
-            break;
-        case XML_ERR_STRING_NOT_STARTED:
-            det = "String not started expecting ' or \"";
-            break;
-        case XML_ERR_ENCODING_NAME:
-            det = "Invalid XML encoding name";
-            break;
-        case XML_ERR_STANDALONE_VALUE:
-            det = "Standalone accepts only 'yes' or 'no'";
-            break;
-        case XML_ERR_DOCUMENT_EMPTY:
-            det = "Document is empty";
-            break;
-        case XML_ERR_DOCUMENT_END:
-            det = "Extra content at the end of the document";
-            break;
-        case XML_ERR_NOT_WELL_BALANCED:
-            det = "Chunk is not well balanced";
-            break;
-        case XML_ERR_EXTRA_CONTENT:
-            det = "Extra content at the end of well balanced chunk";
-            break;
-        case XML_ERR_VERSION_MISSING:
-            det = "Malformed declaration expecting version";
-            break;
-        /* more err codes... Please, keep the order! */
-        case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
-        	det ="Attribute without value";
-        	break;
-        case XML_ERR_ATTRIBUTE_REDEFINED:
-        	det ="Attribute defined more than once in the same element";
-        	break;
-        case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
-            det = "Comment is not finished";
-            break;
-        case XML_ERR_NAME_REQUIRED: /* 68 */
-            det = "Element name not found";
-            break;
-        case XML_ERR_TAG_NOT_FINISHED: /* 77 */
-            det = "Closing tag not found";
-            break;
+		case XML_ERR_INVALID_CHAR:
+			det = "Invalid character value";
+			break;
+		case XML_ERR_SPACE_REQUIRED:
+			det = "Space required";
+			break;
+		case XML_ERR_STANDALONE_VALUE:
+			det = "standalone accepts only 'yes' or 'no'";
+			break;
+		case XML_ERR_VERSION_MISSING:
+			det = "Malformed declaration expecting version";
+			break;
+		case XML_ERR_MISSING_ENCODING:
+			det = "Missing encoding in text declaration";
+			break;
+		case XML_ERR_XMLDECL_NOT_FINISHED:
+			det = "Parsing XML declaration: '?>' expected";
+			break;
         default:
             det = "Unrecognized libxml error code: %d";
 			break;
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 8c76dcee0c8..d91d7653036 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -6,7 +6,9 @@ INSERT INTO xmltest VALUES (1, '<value>one</value>');
 INSERT INTO xmltest VALUES (2, '<value>two</value>');
 INSERT INTO xmltest VALUES (3, '<wrong');
 ERROR:  invalid XML content
-DETAIL:  Expected '>'
+DETAIL:  Entity: line 1: parser error : Couldn't find end of Start Tag wrong line 1
+<wrong
+      ^
 SELECT * FROM xmltest;
  id |        data        
 ----+--------------------
@@ -54,7 +56,9 @@ SELECT xmlconcat(1, 2);
 ERROR:  argument of XMLCONCAT must be type xml, not type integer
 SELECT xmlconcat('bad', '<syntax');
 ERROR:  invalid XML content
-DETAIL:  Expected '>'
+DETAIL:  Entity: line 1: parser error : Couldn't find end of Start Tag syntax line 1
+<syntax
+       ^
 SELECT xmlconcat('<foo/>', NULL, '<?xml version="1.1" standalone="no"?><bar/>');
              xmlconcat             
 -----------------------------------
@@ -155,7 +159,9 @@ SELECT xmlparse(content '<abc>x</abc>');
 
 SELECT xmlparse(document 'abc');
 ERROR:  invalid XML document
-DETAIL:  Start tag expected, '<' not found.
+DETAIL:  Entity: line 1: parser error : Start tag expected, '<' not found
+abc
+^
 SELECT xmlparse(document '<abc>x</abc>');
    xmlparse   
 --------------
@@ -273,7 +279,9 @@ SELECT xml 'abc' IS NOT DOCUMENT;
 
 SELECT '<>' IS NOT DOCUMENT;
 ERROR:  invalid XML content
-DETAIL:  Element name not found
+DETAIL:  Entity: line 1: parser error : StartTag: invalid element name
+<>
+ ^
 SELECT xmlagg(data) FROM xmltest;
                 xmlagg                
 --------------------------------------
-- 
GitLab