From ab9b6c45cf020d72f1600443fe76b9d7a4f8944c Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 15 Aug 2001 07:07:40 +0000
Subject: [PATCH] Add convert/convert2 functions. They are similar to the
 SQL99's convert.

---
 src/backend/utils/init/miscinit.c |  15 +-
 src/backend/utils/mb/mbutils.c    | 268 +++++++++++++++++++++---------
 src/include/catalog/pg_proc.h     |  10 +-
 src/include/mb/pg_wchar.h         |   4 +-
 src/include/utils/builtins.h      |   4 +-
 5 files changed, 221 insertions(+), 80 deletions(-)

diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 86b46106cd3..a57f3d2624a 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.75 2001/08/06 18:17:42 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.76 2001/08/15 07:07:40 ishii Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -209,6 +209,19 @@ PG_char_to_encoding(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(0);
 }
 
+Datum
+pg_convert(PG_FUNCTION_ARGS)	/* stub: unconditionally reports that convert is unavailable */
+{
+	elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
+	return DirectFunctionCall1(textin, CStringGetDatum(""));	/* empty text; presumably never reached after elog(ERROR) -- confirm */
+}
+
+Datum
+pg_convert2(PG_FUNCTION_ARGS)	/* stub: unconditionally reports that convert is unavailable */
+{
+	elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
+	return DirectFunctionCall1(textin, CStringGetDatum(""));	/* empty text; presumably never reached after elog(ERROR) -- confirm */
+}
 #endif
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 7b5262da6c4..8e4fc56ef09 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -3,7 +3,7 @@
  * client encoding and server internal encoding.
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
- * $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
+ * $Id: mbutils.c,v 1.19 2001/08/15 07:07:40 ishii Exp $
  */
 #include "postgres.h"
 
@@ -34,67 +34,84 @@ pg_get_enc_ent(int encoding)
 }
 
 /*
- * set the client encoding. if encoding conversion between
- * client/server encoding is not supported, returns -1
+ * Find appropriate encoding conversion functions. If no such
+ * functions found, returns -1.
+ *
+ * Arguments:
+ *
+ * src, dest (in): source and destination encoding ids
+ *
+ * src_to_mic (out): pointer to a function which converts src to
+ * mic/unicode according to dest. if src == mic/unicode or no
+ * appropriate function found, set to 0.
+ *
+ * dest_from_mic (out): pointer to a function which converts
+ * mic/unicode to dest according to src. if dest == mic/unicode or no
+ * appropriate function found, set to 0.
  */
 int
-pg_set_client_encoding(int encoding)
+pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)())
 {
-	int			current_server_encoding = GetDatabaseEncoding();
-
-	client_encoding = encoding;
-
-	if (client_encoding == current_server_encoding)
-	{							/* server == client? */
-		client_to_mic = client_from_mic = 0;
-		server_to_mic = server_from_mic = 0;
+	if (src == dest)
+	{							/* src == dest? */
+		*src_to_mic = *dest_from_mic = 0;
 	}
-	else if (current_server_encoding == MULE_INTERNAL)
-	{							/* server == MULE_INETRNAL? */
-		client_to_mic = pg_get_enc_ent(encoding)->to_mic;
-		client_from_mic = pg_get_enc_ent(encoding)->from_mic;
-		server_to_mic = server_from_mic = 0;
-		if (client_to_mic == 0 || client_from_mic == 0)
+	else if (src == MULE_INTERNAL)
+	{							/* src == MULE_INTERNAL? */
+		*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
+		if (*dest_from_mic == 0)
 			return (-1);
+		*src_to_mic = 0;
 	}
-	else if (encoding == MULE_INTERNAL)
-	{							/* client == MULE_INETRNAL? */
-		client_to_mic = client_from_mic = 0;
-		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
-		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
-		if (server_to_mic == 0 || server_from_mic == 0)
+	else if (dest == MULE_INTERNAL)
+	{							/* dest == MULE_INTERNAL? */
+		*src_to_mic = pg_get_enc_ent(src)->to_mic;
+		if (*src_to_mic == 0)
 			return (-1);
+		*dest_from_mic = 0;
 	}
-	else if (current_server_encoding == UNICODE)
-	{							/* server == UNICODE? */
-		client_to_mic = pg_get_enc_ent(encoding)->to_unicode;
-		client_from_mic = pg_get_enc_ent(encoding)->from_unicode;
-		server_to_mic = server_from_mic = 0;
-		if (client_to_mic == 0 || client_from_mic == 0)
+	else if (src == UNICODE)
+	{							/* src == UNICODE? */
+		*dest_from_mic = pg_get_enc_ent(dest)->from_unicode;
+		if (*dest_from_mic == 0)
 			return (-1);
+		*src_to_mic = 0;
 	}
-	else if (encoding == UNICODE)
-	{							/* client == UNICODE? */
-		client_to_mic = client_from_mic = 0;
-		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode;
-		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode;
-		if (server_to_mic == 0 || server_from_mic == 0)
+	else if (dest == UNICODE)
+	{							/* dest == UNICODE? */
+		*src_to_mic = pg_get_enc_ent(src)->to_unicode;
+		if (*src_to_mic == 0)
 			return (-1);
+		*dest_from_mic = 0;
 	}
 	else
 	{
-		client_to_mic = pg_get_enc_ent(encoding)->to_mic;
-		client_from_mic = pg_get_enc_ent(encoding)->from_mic;
-		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
-		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
-		if (client_to_mic == 0 || client_from_mic == 0)
-			return (-1);
-		if (server_to_mic == 0 || server_from_mic == 0)
+		*src_to_mic = pg_get_enc_ent(src)->to_mic;	/* general case: both hops go via MULE_INTERNAL */
+		*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
+		if (*src_to_mic == 0 || *dest_from_mic == 0)
 			return (-1);
 	}
 	return (0);
 }
 
+/*
+ * Set the client encoding; returns -1 when a needed converter is missing.
+ */
+int
+pg_set_client_encoding(int encoding)
+{
+	int			server_enc = GetDatabaseEncoding();
+	int			rc;
+
+	rc = pg_find_encoding_converters(encoding, server_enc, &client_to_mic, &server_from_mic);
+	if (rc < 0)
+		return -1;
+	client_encoding = encoding;	/* recorded once the client->server pair is found */
+	/* now the reverse direction: server -> client */
+	rc = pg_find_encoding_converters(server_enc, encoding, &server_to_mic, &client_from_mic);
+	return (rc < 0) ? -1 : 0;
+}
+
 /*
  * returns the current client encoding
  */
@@ -110,7 +127,21 @@ pg_get_client_encoding()
 }
 
 /*
- * convert client encoding to server encoding.
+ * Convert the string src and return the result. The actual conversion
+ * is done by src_to_mic and dest_from_mic, which can be obtained from
+ * pg_find_encoding_converters(). Two conversion functions are needed
+ * because we go through an intermediate encoding, MULE_INTERNAL.
+ * Using an intermediate encoding reduces the number of functions
+ * doing encoding conversions. A special case is when either src or
+ * dest is the intermediate encoding itself: the corresponding
+ * function is then not needed (passing 0 indicates there is no
+ * conversion function). Another case is a direct-conversion function
+ * from src to dest: then either src_to_mic or dest_from_mic can
+ * be set to 0 as well.
+ *
+ * Note that if src or dest is UNICODE, we have to do a direct
+ * conversion: we don't support conversion between UNICODE and
+ * MULE_INTERNAL, so we cannot go through MULE_INTERNAL.
  *
  * CASE 1: if no conversion is required, then the given pointer s is returned.
  *
@@ -120,34 +151,138 @@ pg_get_client_encoding()
  * to determine whether to pfree the result or not!
  *
  * Note: we assume that conversion cannot cause more than a 4-to-1 growth
- * in the length of the string --- is this enough?
- */
+ * in the length of the string --- is this enough?  */
+
 unsigned char *
-pg_client_to_server(unsigned char *s, int len)
+pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)())
 {
-	unsigned char *result = s;
+	unsigned char *result = src;
 	unsigned char *buf;
 
-	if (client_encoding == GetDatabaseEncoding())
-		return result;
-	if (client_to_mic)
+	if (src_to_mic)				/* first hop: src -> intermediate form, skipped when 0 */
 	{
 		buf = (unsigned char *) palloc(len * 4 + 1);
-		(*client_to_mic) (result, buf, len);
+		(*src_to_mic) (result, buf, len);
 		result = buf;
 		len = strlen(result);
 	}
-	if (server_from_mic)
+	if (dest_from_mic)			/* second hop: intermediate form -> dest, skipped when 0 */
 	{
 		buf = (unsigned char *) palloc(len * 4 + 1);
-		(*server_from_mic) (result, buf, len);
-		if (result != s)
+		(*dest_from_mic) (result, buf, len);
+		if (result != src)
 			pfree(result);		/* release first buffer */
 		result = buf;
 	}
 	return result;
 }
 
+/*
+ * Convert string from the database encoding to encoding_name; errors
+ * out on an unknown name or when no conversion path exists.
+ *
+ * TEXT convert(TEXT string, NAME encoding_name)
+ */
+Datum
+pg_convert(PG_FUNCTION_ARGS)
+{
+	text	*string = PG_GETARG_TEXT_P(0);
+	Name	s = PG_GETARG_NAME(1);
+	int encoding = pg_char_to_encoding(NameStr(*s));
+	int db_encoding = GetDatabaseEncoding();
+	int len = VARSIZE(string) - VARHDRSZ;
+	void (*src)(), (*dest)();
+	unsigned char	*str, *result;
+	text	*retval;
+
+	if (encoding < 0)
+	    elog(ERROR, "Invalid encoding name %s", NameStr(*s));
+	if (pg_find_encoding_converters(db_encoding, encoding, &src, &dest) < 0)
+	    elog(ERROR, "Conversion from %s to %s is not possible",
+		 pg_encoding_to_char(db_encoding), NameStr(*s));
+
+	/* work on a NUL-terminated copy: textin() below expects a C string */
+	str = (unsigned char *) palloc(len + 1);
+	memcpy(str, VARDATA(string), len);
+	str[len] = '\0';
+	result = pg_do_encoding_conversion(str, len, src, dest);
+	if (result == NULL)
+	    elog(ERROR, "Encoding conversion failed");
+	retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
+	if (result != str)
+	    pfree(result);		/* conversion allocated its own buffer */
+	pfree(str);
+	/* free memory if allocated by the toaster */
+	PG_FREE_IF_COPY(string, 0);
+
+	PG_RETURN_TEXT_P(retval);
+}
+
+/*
+ * Convert string from src_encoding_name to dest_encoding_name.
+ *
+ * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
+ */
+Datum
+pg_convert2(PG_FUNCTION_ARGS)
+{
+	text	*string = PG_GETARG_TEXT_P(0);
+	char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
+	int src_encoding = pg_char_to_encoding(src_encoding_name);
+	char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
+	int dest_encoding = pg_char_to_encoding(dest_encoding_name);
+	int len = VARSIZE(string) - VARHDRSZ;
+	void (*src)(), (*dest)();
+	unsigned char	*str, *result;
+	text	*retval;
+
+	if (src_encoding < 0)
+	    elog(ERROR, "Invalid source encoding name %s", src_encoding_name);
+	if (dest_encoding < 0)
+	    elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name);
+	if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0)
+	    elog(ERROR, "Conversion from %s to %s is not possible",
+		 src_encoding_name, dest_encoding_name);
+
+	/* work on a NUL-terminated copy: textin() below expects a C string */
+	str = (unsigned char *) palloc(len + 1);
+	memcpy(str, VARDATA(string), len);
+	str[len] = '\0';
+	result = pg_do_encoding_conversion(str, len, src, dest);
+	if (result == NULL)
+	    elog(ERROR, "Encoding conversion failed");
+	retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
+	if (result != str)
+	    pfree(result);		/* conversion allocated its own buffer */
+	pfree(str);
+	/* free memory if allocated by the toaster */
+	PG_FREE_IF_COPY(string, 0);
+
+	PG_RETURN_TEXT_P(retval);
+}
+
+/*
+ * convert client encoding to server encoding.
+ *
+ * CASE 1: if no conversion is required, then the given pointer s is returned.
+ *
+ * CASE 2: if conversion is required, a palloc'd string is returned.
+ *
+ * Callers must check whether return value differs from passed value
+ * to determine whether to pfree the result or not!
+ *
+ * Note: we assume that conversion cannot cause more than a 4-to-1 growth
+ * in the length of the string --- is this enough?
+ */
+unsigned char *
+pg_client_to_server(unsigned char *s, int len)
+{
+	/* only convert when client and database encodings differ */
+	if (client_encoding != GetDatabaseEncoding())
+		return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic);
+	return s;
+}
+
 /*
  * convert server encoding to client encoding.
  *
@@ -164,27 +299,10 @@ pg_client_to_server(unsigned char *s, int len)
 unsigned char *
 pg_server_to_client(unsigned char *s, int len)
 {
-	unsigned char *result = s;
-	unsigned char *buf;
-
 	if (client_encoding == GetDatabaseEncoding())
-		return result;
-	if (server_to_mic)
-	{
-		buf = (unsigned char *) palloc(len * 4 + 1);
-		(*server_to_mic) (result, buf, len);
-		result = buf;
-		len = strlen(result);
-	}
-	if (client_from_mic)
-	{
-		buf = (unsigned char *) palloc(len * 4 + 1);
-		(*client_from_mic) (result, buf, len);
-		if (result != s)
-			pfree(result);		/* release first buffer */
-		result = buf;
-	}
-	return result;
+		return s;	/* same encoding on both sides: hand back the caller's pointer */
+
+	return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic);
 }
 
 /* convert a multi-byte string to a wchar */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 12b74364a6d..ee867e4d3a7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.204 2001/08/14 22:21:58 tgl Exp $
+ * $Id: pg_proc.h,v 1.205 2001/08/15 07:07:40 ishii Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2137,7 +2137,13 @@ DESCR("return portion of string");
 DATA(insert OID = 1039 (  getdatabaseencoding	   PGUID 12 f t f t 0 f 19 "0" 100 0 0 100	getdatabaseencoding - ));
 DESCR("encoding name of current database");
 
-DATA(insert OID = 1295 (  pg_char_to_encoding	   PGUID 12 f t f t 1 f 23 "19" 100 0 0 100  PG_char_to_encoding - ));
+DATA(insert OID = 1717 (  convert		   PGUID 12 f t f t 2 f 25 "25 19" 100 0 0 100  pg_convert - ));
+DESCR("convert string with specified destination encoding name");
+
+DATA(insert OID = 1813 (  convert		   PGUID 12 f t f t 3 f 25 "25 19 19" 100 0 0 100  pg_convert2 - ));
+DESCR("convert string with specified encoding names");
+
+DATA(insert OID = 1264 (  pg_char_to_encoding	   PGUID 12 f t f t 1 f 23 "19" 100 0 0 100  PG_char_to_encoding - ));
 DESCR("convert encoding name to encoding id");
 
 DATA(insert OID = 1597 (  pg_encoding_to_char	   PGUID 12 f t f t 1 f 19 "23" 100 0 0 100  PG_encoding_to_char - ));
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index a51aefa27d5..6df58708f67 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -1,4 +1,4 @@
-/* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */
+/* $Id: pg_wchar.h,v 1.28 2001/08/15 07:07:40 ishii Exp $ */
 
 #ifndef PG_WCHAR_H
 #define PG_WCHAR_H
@@ -145,6 +145,8 @@ extern unsigned char *pg_server_to_client(unsigned char *, int);
 extern int	pg_valid_client_encoding(const char *);
 extern pg_encoding_conv_tbl *pg_get_enc_ent(int);
 extern int	pg_utf_mblen(const unsigned char *);
+extern int	pg_find_encoding_converters(int, int, void (**)(), void (**)());
+extern unsigned char *pg_do_encoding_conversion(unsigned char *, int, void (*)(), void (*)());
 
 /* internally-used versions of functions.  The PG_xxx forms of these
  * functions have fmgr-compatible interfaves.
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 97efb759c49..bb91e1166e1 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.161 2001/08/14 22:21:59 tgl Exp $
+ * $Id: builtins.h,v 1.162 2001/08/15 07:07:40 ishii Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -581,6 +581,8 @@ extern Datum RI_FKey_setdefault_upd(PG_FUNCTION_ARGS);
 extern Datum getdatabaseencoding(PG_FUNCTION_ARGS);
 extern Datum PG_encoding_to_char(PG_FUNCTION_ARGS);
 extern Datum PG_char_to_encoding(PG_FUNCTION_ARGS);
+extern Datum pg_convert(PG_FUNCTION_ARGS);
+extern Datum pg_convert2(PG_FUNCTION_ARGS);
 
 /* format_type.c */
 extern Datum format_type(PG_FUNCTION_ARGS);
-- 
GitLab