From ab9b6c45cf020d72f1600443fe76b9d7a4f8944c Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 15 Aug 2001 07:07:40 +0000 Subject: [PATCH] Add convert/convert2 functions. They are similar to the SQL99's convert. --- src/backend/utils/init/miscinit.c | 15 +- src/backend/utils/mb/mbutils.c | 268 +++++++++++++++++++++--------- src/include/catalog/pg_proc.h | 10 +- src/include/mb/pg_wchar.h | 4 +- src/include/utils/builtins.h | 4 +- 5 files changed, 221 insertions(+), 80 deletions(-) diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 86b46106cd3..a57f3d2624a 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.75 2001/08/06 18:17:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.76 2001/08/15 07:07:40 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -209,6 +209,19 @@ PG_char_to_encoding(PG_FUNCTION_ARGS) PG_RETURN_INT32(0); } +Datum +pg_convert(PG_FUNCTION_ARGS) +{ + elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability"); + return DirectFunctionCall1(textin, CStringGetDatum("")); +} + +Datum +pg_convert2(PG_FUNCTION_ARGS) +{ + elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability"); + return DirectFunctionCall1(textin, CStringGetDatum("")); +} #endif /* ---------------------------------------------------------------- diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 7b5262da6c4..8e4fc56ef09 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -3,7 +3,7 @@ * client encoding and server internal encoding.
* (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $ + * $Id: mbutils.c,v 1.19 2001/08/15 07:07:40 ishii Exp $ */ #include "postgres.h" @@ -34,67 +34,84 @@ pg_get_enc_ent(int encoding) } /* - * set the client encoding. if encoding conversion between - * client/server encoding is not supported, returns -1 + * Find appropriate encoding conversion functions. If no such + * functions found, returns -1. + * + * Arguments: + * + * src, dest (in): source and destination encoding ids + * + * src_to_mic (out): pointer to a function which converts src to + * mic/unicode according to dest. if src == mic/unicode or no + * appropriate function found, set to 0. + * + * dest_from_mic (out): pointer to a function which converts + * mic/unicode to dest according to src. if dest == mic/unicode or no + * appropriate function found, set to 0. */ int -pg_set_client_encoding(int encoding) +pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)()) { - int current_server_encoding = GetDatabaseEncoding(); - - client_encoding = encoding; - - if (client_encoding == current_server_encoding) - { /* server == client? */ - client_to_mic = client_from_mic = 0; - server_to_mic = server_from_mic = 0; + if (src == dest) + { /* src == dest? */ + *src_to_mic = *dest_from_mic = 0; } - else if (current_server_encoding == MULE_INTERNAL) - { /* server == MULE_INETRNAL? */ - client_to_mic = pg_get_enc_ent(encoding)->to_mic; - client_from_mic = pg_get_enc_ent(encoding)->from_mic; - server_to_mic = server_from_mic = 0; - if (client_to_mic == 0 || client_from_mic == 0) + else if (src == MULE_INTERNAL) + { /* src == MULE_INTERNAL? */ + *dest_from_mic = pg_get_enc_ent(dest)->from_mic; + if (*dest_from_mic == 0) return (-1); + *src_to_mic = 0; } - else if (encoding == MULE_INTERNAL) - { /* client == MULE_INETRNAL?
*/ - client_to_mic = client_from_mic = 0; - server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic; - server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic; - if (server_to_mic == 0 || server_from_mic == 0) + else if (dest == MULE_INTERNAL) + { /* dest == MULE_INTERNAL? */ + *src_to_mic = pg_get_enc_ent(src)->to_mic; + if (*src_to_mic == 0) return (-1); + *dest_from_mic = 0; } - else if (current_server_encoding == UNICODE) - { /* server == UNICODE? */ - client_to_mic = pg_get_enc_ent(encoding)->to_unicode; - client_from_mic = pg_get_enc_ent(encoding)->from_unicode; - server_to_mic = server_from_mic = 0; - if (client_to_mic == 0 || client_from_mic == 0) + else if (src == UNICODE) + { /* src == UNICODE? */ + *dest_from_mic = pg_get_enc_ent(dest)->from_unicode; + if (*dest_from_mic == 0) return (-1); + *src_to_mic = 0; } - else if (encoding == UNICODE) - { /* client == UNICODE? */ - client_to_mic = client_from_mic = 0; - server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode; - server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode; - if (server_to_mic == 0 || server_from_mic == 0) + else if (dest == UNICODE) + { /* dest == UNICODE? */ + *src_to_mic = pg_get_enc_ent(src)->to_unicode; + if (*src_to_mic == 0) return (-1); + *dest_from_mic = 0; } else { - client_to_mic = pg_get_enc_ent(encoding)->to_mic; - client_from_mic = pg_get_enc_ent(encoding)->from_mic; - server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic; - server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic; - if (client_to_mic == 0 || client_from_mic == 0) - return (-1); - if (server_to_mic == 0 || server_from_mic == 0) + *src_to_mic = pg_get_enc_ent(src)->to_mic; + *dest_from_mic = pg_get_enc_ent(dest)->from_mic; + if (*src_to_mic == 0 || *dest_from_mic == 0) return (-1); } return (0); } +/* + * set the client encoding.
if encoding conversion between + * client/server encoding is not supported, returns -1 + */ +int +pg_set_client_encoding(int encoding) +{ + int current_server_encoding = GetDatabaseEncoding(); + + if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0) + return (-1); + client_encoding = encoding; + + if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0) + return (-1); + return 0; +} + /* * returns the current client encoding */ @@ -110,7 +127,21 @@ pg_get_client_encoding() } /* - * convert client encoding to server encoding. + * Convert a string from the src encoding and return it. Actual conversion is done by + * src_to_mic and dest_from_mic, which can be obtained by + * pg_find_encoding_converters(). The reason we require two conversion + * functions is that we have an intermediate encoding: MULE_INTERNAL. + * Using intermediate encodings will reduce the number of functions + * doing encoding conversions. A special case is when either src or dest is + * the intermediate encoding itself. In this case, you don't need src + * or dest (setting 0 will indicate there's no conversion + * function). Another case is when you have a direct-conversion function from + * src to dest. In this case either src_to_mic or dest_from_mic could + * be set to 0 also. + * + * Note that if src or dest is UNICODE, we have to do + * direct-conversion: since we don't support conversion between UNICODE + * and MULE_INTERNAL, we cannot go through MULE_INTERNAL. * * CASE 1: if no conversion is required, then the given pointer s is returned. * @@ -120,34 +151,138 @@ pg_get_client_encoding() * to determine whether to pfree the result or not! * * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? - */ + * in the length of the string --- is this enough?
*/ + unsigned char * -pg_client_to_server(unsigned char *s, int len) +pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)()) { - unsigned char *result = s; + unsigned char *result = src; unsigned char *buf; - if (client_encoding == GetDatabaseEncoding()) - return result; - if (client_to_mic) + if (src_to_mic) { buf = (unsigned char *) palloc(len * 4 + 1); - (*client_to_mic) (result, buf, len); + (*src_to_mic) (result, buf, len); result = buf; len = strlen(result); } - if (server_from_mic) + if (dest_from_mic) { buf = (unsigned char *) palloc(len * 4 + 1); - (*server_from_mic) (result, buf, len); - if (result != s) + (*dest_from_mic) (result, buf, len); + if (result != src) pfree(result); /* release first buffer */ result = buf; } return result; } +/* + * Convert string using encoding_name. We assume that the string's + * encoding is the same as the DB encoding. + * + * TEXT convert(TEXT string, NAME encoding_name) + */ +Datum +pg_convert(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_P(0); + Name s = PG_GETARG_NAME(1); + int encoding = pg_char_to_encoding(NameStr(*s)); + int db_encoding = GetDatabaseEncoding(); + void (*src)(), (*dest)(); + unsigned char *result; + text *retval; + + if (encoding < 0) + elog(ERROR, "Invalid encoding name %s", NameStr(*s)); + + if (pg_find_encoding_converters(db_encoding, encoding, &src, &dest) < 0) + { + char *encoding_name = (char *)pg_encoding_to_char(db_encoding); + elog(ERROR, "Conversion from %s to %s is not possible", NameStr(*s), encoding_name); + } + + result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ, + src, dest); + if (result == NULL) + elog(ERROR, "Encoding conversion failed"); + + retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result))); + if (result != (unsigned char *)VARDATA(string)) + pfree(result); + + /* free memory if allocated by the toaster */ + PG_FREE_IF_COPY(string, 0); + + PG_RETURN_TEXT_P(retval); +} + +/* + * Convert
string from src_encoding_name to dest_encoding_name. + * + * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name) + */ +Datum +pg_convert2(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_P(0); + char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); + int src_encoding = pg_char_to_encoding(src_encoding_name); + char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); + int dest_encoding = pg_char_to_encoding(dest_encoding_name); + void (*src)(), (*dest)(); + unsigned char *result; + text *retval; + + if (src_encoding < 0) + elog(ERROR, "Invalid source encoding name %s", src_encoding_name); + if (dest_encoding < 0) + elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name); + + if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0) + { + elog(ERROR, "Conversion from %s to %s is not possible", + src_encoding_name, dest_encoding_name); + } + + result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ, + src, dest); + if (result == NULL) + elog(ERROR, "Encoding conversion failed"); + + retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result))); + if (result != (unsigned char *)VARDATA(string)) + pfree(result); + + /* free memory if allocated by the toaster */ + PG_FREE_IF_COPY(string, 0); + + PG_RETURN_TEXT_P(retval); +} + +/* + * convert client encoding to server encoding. + * + * CASE 1: if no conversion is required, then the given pointer s is returned. + * + * CASE 2: if conversion is required, a palloc'd string is returned. + * + * Callers must check whether return value differs from passed value + * to determine whether to pfree the result or not! + * + * Note: we assume that conversion cannot cause more than a 4-to-1 growth + * in the length of the string --- is this enough?
+ */ +unsigned char * +pg_client_to_server(unsigned char *s, int len) +{ + if (client_encoding == GetDatabaseEncoding()) + return s; + + return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic); +} + /* * convert server encoding to client encoding. * @@ -164,27 +299,10 @@ pg_client_to_server(unsigned char *s, int len) unsigned char * pg_server_to_client(unsigned char *s, int len) { - unsigned char *result = s; - unsigned char *buf; - if (client_encoding == GetDatabaseEncoding()) - return result; - if (server_to_mic) - { - buf = (unsigned char *) palloc(len * 4 + 1); - (*server_to_mic) (result, buf, len); - result = buf; - len = strlen(result); - } - if (client_from_mic) - { - buf = (unsigned char *) palloc(len * 4 + 1); - (*client_from_mic) (result, buf, len); - if (result != s) - pfree(result); /* release first buffer */ - result = buf; - } - return result; + return s; + + return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic); } /* convert a multi-byte string to a wchar */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 12b74364a6d..ee867e4d3a7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.204 2001/08/14 22:21:58 tgl Exp $ + * $Id: pg_proc.h,v 1.205 2001/08/15 07:07:40 ishii Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -2137,7 +2137,13 @@ DESCR("return portion of string"); DATA(insert OID = 1039 ( getdatabaseencoding PGUID 12 f t f t 0 f 19 "0" 100 0 0 100 getdatabaseencoding - )); DESCR("encoding name of current database"); -DATA(insert OID = 1295 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - )); +DATA(insert OID = 1717 ( convert PGUID 12 f t f t 2 f 25 "25 19" 100 0 0 100 pg_convert - )); +DESCR("convert 
string with specified destination encoding name"); + +DATA(insert OID = 1813 ( convert PGUID 12 f t f t 3 f 25 "25 19 19" 100 0 0 100 pg_convert2 - )); +DESCR("convert string with specified encoding names"); + +DATA(insert OID = 1264 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - )); DESCR("convert encoding name to encoding id"); DATA(insert OID = 1597 ( pg_encoding_to_char PGUID 12 f t f t 1 f 19 "23" 100 0 0 100 PG_encoding_to_char - )); diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index a51aefa27d5..6df58708f67 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */ +/* $Id: pg_wchar.h,v 1.28 2001/08/15 07:07:40 ishii Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -145,6 +145,8 @@ extern unsigned char *pg_server_to_client(unsigned char *, int); extern int pg_valid_client_encoding(const char *); extern pg_encoding_conv_tbl *pg_get_enc_ent(int); extern int pg_utf_mblen(const unsigned char *); +extern int pg_find_encoding_converters(int, int, void (**)(), void (**)()); +extern unsigned char *pg_do_encoding_conversion(unsigned char *, int, void (*)(), void (*)()); /* internally-used versions of functions. The PG_xxx forms of these * functions have fmgr-compatible interfaves. 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 97efb759c49..bb91e1166e1 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.161 2001/08/14 22:21:59 tgl Exp $ + * $Id: builtins.h,v 1.162 2001/08/15 07:07:40 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -581,6 +581,8 @@ extern Datum RI_FKey_setdefault_upd(PG_FUNCTION_ARGS); extern Datum getdatabaseencoding(PG_FUNCTION_ARGS); extern Datum PG_encoding_to_char(PG_FUNCTION_ARGS); extern Datum PG_char_to_encoding(PG_FUNCTION_ARGS); +extern Datum pg_convert(PG_FUNCTION_ARGS); +extern Datum pg_convert2(PG_FUNCTION_ARGS); /* format_type.c */ extern Datum format_type(PG_FUNCTION_ARGS); -- GitLab