diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index c10a0862d368dc78700e2c70c8f05a15c8322403..5ce367a497e890c6aed2ad18573ac6911a815e83 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,125 +16,8 @@ #include "tsearch/ts_locale.h" #include "tsearch/ts_public.h" - #ifdef USE_WIDE_UPPER_LOWER -/* - * wchar2char --- convert wide characters to multibyte format - * - * This has the same API as the standard wcstombs() function; in particular, - * tolen is the maximum number of bytes to store at *to, and *from must be - * zero-terminated. The output will be zero-terminated iff there is room. - */ -size_t -wchar2char(char *to, const wchar_t *from, size_t tolen) -{ - if (tolen == 0) - return 0; - -#ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, - NULL, NULL); - - if (r <= 0) - return (size_t) -1; - - Assert(r <= tolen); - - /* Microsoft counts the zero terminator in the result */ - return r - 1; - } -#endif /* WIN32 */ - - return wcstombs(to, from, tolen); -} - -/* - * char2wchar --- convert multibyte characters to wide characters - * - * This has almost the API of mbstowcs(), except that *from need not be - * null-terminated; instead, the number of input bytes is specified as - * fromlen. Also, we ereport() rather than returning -1 for invalid - * input encoding. tolen is the maximum number of wchar_t's to store at *to. - * The output will be zero-terminated iff there is room. 
- */ -size_t -char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) -{ - if (tolen == 0) - return 0; - -#ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - /* stupid Microsloth API does not work for zero-length input */ - if (fromlen == 0) - r = 0; - else - { - r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); - - if (r <= 0) - { - /* see notes in oracle_compat.c about error reporting */ - pg_verifymbstr(from, fromlen, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - } - - Assert(r < tolen); - to[r] = 0; - - return r; - } -#endif /* WIN32 */ - - if (lc_ctype_is_c()) - { - /* - * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be - * allocated with sufficient space - */ - return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); - } - else - { - /* - * mbstowcs requires ending '\0' - */ - char *str = pnstrdup(from, fromlen); - size_t result; - - result = mbstowcs(to, str, tolen); - - pfree(str); - - if (result == (size_t) -1) - { - pg_verifymbstr(from, fromlen, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - if (result < tolen) - to[result] = 0; - - return result; - } -} - - int t_isdigit(const char *ptr) { diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c index bdefaa6bc63c38dc259e0d3d75873fdf99c827c4..3708d02689f7bf9300c491eef445018086f60e37 100644 --- a/src/backend/tsearch/ts_utils.c +++ b/src/backend/tsearch/ts_utils.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 
2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key) bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false; } - -char * -pnstrdup(const char *in, int len) -{ - char *out = palloc(len + 1); - - memcpy(out, in, len); - out[len] = '\0'; - return out; -} diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 83e3a11c38f4841f631dd1192c2011c791d6cf39..e6b662199d94784adc986753148b45205af57b04 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -4,7 +4,7 @@ * (currently mule internal code (mic) is used) * Tatsuo Ishii * - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $ */ #include "postgres.h" @@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ return result; } + + +#ifdef USE_WIDE_UPPER_LOWER + +/* + * wchar2char --- convert wide characters to multibyte format + * + * This has the same API as the standard wcstombs() function; in particular, + * tolen is the maximum number of bytes to store at *to, and *from must be + * zero-terminated. The output will be zero-terminated iff there is room. + */ +size_t +wchar2char(char *to, const wchar_t *from, size_t tolen) +{ + size_t result; + + if (tolen == 0) + return 0; + +#ifdef WIN32 + /* + * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, + * and for some reason mbstowcs and wcstombs won't do this for us, + * so we use WideCharToMultiByte() here and MultiByteToWideChar() in char2wchar(). 
+ */ + if (GetDatabaseEncoding() == PG_UTF8) + { + result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, + NULL, NULL); + /* A zero return is failure; result is an unsigned size_t, so <= 0 matches only zero */ + if (result <= 0) + result = -1; + else + { + Assert(result <= tolen); + /* Microsoft counts the zero terminator in the result */ + result--; + } + } + else +#endif /* WIN32 */ + result = wcstombs(to, from, tolen); + return result; +} + +/* + * char2wchar --- convert multibyte characters to wide characters + * + * This has almost the API of mbstowcs(), except that *from need not be + * null-terminated; instead, the number of input bytes is specified as + * fromlen. Also, we ereport() rather than returning -1 for invalid + * input encoding. tolen is the maximum number of wchar_t's to store at *to. + * The output will be zero-terminated iff there is room. + */ +size_t +char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) +{ + size_t result; + + if (tolen == 0) + return 0; + +#ifdef WIN32 + /* See WIN32 "Unicode" comment above */ + if (GetDatabaseEncoding() == PG_UTF8) + { + /* Win32 API does not work for zero-length input */ + if (fromlen == 0) + result = 0; + else + { + result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); + /* A zero return is failure */ + if (result == 0) + result = -1; + } + + if (result != -1) + { + Assert(result < tolen); + /* Append trailing null wchar (MultiByteToWideChar() does not) */ + to[result] = 0; + } + } + else +#endif /* WIN32 */ + { + if (lc_ctype_is_c()) + { + /* + * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be + * allocated with sufficient space + */ + result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); + } + else + { + /* mbstowcs requires ending '\0', so convert a null-terminated copy made by pnstrdup */ + char *str = pnstrdup(from, fromlen); + + result = mbstowcs(to, str, tolen); + pfree(str); + } + } + + if (result == -1) + { + /* + * Invalid 
multibyte character encountered. 
We try to give a useful + * error message by letting pg_verifymbstr check the string. But it's + * possible that the string is OK to us, and not OK to mbstowcs --- + * this suggests that the LC_CTYPE locale is different from the + * database encoding. Give a generic error message if verifymbstr + * can't find anything wrong. + */ + pg_verifymbstr(from, fromlen, false); /* might not return */ + /* but if it does ... */ + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + + return result; +} + +#endif /* USE_WIDE_UPPER_LOWER */ + /* convert a multibyte string to a wchar */ int pg_mb2wchar(const char *from, pg_wchar *to) diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index ebceca438f7adf90588fc2f287d4eacd34c41041..daeaaaf1871bfb3f8e04770b837f8edb5997bec6 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -624,6 +624,18 @@ repalloc(void *pointer, Size size) pointer, size); } +/* Like pstrdup(), but copies exactly len bytes of a not-necessarily-null-terminated input into palloc-allocated storage and appends a null byte */ +char * +pnstrdup(const char *in, int len) +{ + char *out = palloc(len + 1); + + memcpy(out, in, len); + out[len] = '\0'; + return out; +} + + /* * MemoryContextSwitchTo * Returns the current context; installs the given context. 
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index adfdee9b97329fcfeb699f412190a93ce9611f62..b29552fdeaf0856bf8ee3b12d5ac471066d547e4 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $ * * NOTES * This is used both by the backend and by libpq, but should not be @@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit); extern int pg_encoding_max_length(int encoding); extern int pg_database_encoding_max_length(void); +#ifdef USE_WIDE_UPPER_LOWER +extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen); +extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen); +#endif + extern void SetDefaultClientEncoding(void); extern int SetClientEncoding(int encoding, bool doit); extern void InitializeClientEncoding(void); diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h index adeeebac187dc4ca2c076458594732e3d5c346a9..110efb191c1034b7577e51f9e22657b138610e07 100644 --- a/src/include/tsearch/ts_locale.h +++ b/src/include/tsearch/ts_locale.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -33,9 +33,6 @@ #ifdef USE_WIDE_UPPER_LOWER -extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen); -extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen); - extern int 
t_isdigit(const char *ptr); extern int t_isspace(const char *ptr); extern int t_isalpha(const char *ptr); diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index d08d35db1931a5f99664e2158f1edc5c65e102cf..5e3723fa8ec0e885231654f0cd4559d3b6e302e0 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -6,7 +6,7 @@ * * Copyright (c) 1998-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -62,8 +62,6 @@ typedef struct extern char *get_tsearch_config_filename(const char *basename, const char *extension); -extern char *pnstrdup(const char *in, int len); - /* * Often useful stopword list management */ diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index a3e78580f861f913021191e812c0badd50cc5610..7e3c085a7aadac6202d368273513994ba433754a 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -21,7 +21,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.38 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.39 2008/06/18 18:42:54 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -70,6 +70,8 @@ extern void pfree(void *pointer); extern void *repalloc(void *pointer, Size size); +extern char *pnstrdup(const char *in, int len); + /* * MemoryContextSwitchTo can't be a macro in standard C compilers. * But we can make it an inline function when using GCC.