diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 8ea6c1f387405a23059a021f89602d827528dfde..d71885dba9d50f8ad0533f2697db48047851ea20 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -265,6 +265,10 @@ startup_hacks(const char *progname) /* * Help display should match the options accepted by PostmasterMain() * and PostgresMain(). + * + * XXX On Windows, non-ASCII localizations of these messages only display + * correctly if the console output code page covers the necessary characters. + * Messages emitted in write_console() do not exhibit this problem. */ static void help(const char *progname) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 7081b00500bec9230c41823ca992f99d7ebba00b..3d85e297d22481de8f3c1a2cb19aadee866d7251 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -131,14 +131,16 @@ static char *IsoLocaleName(const char *); /* MSVC specific */ /* * pg_perm_setlocale * - * This is identical to the libc function setlocale(), with the addition - * that if the operation is successful, the corresponding LC_XXX environment - * variable is set to match. By setting the environment variable, we ensure - * that any subsequent use of setlocale(..., "") will preserve the settings - * made through this routine. Of course, LC_ALL must also be unset to fully - * ensure that, but that has to be done elsewhere after all the individual - * LC_XXX variables have been set correctly. (Thank you Perl for making this - * kluge necessary.) + * This wraps the libc function setlocale(), with two additions. First, when + * changing LC_CTYPE, update gettext's encoding for the current message + * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but + * not on Windows. Second, if the operation is successful, the corresponding + * LC_XXX environment variable is set to match. By setting the environment + * variable, we ensure that any subsequent use of setlocale(..., "") will + * preserve the settings made through this routine. Of course, LC_ALL must + * also be unset to fully ensure that, but that has to be done elsewhere after + * all the individual LC_XXX variables have been set correctly. (Thank you + * Perl for making this kluge necessary.) */ char * pg_perm_setlocale(int category, const char *locale) @@ -172,6 +174,22 @@ pg_perm_setlocale(int category, const char *locale) if (result == NULL) return result; /* fall out immediately on failure */ + /* + * Use the right encoding in translated messages. Under ENABLE_NLS, let + * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message + * format strings are ASCII, but database-encoding strings may enter the + * message via %s. This makes the overall message encoding equal to the + * database encoding. + */ + if (category == LC_CTYPE) + { +#ifdef ENABLE_NLS + SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL))); +#else + SetMessageEncoding(GetDatabaseEncoding()); +#endif + } + switch (category) { case LC_COLLATE: diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 7f03f419dead8f7f325a8e24a345d23fb1667192..706c01eca55f37e6c8f0370e5162f00b512fb2a4 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -1813,6 +1813,22 @@ write_syslog(int level, const char *line) #endif /* HAVE_SYSLOG */ #ifdef WIN32 +/* + * Get the PostgreSQL equivalent of the Windows ANSI code page. "ANSI" system + * interfaces (e.g. CreateFileA()) expect string arguments in this encoding. + * Every process in a given system will find the same value at all times. + */ +static int +GetACPEncoding(void) +{ + static int encoding = -2; + + if (encoding == -2) + encoding = pg_codepage_to_encoding(GetACP()); + + return encoding; +} + /* * Write a message line to the windows event log */ @@ -1858,16 +1874,18 @@ write_eventlog(int level, const char *line, int len) } /* - * Convert message to UTF16 text and write it with ReportEventW, but - * fall-back into ReportEventA if conversion failed. + * If message character encoding matches the encoding expected by + * ReportEventA(), call it to avoid the hazards of conversion. Otherwise, + * try to convert the message to UTF16 and write it with ReportEventW(). + * Fall back on ReportEventA() if conversion failed. * * Also verify that we are not on our way into error recursion trouble due - * to error messages thrown deep inside pgwin32_toUTF16(). + * to error messages thrown deep inside pgwin32_message_to_UTF16(). */ - if (GetDatabaseEncoding() != GetPlatformEncoding() && - !in_error_recursion_trouble()) + if (!in_error_recursion_trouble() && + GetMessageEncoding() != GetACPEncoding()) { - utf16 = pgwin32_toUTF16(line, len, NULL); + utf16 = pgwin32_message_to_UTF16(line, len, NULL); if (utf16) { ReportEventW(evtHandle, @@ -1879,6 +1897,7 @@ write_eventlog(int level, const char *line, int len) 0, (LPCWSTR *) &utf16, NULL); + /* XXX Try ReportEventA() when ReportEventW() fails? */ pfree(utf16); return; @@ -1904,22 +1923,30 @@ write_console(const char *line, int len) #ifdef WIN32 /* - * WriteConsoleW() will fail if stdout is redirected, so just fall through + * Try to convert the message to UTF16 and write it with WriteConsoleW(). + * Fall back on write() if anything fails. + * + * In contrast to write_eventlog(), don't skip straight to write() based + * on the applicable encodings. Unlike WriteConsoleW(), write() depends + * on the suitability of the console output code page. Since we put + * stderr into binary mode in SubPostmasterMain(), write() skips the + * necessary translation anyway. + * + * WriteConsoleW() will fail if stderr is redirected, so just fall through * to writing unconverted to the logfile in this case. * * Since we palloc the structure required for conversion, also fall * through to writing unconverted if we have not yet set up * CurrentMemoryContext. */ - if (GetDatabaseEncoding() != GetPlatformEncoding() && - !in_error_recursion_trouble() && + if (!in_error_recursion_trouble() && !redirection_done && CurrentMemoryContext != NULL) { WCHAR *utf16; int utf16len; - utf16 = pgwin32_toUTF16(line, len, &utf16len); + utf16 = pgwin32_message_to_UTF16(line, len, &utf16len); if (utf16 != NULL) { HANDLE stdHandle; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index e0abff1145a3b8b241a33556dd396c9be6911d40..e0ea2e9ecfcf4aa2a28399677d51fa68738ca751 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -357,11 +357,6 @@ CheckMyDatabase(const char *name, bool am_superuser) SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); - /* Use the right encoding in translated messages */ -#ifdef ENABLE_NLS - pg_bind_textdomain_codeset(textdomain(NULL)); -#endif - ReleaseSysCache(tup); } diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 9a05e573ffac7077a319efd6335fe45f1e4beb3b..772d4a5d056aff11cd947e9514a1521096176353 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -352,10 +352,13 @@ pg_enc2name pg_enc2name_tbl[] = /* ---------- * These are encoding names for gettext. + * + * This covers all encodings except MULE_INTERNAL, which is alien to gettext. * ---------- */ pg_enc2gettext pg_enc2gettext_tbl[] = { + {PG_SQL_ASCII, "US-ASCII"}, {PG_UTF8, "UTF-8"}, {PG_LATIN1, "LATIN1"}, {PG_LATIN2, "LATIN2"}, @@ -389,6 +392,13 @@ pg_enc2gettext pg_enc2gettext_tbl[] = {PG_EUC_KR, "EUC-KR"}, {PG_EUC_TW, "EUC-TW"}, {PG_EUC_JIS_2004, "EUC-JP"}, + {PG_SJIS, "SHIFT-JIS"}, + {PG_BIG5, "BIG5"}, + {PG_GBK, "GBK"}, + {PG_UHC, "UHC"}, + {PG_GB18030, "GB18030"}, + {PG_JOHAB, "JOHAB"}, + {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"}, {0, NULL} }; diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 4582219af73fc0c995051fa65e0c9a3029ed0285..6d1cd8e87590248f058092ee93f6e7fc5c0f4c09 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -53,11 +53,11 @@ static FmgrInfo *ToServerConvProc = NULL; static FmgrInfo *ToClientConvProc = NULL; /* - * These variables track the currently selected FE and BE encodings. + * These variables track the currently-selected encodings. */ static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; -static pg_enc2name *PlatformEncoding = NULL; +static pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; /* * During backend startup we can't set client encoding because we (a) @@ -881,46 +881,102 @@ SetDatabaseEncoding(int encoding) Assert(DatabaseEncoding->encoding == encoding); } -/* - * Bind gettext to the codeset equivalent with the database encoding. - */ void -pg_bind_textdomain_codeset(const char *domainname) +SetMessageEncoding(int encoding) { -#if defined(ENABLE_NLS) - int encoding = GetDatabaseEncoding(); - int i; + /* Some calls happen before we can elog()! */ + Assert(PG_VALID_ENCODING(encoding)); - /* - * gettext() uses the codeset specified by LC_CTYPE by default, so if that - * matches the database encoding we don't need to do anything. In CREATE - * DATABASE, we enforce or trust that the locale's codeset matches - * database encoding, except for the C locale. In C locale, we bind - * gettext() explicitly to the right codeset. - * - * On Windows, though, gettext() tends to get confused so we always bind - * it. - */ -#ifndef WIN32 - const char *ctype = setlocale(LC_CTYPE, NULL); + MessageEncoding = &pg_enc2name_tbl[encoding]; + Assert(MessageEncoding->encoding == encoding); +} - if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0) - return; -#endif +#ifdef ENABLE_NLS +/* + * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext + * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also + * fail for gettext-internal causes like out-of-memory. + */ +static bool +raw_pg_bind_textdomain_codeset(const char *domainname, int encoding) +{ + bool elog_ok = (CurrentMemoryContext != NULL); + int i; for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++) { if (pg_enc2gettext_tbl[i].encoding == encoding) { if (bind_textdomain_codeset(domainname, - pg_enc2gettext_tbl[i].name) == NULL) + pg_enc2gettext_tbl[i].name) != NULL) + return true; + + if (elog_ok) elog(LOG, "bind_textdomain_codeset failed"); + else + write_stderr("bind_textdomain_codeset failed"); + break; } } + + return false; +} + +/* + * Bind a gettext message domain to the codeset corresponding to the database + * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE. + * Return the MessageEncoding implied by the new settings. + * + * On most platforms, gettext defaults to the codeset implied by LC_CTYPE. + * When that matches the database encoding, we don't need to do anything. In + * CREATE DATABASE, we enforce or trust that the locale's codeset matches the + * database encoding, except for the C locale. (On Windows, we also permit a + * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind + * gettext to the right codeset. + * + * On Windows, gettext defaults to the Windows ANSI code page. This is a + * convenient departure for software that passes the strings to Windows ANSI + * APIs, but we don't do that. Compel gettext to use database encoding or, + * failing that, the LC_CTYPE encoding as it would on other platforms. + * + * This function is called before elog() and palloc() are usable. + */ +int +pg_bind_textdomain_codeset(const char *domainname) +{ + bool elog_ok = (CurrentMemoryContext != NULL); + int encoding = GetDatabaseEncoding(); + int new_msgenc; + +#ifndef WIN32 + const char *ctype = setlocale(LC_CTYPE, NULL); + + if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0) #endif + if (encoding != PG_SQL_ASCII && + raw_pg_bind_textdomain_codeset(domainname, encoding)) + return encoding; + + new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok); + if (new_msgenc < 0) + new_msgenc = PG_SQL_ASCII; + +#ifdef WIN32 + if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc)) + /* On failure, the old message encoding remains valid. */ + return GetMessageEncoding(); +#endif + + return new_msgenc; } +#endif +/* + * The database encoding, also called the server encoding, represents the + * encoding of data stored in text-like data types. Affected types include + * cstring, text, varchar, name, xml, and json. + */ int GetDatabaseEncoding(void) { @@ -949,19 +1005,17 @@ pg_client_encoding(PG_FUNCTION_ARGS) return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name)); } +/* + * gettext() returns messages in this encoding. This often matches the + * database encoding, but it differs for SQL_ASCII databases, for processes + * not attached to a database, and under a database encoding lacking iconv + * support (MULE_INTERNAL). + */ int -GetPlatformEncoding(void) +GetMessageEncoding(void) { - if (PlatformEncoding == NULL) - { - /* try to determine encoding of server's environment locale */ - int encoding = pg_get_encoding_from_locale("", true); - - if (encoding < 0) - encoding = PG_SQL_ASCII; - PlatformEncoding = &pg_enc2name_tbl[encoding]; - } - return PlatformEncoding->encoding; + Assert(MessageEncoding); + return MessageEncoding->encoding; } #ifdef WIN32 @@ -971,13 +1025,13 @@ GetPlatformEncoding(void) * is also passed to utf16len if not null. Returns NULL iff failed. */ WCHAR * -pgwin32_toUTF16(const char *str, int len, int *utf16len) +pgwin32_message_to_UTF16(const char *str, int len, int *utf16len) { WCHAR *utf16; int dstlen; UINT codepage; - codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage; + codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage; /* * Use MultiByteToWideChar directly if there is a corresponding codepage, @@ -994,7 +1048,7 @@ pgwin32_toUTF16(const char *str, int len, int *utf16len) char *utf8; utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str, - len, GetDatabaseEncoding(), PG_UTF8); + len, GetMessageEncoding(), PG_UTF8); if (utf8 != str) len = strlen(utf8); diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 725865595a7e6feb856e606094eb6eaf229add4a..d255c64bc1a44f689355d0e461e92aa55528a1c9 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -481,8 +481,12 @@ extern const char *pg_get_client_encoding_name(void); extern void SetDatabaseEncoding(int encoding); extern int GetDatabaseEncoding(void); extern const char *GetDatabaseEncodingName(void); -extern int GetPlatformEncoding(void); -extern void pg_bind_textdomain_codeset(const char *domainname); +extern void SetMessageEncoding(int encoding); +extern int GetMessageEncoding(void); + +#ifdef ENABLE_NLS +extern int pg_bind_textdomain_codeset(const char *domainname); +#endif extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); @@ -542,7 +546,7 @@ extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p, extern bool pg_utf8_islegal(const unsigned char *source, int length); #ifdef WIN32 -extern WCHAR *pgwin32_toUTF16(const char *str, int len, int *utf16len); +extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len); #endif #endif /* PG_WCHAR_H */ diff --git a/src/include/port.h b/src/include/port.h index 5eda5f0af55e2198b864faf31905990f328fd051..5ef4b0a0b11863d96213e4379ee958109691cb98 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -452,6 +452,10 @@ extern void qsort_arg(void *base, size_t nel, size_t elsize, /* port/chklocale.c */ extern int pg_get_encoding_from_locale(const char *ctype, bool write_message); +#if defined(WIN32) && !defined(FRONTEND) +extern int pg_codepage_to_encoding(UINT cp); +#endif + /* port/inet_net_ntop.c */ extern char *inet_net_ntop(int af, const void *src, int bits, char *dst, size_t size); diff --git a/src/port/chklocale.c b/src/port/chklocale.c index 9e889383f26ade91f1c92e0491adff51c592a676..8b8862ffb29a21936dd4fa9f1772485f649f3281 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -235,6 +235,32 @@ win32_langinfo(const char *ctype) return r; } + +#ifndef FRONTEND +/* + * Given a Windows code page identifier, find the corresponding PostgreSQL + * encoding. Issue a warning and return -1 if none found. + */ +int +pg_codepage_to_encoding(UINT cp) +{ + char sys[16]; + int i; + + sprintf(sys, "CP%u", cp); + + /* Check the table */ + for (i = 0; encoding_match_list[i].system_enc_name; i++) + if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0) + return encoding_match_list[i].pg_enc_code; + + ereport(WARNING, + (errmsg("could not determine encoding for codeset \"%s\"", sys), + errdetail("Please report this to <pgsql-bugs@postgresql.org>."))); + + return -1; +} +#endif #endif /* WIN32 */ #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32) @@ -248,6 +274,9 @@ win32_langinfo(const char *ctype) * * If the result is PG_SQL_ASCII, callers should treat it as being compatible * with any desired encoding. + * + * If running in the backend and write_message is false, this function must + * cope with the possibility that elog() and palloc() are not yet usable. */ int pg_get_encoding_from_locale(const char *ctype, bool write_message)