Skip to content
Snippets Groups Projects
Select Git revision
  • 4f2ad52269750c9f04121251a23e7f6ed1fb911c
  • master default
  • benchmark-tools
  • postgres-lambda
  • REL9_4_25
  • REL9_5_20
  • REL9_6_16
  • REL_10_11
  • REL_11_6
  • REL_12_1
  • REL_12_0
  • REL_12_RC1
  • REL_12_BETA4
  • REL9_4_24
  • REL9_5_19
  • REL9_6_15
  • REL_10_10
  • REL_11_5
  • REL_12_BETA3
  • REL9_4_23
  • REL9_5_18
  • REL9_6_14
  • REL_10_9
  • REL_11_4
24 results

win32setlocale.c

Blame
  • user avatar
    Tom Lane authored
    It emerges that recent versions of Windows (at least 2016 Standard)
    spell this locale name as "Norwegian Bokmål_Norway.1252", defeating
    our mapping code that translates "Norwegian (Bokmål)_Norway" to
    something that's all-ASCII (cf commits db29620d and aa1d2fc5).
    Add another mapping entry to handle this spelling.
    
    Per bug #16068 from Robert Ford.  Like the previous patches,
    back-patch to all supported branches.
    
    Discussion: https://postgr.es/m/16068-4cb6eeaa7eb46d93@postgresql.org
    4f2ad522
    History
    win32setlocale.c 5.95 KiB
    /*-------------------------------------------------------------------------
     *
     * win32setlocale.c
     *		Wrapper to work around bugs in Windows setlocale() implementation
     *
     * Copyright (c) 2011-2019, PostgreSQL Global Development Group
     *
     * IDENTIFICATION
     *	  src/port/win32setlocale.c
     *
     *
     * The setlocale() function in Windows is broken in two ways. First, it
     * has a problem with locale names that have a dot in the country name. For
     * example:
     *
     * "Chinese (Traditional)_Hong Kong S.A.R..950"
     *
     * For some reason, setlocale() doesn't accept that as argument, even though
     * setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
     * various alternative names for such countries, so to work around the broken
     * setlocale() function, we map the troublemaking locale names to accepted
     * aliases, before calling setlocale().
     *
     * The second problem is that the locale name for "Norwegian (Bokmål)"
     * contains a non-ASCII character. That's problematic, because it's not clear
     * what encoding the locale name itself is supposed to be in, when you
     * haven't yet set a locale. Also, it causes problems when the cluster
     * contains databases with different encodings, as the locale name is stored
     * in the pg_database system catalog. To work around that, when setlocale()
     * returns that locale name, map it to a pure-ASCII alias for the same
     * locale.
     *-------------------------------------------------------------------------
     */
    
    #include "c.h"
    
    #undef setlocale
    
    struct locale_map
    {
    	/*
    	 * String in locale name to replace. Can be a single string (end is NULL),
    	 * or separate start and end strings. If two strings are given, the locale
    	 * name must contain both of them, and everything between them is
    	 * replaced. This is used for a poor-man's regexp search, allowing
    	 * replacement of "start.*end".
    	 */
    	const char *locale_name_start;
    	const char *locale_name_end;
    
    	const char *replacement;	/* string to replace the match with */
    };
    
    /*
     * Mappings applied before calling setlocale(), to the argument.
     */
    static const struct locale_map locale_map_argument[] = {
    	/*
    	 * "HKG" is listed here:
    	 * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
    	 * (Country/Region Strings).
    	 *
    	 * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
    	 * above list, but seems to work anyway.
    	 */
    	{"Hong Kong S.A.R.", NULL, "HKG"},
    	{"U.A.E.", NULL, "ARE"},
    
    	/*
    	 * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
    	 * seem to recognize that. And Macau isn't listed in the table of accepted
    	 * abbreviations linked above. Fortunately, "ZHM" seems to be accepted as
    	 * an alias for "Chinese (Traditional)_Macau S.A.R..950". I'm not sure
    	 * where "ZHM" comes from, must be some legacy naming scheme. But hey, it
    	 * works.
    	 *
    	 * Note that unlike HKG and ARE, ZHM is an alias for the *whole* locale
    	 * name, not just the country part.
    	 *
    	 * Some versions of Windows spell it "Macau", others "Macao".
    	 */
    	{"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"},
    	{"Chinese_Macau S.A.R..950", NULL, "ZHM"},
    	{"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"},
    	{"Chinese_Macao S.A.R..950", NULL, "ZHM"},
    	{NULL, NULL, NULL}
    };
    
    /*
     * Mappings applied after calling setlocale(), to its return value.
     */
    static const struct locale_map locale_map_result[] = {
    	/*
    	 * "Norwegian (Bokmål)" locale name contains the a-ring character.
    	 * Map it to a pure-ASCII alias.
    	 *
    	 * It's not clear what encoding setlocale() uses when it returns the
    	 * locale name, so to play it safe, we search for "Norwegian (Bok*l)".
    	 *
    	 * Just to make life even more complicated, some versions of Windows spell
    	 * the locale name without parentheses.  Translate that too.
    	 */
    	{"Norwegian (Bokm", "l)_Norway", "Norwegian_Norway"},
    	{"Norwegian Bokm", "l_Norway", "Norwegian_Norway"},
    	{NULL, NULL, NULL}
    };
    
    #define MAX_LOCALE_NAME_LEN		100
    
    static const char *
    map_locale(const struct locale_map *map, const char *locale)
    {
    	static char aliasbuf[MAX_LOCALE_NAME_LEN];
    	int			i;
    
    	/* Check if the locale name matches any of the problematic ones. */
    	for (i = 0; map[i].locale_name_start != NULL; i++)
    	{
    		const char *needle_start = map[i].locale_name_start;
    		const char *needle_end = map[i].locale_name_end;
    		const char *replacement = map[i].replacement;
    		char	   *match;
    		char	   *match_start = NULL;
    		char	   *match_end = NULL;
    
    		match = strstr(locale, needle_start);
    		if (match)
    		{
    			/*
    			 * Found a match for the first part. If this was a two-part
    			 * replacement, find the second part.
    			 */
    			match_start = match;
    			if (needle_end)
    			{
    				match = strstr(match_start + strlen(needle_start), needle_end);
    				if (match)
    					match_end = match + strlen(needle_end);
    				else
    					match_start = NULL;
    			}
    			else
    				match_end = match_start + strlen(needle_start);
    		}
    
    		if (match_start)
    		{
    			/* Found a match. Replace the matched string. */
    			int			matchpos = match_start - locale;
    			int			replacementlen = strlen(replacement);
    			char	   *rest = match_end;
    			int			restlen = strlen(rest);
    
    			/* check that the result fits in the static buffer */
    			if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN)
    				return NULL;
    
    			memcpy(&aliasbuf[0], &locale[0], matchpos);
    			memcpy(&aliasbuf[matchpos], replacement, replacementlen);
    			/* includes null terminator */
    			memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1);
    
    			return aliasbuf;
    		}
    	}
    
    	/* no match, just return the original string */
    	return locale;
    }
    
    char *
    pgwin32_setlocale(int category, const char *locale)
    {
    	const char *argument;
    	char	   *result;
    
    	if (locale == NULL)
    		argument = NULL;
    	else
    		argument = map_locale(locale_map_argument, locale);
    
    	/* Call the real setlocale() function */
    	result = setlocale(category, argument);
    
    	/*
    	 * setlocale() is specified to return a "char *" that the caller is
    	 * forbidden to modify, so casting away the "const" is innocuous.
    	 */
    	if (result)
    		result = unconstify(char *, map_locale(locale_map_result, result));
    
    	return result;
    }