From 02e14562a806a96f38120c96421d39dfa7394192 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Sat, 19 Feb 2011 08:54:58 +0200
Subject: [PATCH] Set psql client encoding from locale by default

Add a new libpq connection option client_encoding (which includes the
existing PGCLIENTENCODING environment variable), which besides an
encoding name accepts a special value "auto" that tries to determine
the encoding from the locale in the client's environment, using the
mechanisms that have been in use in initdb.

psql sets this new connection option to "auto" when running from a
terminal and not overridden by setting PGCLIENTENCODING.

original code by Heikki Linnakangas, with subsequent contributions by
Jaime Casanova, Peter Eisentraut, Stephen Frost, Ibrar Ahmed
---
 doc/src/sgml/libpq.sgml             | 36 +++++++++++------
 doc/src/sgml/ref/psql-ref.sgml      | 11 ++++++
 src/bin/psql/command.c              |  8 ++--
 src/bin/psql/startup.c              |  8 ++--
 src/interfaces/libpq/.gitignore     |  1 +
 src/interfaces/libpq/Makefile       |  6 +--
 src/interfaces/libpq/fe-connect.c   | 24 ++++++++++--
 src/interfaces/libpq/fe-protocol2.c | 60 +++++++++++++++++++++++++++++
 src/interfaces/libpq/fe-protocol3.c |  3 ++
 src/interfaces/libpq/libpq-int.h    |  3 ++
 10 files changed, 136 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 2d15e78fd08..49edc51dbac 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -259,6 +259,21 @@ PGconn *PQconnectdbParams(const char **keywords, const char **values, int expand
          </listitem>
         </varlistentry>
 
+        <varlistentry id="libpq-connect-client-encoding" xreflabel="client_encoding">
+         <term><literal>client_encoding</literal></term>
+         <listitem>
+         <para>
+          This sets the <varname>client_encoding</varname>
+          configuration parameter for this connection.  In addition to
+          the values accepted by the corresponding server option, you
+          can use <literal>auto</literal> to determine the right
+          encoding from the current locale in the client
+          (<envar>LC_CTYPE</envar> environment variable on Unix
+          systems).
+         </para>
+         </listitem>
+        </varlistentry>
+
         <varlistentry id="libpq-connect-options" xreflabel="options">
          <term><literal>options</literal></term>
          <listitem>
@@ -6345,6 +6360,16 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
       linkend="libpq-connect-connect-timeout"> connection parameter.
      </para>
     </listitem>
+
+    <listitem>
+     <para>
+      <indexterm>
+       <primary><envar>PGCLIENTENCODING</envar></primary>
+      </indexterm>
+      <envar>PGCLIENTENCODING</envar> behaves the same as the <xref
+      linkend="libpq-connect-client-encoding"> connection parameter.
+     </para>
+    </listitem>
    </itemizedlist>
   </para>
 
@@ -6378,17 +6403,6 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
      </para>
     </listitem>
 
-    <listitem>
-     <para>
-      <indexterm>
-       <primary><envar>PGCLIENTENCODING</envar></primary>
-      </indexterm>
-      <envar>PGCLIENTENCODING</envar> sets the default client character
-      set encoding.  (Equivalent to <literal>SET client_encoding TO
-      ...</literal>.)
-     </para>
-    </listitem>
-
     <listitem>
      <para>
       <indexterm>
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index ff60a72059e..baefb578826 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -593,6 +593,17 @@ $ <userinput>psql "service=myservice sslmode=require"</userinput>
     privileges, server is not running on the targeted host, etc.),
     <application>psql</application> will return an error and terminate.
     </para>
+
+    <para>
+     If both standard input and standard output are a
+     terminal, then <application>psql</application> sets the client
+     encoding to <quote>auto</quote>, which will detect the
+     appropriate client encoding from the locale settings
+     (<envar>LC_CTYPE</envar> environment variable on Unix systems).
+     If this doesn't work out as expected, the client encoding can be
+     overridden using the environment
+     variable <envar>PGCLIENTENCODING</envar>.
+    </para>
   </refsect2>
 
   <refsect2 id="R2-APP-PSQL-4">
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index d1268848d5b..d7cdcf64344 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -1487,7 +1487,7 @@ do_connect(char *dbname, char *user, char *host, char *port)
 
 	while (true)
 	{
-#define PARAMS_ARRAY_SIZE	7
+#define PARAMS_ARRAY_SIZE	8
 		const char **keywords = pg_malloc(PARAMS_ARRAY_SIZE * sizeof(*keywords));
 		const char **values = pg_malloc(PARAMS_ARRAY_SIZE * sizeof(*values));
 
@@ -1503,8 +1503,10 @@ do_connect(char *dbname, char *user, char *host, char *port)
 		values[4] = dbname;
 		keywords[5] = "fallback_application_name";
 		values[5] = pset.progname;
-		keywords[6] = NULL;
-		values[6] = NULL;
+		keywords[6] = "client_encoding";
+		values[6] = (pset.notty || getenv("PGCLIENTENCODING")) ? NULL : "auto";
+		keywords[7] = NULL;
+		values[7] = NULL;
 
 		n_conn = PQconnectdbParams(keywords, values, true);
 
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 10713e9b15d..7b8078c21e4 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -171,7 +171,7 @@ main(int argc, char *argv[])
 	/* loop until we have a password if requested by backend */
 	do
 	{
-#define PARAMS_ARRAY_SIZE	7
+#define PARAMS_ARRAY_SIZE	8
 		const char **keywords = pg_malloc(PARAMS_ARRAY_SIZE * sizeof(*keywords));
 		const char **values = pg_malloc(PARAMS_ARRAY_SIZE * sizeof(*values));
 
@@ -189,8 +189,10 @@ main(int argc, char *argv[])
 			"postgres" : options.dbname;
 		keywords[5] = "fallback_application_name";
 		values[5] = pset.progname;
-		keywords[6] = NULL;
-		values[6] = NULL;
+		keywords[6] = "client_encoding";
+		values[6] = (pset.notty || getenv("PGCLIENTENCODING")) ? NULL : "auto";
+		keywords[7] = NULL;
+		values[7] = NULL;
 
 		new_pass = false;
 		pset.db = PQconnectdbParams(keywords, values, true);
diff --git a/src/interfaces/libpq/.gitignore b/src/interfaces/libpq/.gitignore
index 366adeb9080..29024ae67ce 100644
--- a/src/interfaces/libpq/.gitignore
+++ b/src/interfaces/libpq/.gitignore
@@ -1,4 +1,5 @@
 /exports.list
+/chklocale.c
 /crypt.c
 /getaddrinfo.c
 /inet_aton.c
diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile
index f4111c4b766..18795446d5b 100644
--- a/src/interfaces/libpq/Makefile
+++ b/src/interfaces/libpq/Makefile
@@ -35,7 +35,7 @@ OBJS=	fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
 	fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \
 	libpq-events.o
 # libpgport C files we always use
-OBJS += inet_net_ntop.o noblock.o pgstrcasecmp.o thread.o
+OBJS += chklocale.o inet_net_ntop.o noblock.o pgstrcasecmp.o thread.o
 # libpgport C files that are needed if identified by configure
 OBJS += $(filter crypt.o getaddrinfo.o inet_aton.o open.o snprintf.o strerror.o strlcpy.o win32error.o, $(LIBOBJS))
 # backend/libpq
@@ -88,7 +88,7 @@ backend_src = $(top_srcdir)/src/backend
 # For some libpgport modules, this only happens if configure decides 
 # the module is needed (see filter hack in OBJS, above).
 
-crypt.c getaddrinfo.c inet_aton.c inet_net_ntop.c noblock.c open.c pgsleep.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c win32error.c: % : $(top_srcdir)/src/port/%
+chklocale.c crypt.c getaddrinfo.c inet_aton.c inet_net_ntop.c noblock.c open.c pgsleep.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c win32error.c: % : $(top_srcdir)/src/port/%
 	rm -f $@ && $(LN_S) $< .
 
 ip.c md5.c: % : $(backend_src)/libpq/%
@@ -135,7 +135,7 @@ clean distclean: clean-lib
 # Might be left over from a Win32 client-only build
 	rm -f pg_config_paths.h
 	rm -f inet_net_ntop.c noblock.c pgstrcasecmp.c thread.c
-	rm -f crypt.c getaddrinfo.c inet_aton.c open.c snprintf.c strerror.c strlcpy.c win32error.c
+	rm -f chklocale.c crypt.c getaddrinfo.c inet_aton.c open.c snprintf.c strerror.c strlcpy.c win32error.c
 	rm -f pgsleep.c	
 	rm -f md5.c ip.c
 	rm -f encnames.c wchar.c
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index b8013ed42fc..7766c7eabae 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -175,6 +175,9 @@ static const PQconninfoOption PQconninfoOptions[] = {
 	{"port", "PGPORT", DEF_PGPORT_STR, NULL,
 	"Database-Port", "", 6},
 
+	{"client_encoding", "PGCLIENTENCODING", NULL, NULL,
+	"Client-Encoding", "", 10},
+
 	/*
 	 * "tty" is no longer used either, but keep it present for backwards
 	 * compatibility.
@@ -270,9 +273,6 @@ static const PQEnvironmentOption EnvironmentOptions[] =
 	{
 		"PGTZ", "timezone"
 	},
-	{
-		"PGCLIENTENCODING", "client_encoding"
-	},
 	/* internal performance-related settings */
 	{
 		"PGGEQO", "geqo"
@@ -612,6 +612,8 @@ fillPGconn(PGconn *conn, PQconninfoOption *connOptions)
 	conn->pgpass = tmp ? strdup(tmp) : NULL;
 	tmp = conninfo_getval(connOptions, "connect_timeout");
 	conn->connect_timeout = tmp ? strdup(tmp) : NULL;
+	tmp = conninfo_getval(connOptions, "client_encoding");
+	conn->client_encoding_initial = tmp ? strdup(tmp) : NULL;
 	tmp = conninfo_getval(connOptions, "keepalives");
 	conn->keepalives = tmp ? strdup(tmp) : NULL;
 	tmp = conninfo_getval(connOptions, "keepalives_idle");
@@ -786,6 +788,16 @@ connectOptions2(PGconn *conn)
 	else
 		conn->sslmode = strdup(DefaultSSLMode);
 
+	/*
+	 * Resolve special "auto" client_encoding from the locale
+	 */
+	if (conn->client_encoding_initial &&
+		strcmp(conn->client_encoding_initial, "auto") == 0)
+	{
+		free(conn->client_encoding_initial);
+		conn->client_encoding_initial = strdup(pg_encoding_to_char(pg_get_encoding_from_locale(NULL, true)));
+	}
+
 	/*
 	 * Only if we get this far is it appropriate to try to connect. (We need a
 	 * state flag, rather than just the boolean result of this function, in
@@ -2508,7 +2520,7 @@ keep_going:						/* We will come back to here until there is
 				if (PG_PROTOCOL_MAJOR(conn->pversion) < 3)
 				{
 					conn->status = CONNECTION_SETENV;
-					conn->setenv_state = SETENV_STATE_OPTION_SEND;
+					conn->setenv_state = SETENV_STATE_CLIENT_ENCODING_SEND;
 					conn->next_eo = EnvironmentOptions;
 					return PGRES_POLLING_WRITING;
 				}
@@ -4661,6 +4673,10 @@ PQsetClientEncoding(PGconn *conn, const char *encoding)
 	if (!encoding)
 		return -1;
 
+	/* Resolve special "auto" value from the locale */
+	if (strcmp(encoding, "auto") == 0)
+		encoding = pg_encoding_to_char(pg_get_encoding_from_locale(NULL, true));
+
 	/* check query buffer overflow */
 	if (sizeof(qbuf) < (sizeof(query) + strlen(encoding)))
 		return -1;
diff --git a/src/interfaces/libpq/fe-protocol2.c b/src/interfaces/libpq/fe-protocol2.c
index 058a25b8033..05357de1c74 100644
--- a/src/interfaces/libpq/fe-protocol2.c
+++ b/src/interfaces/libpq/fe-protocol2.c
@@ -58,6 +58,7 @@ pqSetenvPoll(PGconn *conn)
 	switch (conn->setenv_state)
 	{
 			/* These are reading states */
+		case SETENV_STATE_CLIENT_ENCODING_WAIT:
 		case SETENV_STATE_OPTION_WAIT:
 		case SETENV_STATE_QUERY1_WAIT:
 		case SETENV_STATE_QUERY2_WAIT:
@@ -74,6 +75,7 @@ pqSetenvPoll(PGconn *conn)
 			}
 
 			/* These are writing states, so we just proceed. */
+		case SETENV_STATE_CLIENT_ENCODING_SEND:
 		case SETENV_STATE_OPTION_SEND:
 		case SETENV_STATE_QUERY1_SEND:
 		case SETENV_STATE_QUERY2_SEND:
@@ -98,6 +100,39 @@ pqSetenvPoll(PGconn *conn)
 	{
 		switch (conn->setenv_state)
 		{
+			/*
+			 * The _CLIENT_ENCODING_SEND code is slightly different
+			 * from _OPTION_SEND below (e.g., no getenv() call), which
+			 * is why a different state is used.
+			 */
+			case SETENV_STATE_CLIENT_ENCODING_SEND:
+				{
+					char		setQuery[100];	/* note length limit in
+												 * sprintf below */
+					const char *val = conn->client_encoding_initial;
+
+					if (val)
+					{
+						if (pg_strcasecmp(val, "default") == 0)
+							sprintf(setQuery, "SET client_encoding = DEFAULT");
+						else
+							sprintf(setQuery, "SET client_encoding = '%.60s'",
+									val);
+#ifdef CONNECTDEBUG
+						fprintf(stderr,
+								"Sending client_encoding with %s\n",
+								setQuery);
+#endif
+						if (!PQsendQuery(conn, setQuery))
+							goto error_return;
+
+						conn->setenv_state = SETENV_STATE_CLIENT_ENCODING_WAIT;
+					}
+					else
+						conn->setenv_state = SETENV_STATE_OPTION_SEND;
+					break;
+				}
+
 			case SETENV_STATE_OPTION_SEND:
 				{
 					/*
@@ -142,6 +177,31 @@ pqSetenvPoll(PGconn *conn)
 					break;
 				}
 
+			case SETENV_STATE_CLIENT_ENCODING_WAIT:
+				{
+					if (PQisBusy(conn))
+						return PGRES_POLLING_READING;
+
+					res = PQgetResult(conn);
+
+					if (res)
+					{
+						if (PQresultStatus(res) != PGRES_COMMAND_OK)
+						{
+							PQclear(res);
+							goto error_return;
+						}
+						PQclear(res);
+						/* Keep reading until PQgetResult returns NULL */
+					}
+					else
+					{
+						/* Query finished, so send the next option */
+						conn->setenv_state = SETENV_STATE_OPTION_SEND;
+					}
+					break;
+				}
+
 			case SETENV_STATE_OPTION_WAIT:
 				{
 					if (PQisBusy(conn))
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index 2a8dbdfd3aa..cf0b91a9bc1 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -1933,6 +1933,9 @@ build_startup_packet(const PGconn *conn, char *packet,
 			ADD_STARTUP_OPTION("application_name", val);
 	}
 
+	if (conn->client_encoding_initial && conn->client_encoding_initial[0])
+		ADD_STARTUP_OPTION("client_encoding", conn->client_encoding_initial);
+
 	/* Add any environment-driven GUC settings needed */
 	for (next_eo = options; next_eo->envName; next_eo++)
 	{
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index e9a2b718d33..25c779acd7e 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -235,6 +235,8 @@ typedef enum
 /* (this is used only for 2.0-protocol connections) */
 typedef enum
 {
+	SETENV_STATE_CLIENT_ENCODING_SEND,	/* About to send the client_encoding setting */
+	SETENV_STATE_CLIENT_ENCODING_WAIT,	/* Waiting for above send to complete */
 	SETENV_STATE_OPTION_SEND,	/* About to send an Environment Option */
 	SETENV_STATE_OPTION_WAIT,	/* Waiting for above send to complete */
 	SETENV_STATE_QUERY1_SEND,	/* About to send a status query */
@@ -293,6 +295,7 @@ struct pg_conn
 	char	   *pgtty;			/* tty on which the backend messages is
 								 * displayed (OBSOLETE, NOT USED) */
 	char	   *connect_timeout;	/* connection timeout (numeric string) */
+	char	   *client_encoding_initial; /* client encoding to request at connection start */
 	char	   *pgoptions;		/* options to start the backend with */
 	char	   *appname;		/* application name */
 	char	   *fbappname;		/* fallback application name */
-- 
GitLab