From e86fdb0ab224eaa73d907ab16a2dd0e0058699e0 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 26 Aug 2011 10:41:31 -0400
Subject: [PATCH] Support non-ASCII letters in psql variable names.

As in the backend, the implementation actually accepts any non-ASCII
character, but we only document that you can use letters.
---
 doc/src/sgml/ref/psql-ref.sgml | 25 +++++-----
 src/bin/psql/command.c         |  6 +--
 src/bin/psql/psqlscan.l        | 87 +++++++++++++++++++++++++++-------
 src/bin/psql/variables.c       | 34 ++++++++++++-
 src/bin/psql/variables.h       |  4 --
 5 files changed, 119 insertions(+), 37 deletions(-)

diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 2db4adff4b9..7e30c57c35b 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -2206,7 +2206,7 @@ lo_import 152801
         </para>
 
         <para>
-        Valid variable names can contain characters, digits, and
+        Valid variable names can contain letters, digits, and
         underscores. See the section <xref
         linkend="APP-PSQL-variables"
         endterm="APP-PSQL-variables-title"> below for details.
@@ -2461,8 +2461,12 @@ lo_import 152801
     <application>psql</application> provides variable substitution
     features similar to common Unix command shells.
     Variables are simply name/value pairs, where the value
-    can be any string of any length. To set variables, use the
-    <application>psql</application> meta-command
+    can be any string of any length.  The name must consist of letters
+    (including non-Latin letters), digits, and underscores.
+    </para>
+
+    <para>
+    To set a variable, use the <application>psql</application> meta-command
     <command>\set</command>:
 <programlisting>
 testdb=&gt; <userinput>\set foo bar</userinput>
@@ -2498,16 +2502,15 @@ bar
     </para>
 
     <para>
-    <application>psql</application>'s internal variable names can
-    consist of letters, numbers, and underscores in any order and any
-    number of them. A number of these variables are treated specially
-    by <application>psql</application>. They indicate certain option
+    A number of these variables are treated specially
+    by <application>psql</application>. They represent certain option
     settings that can be changed at run time by altering the value of
-    the variable or that represent some state of the application. Although
-    you can use these variables for any other purpose, this is not
+    the variable, or in some cases represent changeable state of
+    <application>psql</application>. Although
+    you can use these variables for other purposes, this is not
     recommended, as the program behavior might grow really strange
-    really quickly. By convention, all specially treated variables
-    consist of all upper-case letters (and possibly numbers and
+    really quickly. By convention, all specially treated variables' names
+    consist of all upper-case ASCII letters (and possibly digits and
     underscores). To ensure maximum compatibility in the future, avoid
     using such variable names for your own purposes. A list of all specially
     treated variables follows.
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index d6a925e435b..6d9cd6492f6 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -995,7 +995,7 @@ exec_command(const char *cmd,
 
 			if (!SetVariable(pset.vars, opt, result))
 			{
-				psql_error("\\%s: error\n", cmd);
+				psql_error("\\%s: error while setting variable\n", cmd);
 				success = false;
 			}
 
@@ -1096,7 +1096,7 @@ exec_command(const char *cmd,
 
 			if (!SetVariable(pset.vars, opt0, newval))
 			{
-				psql_error("\\%s: error\n", cmd);
+				psql_error("\\%s: error while setting variable\n", cmd);
 				success = false;
 			}
 			free(newval);
@@ -1272,7 +1272,7 @@ exec_command(const char *cmd,
 		}
 		else if (!SetVariable(pset.vars, opt, NULL))
 		{
-			psql_error("\\%s: error\n", cmd);
+			psql_error("\\%s: error while setting variable\n", cmd);
 			success = false;
 		}
 		free(opt);
diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l
index 8439c865bfe..1df8f3aa4f6 100644
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -120,6 +120,7 @@ static bool var_is_current_source(PsqlScanState state, const char *varname);
 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
 									  char **txtcopy);
 static void emit(const char *txt, int len);
+static char *extract_substring(const char *txt, int len);
 static void escape_variable(bool as_ident);
 
 #define ECHO emit(yytext, yyleng)
@@ -384,6 +385,9 @@ realfail2		({integer}|{decimal})[Ee][-+]
 
 param			\${integer}
 
+/* psql-specific: characters allowed in variable names (keep in sync with valid_variable_name() in variables.c) */
+variable_char	[A-Za-z\200-\377_0-9]
+
 other			.
 
 /*
@@ -680,11 +684,12 @@ other			.
 					return LEXRES_BACKSLASH;
 				}
 
-:[A-Za-z0-9_]+	{
+:{variable_char}+	{
 					/* Possible psql variable substitution */
-					const char *varname = yytext + 1;
+					char   *varname;
 					const char *value;
 
+					varname = extract_substring(yytext + 1, yyleng - 1);
 					value = GetVariable(pset.vars, varname);
 
 					if (value)
@@ -713,13 +718,15 @@ other			.
 						 */
 						ECHO;
 					}
+
+					free(varname);
 				}
 
-:'[A-Za-z0-9_]+'	{
+:'{variable_char}+'	{
 					escape_variable(false);
 				}
 
-:\"[A-Za-z0-9_]+\"	{
+:\"{variable_char}+\"	{
 					escape_variable(true);
 				}
 
@@ -728,13 +735,13 @@ other			.
 	 * two rules above fails to match completely.
 	 */
 
-:'[A-Za-z0-9_]*	{
+:'{variable_char}*	{
 					/* Throw back everything but the colon */
 					yyless(1);
 					ECHO;
 				}
 
-:\"[A-Za-z0-9_]*	{
+:\"{variable_char}*	{
 					/* Throw back everything but the colon */
 					yyless(1);
 					ECHO;
@@ -930,15 +937,18 @@ other			.
 					}
 				}
 
-:[A-Za-z0-9_]+	{
+:{variable_char}+	{
 					/* Possible psql variable substitution */
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
 					{
+						char   *varname;
 						const char *value;
 
-						value = GetVariable(pset.vars, yytext + 1);
+						varname = extract_substring(yytext + 1, yyleng - 1);
+						value = GetVariable(pset.vars, varname);
+						free(varname);
 
 						/*
 						 * The variable value is just emitted without any
@@ -956,7 +966,7 @@ other			.
 					return LEXRES_OK;
 				}
 
-:'[A-Za-z0-9_]+'	{
+:'{variable_char}+'	{
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
@@ -967,7 +977,7 @@ other			.
 				}
 
 
-:\"[A-Za-z0-9_]+\"	{
+:\"{variable_char}+\"	{
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
@@ -977,14 +987,14 @@ other			.
 					}
 				}
 
-:'[A-Za-z0-9_]*	{
+:'{variable_char}*	{
 					/* Throw back everything but the colon */
 					yyless(1);
 					ECHO;
 					BEGIN(xslashdefaultarg);
 				}
 
-:\"[A-Za-z0-9_]*	{
+:\"{variable_char}*	{
 					/* Throw back everything but the colon */
 					yyless(1);
 					ECHO;
@@ -1844,16 +1854,58 @@ emit(const char *txt, int len)
 	}
 }
 
+/*
+ * extract_substring --- fetch the true value of (part of) the current token
+ *
+ * Like emit(), but the len bytes at txt come back as a NUL-terminated string
+ * from pg_malloc (caller frees) rather than being pushed to output_buf.
+ */
+static char *
+extract_substring(const char *txt, int len)
+{
+	char	   *result = (char *) pg_malloc(len + 1);	/* +1 for the NUL */
+
+	if (cur_state->safe_encoding)
+		memcpy(result, txt, len);	/* token bytes can be copied verbatim */
+	else
+	{
+		/* Hard way: each 0xFF byte in txt is replaced by the refline byte */
+		const char *reference = cur_state->refline;
+		int		i;
+
+		reference += (txt - cur_state->curline);	/* txt's offset in curline */
+
+		for (i = 0; i < len; i++)
+		{
+			char	ch = txt[i];
+
+			if (ch == (char) 0xFF)	/* NOTE(review): presumably a masked byte */
+				ch = reference[i];	/* take the byte at the same offset */
+			result[i] = ch;
+		}
+	}
+	result[len] = '\0';
+	return result;
+}
+
+/*
+ * escape_variable --- process :'VARIABLE' or :"VARIABLE"
+ *
+ * If the variable name is found, escape its value using the appropriate
+ * quoting method and emit the value to output_buf.  (Since the result is
+ * surely quoted, there is never any reason to rescan it.)  If we don't
+ * find the variable or the escaping function fails, emit the token as-is.
+ */
 static void
 escape_variable(bool as_ident)
 {
-	char		saved_char;
+	char	   *varname;
 	const char *value;
 
 	/* Variable lookup. */
-	saved_char = yytext[yyleng - 1];
-	yytext[yyleng - 1] = '\0';
-	value = GetVariable(pset.vars, yytext + 2);
+	varname = extract_substring(yytext + 2, yyleng - 3);
+	value = GetVariable(pset.vars, varname);
+	free(varname);
 
 	/* Escaping. */
 	if (value)
@@ -1870,9 +1922,11 @@ escape_variable(bool as_ident)
 			else
 				escaped_value =
 					PQescapeLiteral(pset.db, value, strlen(value));
+
 			if (escaped_value == NULL)
 			{
 				const char *error = PQerrorMessage(pset.db);
+
 				psql_error("%s", error);
 			}
 			else
@@ -1888,6 +1942,5 @@ escape_variable(bool as_ident)
 	 * If we reach this point, some kind of error has occurred.  Emit the
 	 * original text into the output buffer.
 	 */
-	yytext[yyleng - 1] = saved_char;
 	emit(yytext, yyleng);
 }
diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c
index 38252893377..a43c786bf02 100644
--- a/src/bin/psql/variables.c
+++ b/src/bin/psql/variables.c
@@ -6,10 +6,40 @@
  * src/bin/psql/variables.c
  */
 #include "postgres_fe.h"
+
 #include "common.h"
 #include "variables.h"
 
 
+/*
+ * Check whether a variable's name is allowed; name must not be NULL.
+ *
+ * Returns true if name is non-empty and every byte is either high-bit-set
+ * (i.e. part of a non-ASCII character) or an ASCII letter, digit, or
+ * underscore.  Keep this in sync with variable_char in psqlscan.l.
+ */
+static bool
+valid_variable_name(const char *name)
+{
+	const unsigned char *ptr = (const unsigned char *) name;
+
+	/* Mustn't be zero-length */
+	if (*ptr == '\0')
+		return false;
+
+	while (*ptr)	/* *ptr is nonzero inside, so strchr can't match the NUL */
+	{
+		if (IS_HIGHBIT_SET(*ptr) ||
+			strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
+				   "_0123456789", *ptr) != NULL)
+			ptr++;
+		else
+			return false;	/* first disallowed byte rejects the whole name */
+	}
+
+	return true;
+}
+
 /*
  * A "variable space" is represented by an otherwise-unused struct _variable
  * that serves as list header.
@@ -158,7 +188,7 @@ SetVariable(VariableSpace space, const char *name, const char *value)
 	if (!space)
 		return false;
 
-	if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name))
+	if (!valid_variable_name(name))
 		return false;
 
 	if (!value)
@@ -202,7 +232,7 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
 	if (!space)
 		return false;
 
-	if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name))
+	if (!valid_variable_name(name))
 		return false;
 
 	for (previous = space, current = space->next;
diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h
index 4197069b4b7..865391dba7b 100644
--- a/src/bin/psql/variables.h
+++ b/src/bin/psql/variables.h
@@ -32,10 +32,6 @@ struct _variable
 
 typedef struct _variable *VariableSpace;
 
-/* Allowed chars in a variable's name */
-#define VALID_VARIABLE_CHARS "abcdefghijklmnopqrstuvwxyz"\
-							 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789_"
-
 VariableSpace CreateVariableSpace(void);
 const char *GetVariable(VariableSpace space, const char *name);
 
-- 
GitLab