From 54cd4f04576833abc394e131288bf3dd7dcf4806 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 8 May 2010 16:39:53 +0000
Subject: [PATCH] Work around a subtle portability problem in use of printf %s
 format. Depending on which spec you read, field widths and precisions in %s
 may be counted either in bytes or characters.  Our code was assuming bytes,
 which is wrong at least for glibc's implementation, and in any case libc
 might have a different idea of the prevailing encoding than we do.  Hence,
 for portable results we must avoid using anything more complex than just "%s"
 unless the string to be printed is known to be all-ASCII.

This patch fixes the cases I could find, including the psql formatting
failure reported by Hernan Gonzalez.  In HEAD only, I also added comments
to some places where it appears safe to continue using "%.*s".
---
 src/backend/lib/stringinfo.c               |  5 ++--
 src/backend/parser/scansup.c               | 16 +++++++++---
 src/backend/tsearch/wparser_def.c          |  9 ++++++-
 src/backend/utils/adt/datetime.c           | 11 +++++++-
 src/backend/utils/error/elog.c             |  6 ++---
 src/bin/psql/command.c                     |  9 ++++++-
 src/bin/psql/help.c                        | 18 +++++++-------
 src/bin/psql/print.c                       | 26 +++++++++++++++----
 src/interfaces/ecpg/ecpglib/error.c        |  3 ++-
 src/interfaces/ecpg/pgtypeslib/dt_common.c | 10 +++++++-
 src/interfaces/libpq/fe-misc.c             | 29 +++++++++++++++++++---
 11 files changed, 111 insertions(+), 31 deletions(-)

diff --git a/src/backend/lib/stringinfo.c b/src/backend/lib/stringinfo.c
index 88db51871e9..9ae2455000f 100644
--- a/src/backend/lib/stringinfo.c
+++ b/src/backend/lib/stringinfo.c
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	  $PostgreSQL: pgsql/src/backend/lib/stringinfo.c,v 1.52 2010/01/02 16:57:45 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/lib/stringinfo.c,v 1.53 2010/05/08 16:39:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -226,7 +226,8 @@ appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
 
 	/*
 	 * Keep a trailing null in place, even though it's probably useless for
-	 * binary data...
+	 * binary data.  (Some callers are dealing with text but call this
+	 * because their input isn't null-terminated.)
 	 */
 	str->data[str->len] = '\0';
 }
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 5bc6d8d6071..417c79dd14e 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.39 2010/01/02 16:57:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.40 2010/05/08 16:39:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -176,10 +176,20 @@ truncate_identifier(char *ident, int len, bool warn)
 	{
 		len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
 		if (warn)
+		{
+			/*
+			 * Cannot use %.*s here because some machines interpret %s's
+			 * precision in characters, others in bytes.
+			 */
+			char	buf[NAMEDATALEN];
+
+			memcpy(buf, ident, len);
+			buf[len] = '\0';
 			ereport(NOTICE,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
-							ident, len, ident)));
+					 errmsg("identifier \"%s\" will be truncated to \"%s\"",
+							ident, buf)));
+		}
 		ident[len] = '\0';
 	}
 }
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index a2da9210c4c..d2e47ceaf5e 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.30 2010/04/28 02:04:16 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.31 2010/05/08 16:39:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -322,6 +322,12 @@ TParserInit(char *str, int len)
 	prs->state->state = TPS_Base;
 
 #ifdef WPARSER_TRACE
+	/*
+	 * Use of %.*s here is not portable when the string contains multibyte
+	 * characters: some machines interpret the length in characters, others
+	 * in bytes.  Since it's only a debugging aid, we haven't bothered to
+	 * fix this.
+	 */
 	fprintf(stderr, "parsing \"%.*s\"\n", len, str);
 #endif
 
@@ -361,6 +367,7 @@ TParserCopyInit(const TParser *orig)
 	prs->state->state = TPS_Base;
 
 #ifdef WPARSER_TRACE
+	/* See note above about %.*s */
 	fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
 #endif
 
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index f9e40f115fc..743ca8345d6 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/datetime.c,v 1.210 2010/01/02 16:57:53 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/datetime.c,v 1.211 2010/05/08 16:39:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3740,6 +3740,14 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, int *tzp, char **tzn, int style,
 
 			AppendTimestampSeconds(str + strlen(str), tm, fsec);
 
+			/*
+			 * Note: the uses of %.*s in this function would be unportable
+			 * if the timezone names ever contain non-ASCII characters,
+			 * since some platforms think the string length is measured
+			 * in characters not bytes.  However, all TZ abbreviations in
+			 * the Olson database are plain ASCII.
+			 */
+
 			if (tzp != NULL && tm->tm_isdst >= 0)
 			{
 				if (*tzn != NULL)
@@ -4091,6 +4099,7 @@ CheckDateTokenTable(const char *tablename, const datetkn *base, int nel)
 	{
 		if (strncmp(base[i - 1].token, base[i].token, TOKMAXLEN) >= 0)
 		{
+			/* %.*s is safe since all our tokens are ASCII */
 			elog(LOG, "ordering error in %s table: \"%.*s\" >= \"%.*s\"",
 				 tablename,
 				 TOKMAXLEN, base[i - 1].token,
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index a6992e65d94..b2fab359b87 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -42,7 +42,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.223 2010/02/26 02:01:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.224 2010/05/08 16:39:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1871,7 +1871,7 @@ log_line_prefix(StringInfo buf, ErrorData *edata)
 					int			displen;
 
 					psdisp = get_ps_display(&displen);
-					appendStringInfo(buf, "%.*s", displen, psdisp);
+					appendBinaryStringInfo(buf, psdisp, displen);
 				}
 				break;
 			case 'r':
@@ -2029,7 +2029,7 @@ write_csvlog(ErrorData *edata)
 		initStringInfo(&msgbuf);
 
 		psdisp = get_ps_display(&displen);
-		appendStringInfo(&msgbuf, "%.*s", displen, psdisp);
+		appendBinaryStringInfo(&msgbuf, psdisp, displen);
 		appendCSVLiteral(&buf, msgbuf.data);
 
 		pfree(msgbuf.data);
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index 4a94a1ace12..b9624451c35 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2010, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/command.c,v 1.218 2010/04/03 20:55:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/command.c,v 1.219 2010/05/08 16:39:51 tgl Exp $
  */
 #include "postgres_fe.h"
 #include "command.h"
@@ -651,6 +651,13 @@ exec_command(const char *cmd,
 	{
 		char	   *opt = psql_scan_slash_option(scan_state,
 												 OT_WHOLE_LINE, NULL, false);
+		size_t		len;
+
+		/* strip trailing spaces/semicolons; opt is NULL if \h had no argument */
+		len = opt ? strlen(opt) : 0;
+		while (len > 0 &&
+			   (isspace((unsigned char) opt[len - 1]) || opt[len - 1] == ';'))
+			opt[--len] = '\0';
 
 		helpSQL(opt, pset.popt.topt.pager);
 		free(opt);
diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c
index a591dec792a..98f13750bf2 100644
--- a/src/bin/psql/help.c
+++ b/src/bin/psql/help.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2010, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/help.c,v 1.157 2010/03/07 17:02:34 mha Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/help.c,v 1.158 2010/05/08 16:39:51 tgl Exp $
  */
 #include "postgres_fe.h"
 
@@ -284,6 +284,7 @@ slashUsage(unsigned short int pager)
 /*
  * helpSQL -- help with SQL commands
  *
+ * Note: we assume caller removed any trailing spaces in "topic".
  */
 void
 helpSQL(const char *topic, unsigned short int pager)
@@ -352,17 +353,16 @@ helpSQL(const char *topic, unsigned short int pager)
 					wordlen;
 		int			nl_count = 0;
 
-		/* User gets two chances: exact match, then the first word */
-
-		/* First pass : strip trailing spaces and semicolons */
+		/*
+		 * We first try exact match, then first + second words, then first
+		 * word only.
+		 */
 		len = strlen(topic);
-		while (topic[len - 1] == ' ' || topic[len - 1] == ';')
-			len--;
 
-		for (x = 1; x <= 3; x++)	/* Three chances to guess that word... */
+		for (x = 1; x <= 3; x++)
 		{
 			if (x > 1)			/* Nothing on first pass - try the opening
-								 * words */
+								 * word(s) */
 			{
 				wordlen = j = 1;
 				while (topic[j] != ' ' && j++ < len)
@@ -423,7 +423,7 @@ helpSQL(const char *topic, unsigned short int pager)
 		}
 
 		if (!help_found)
-			fprintf(output, _("No help available for \"%-.*s\".\nTry \\h with no arguments to see available help.\n"), (int) len, topic);
+			fprintf(output, _("No help available for \"%s\".\nTry \\h with no arguments to see available help.\n"), topic);
 
 		/* Only close if we used the pager */
 		if (output != stdout)
diff --git a/src/bin/psql/print.c b/src/bin/psql/print.c
index d62b46d0103..1d73fd5790f 100644
--- a/src/bin/psql/print.c
+++ b/src/bin/psql/print.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2010, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/print.c,v 1.124 2010/03/01 21:27:26 heikki Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/print.c,v 1.125 2010/05/08 16:39:52 tgl Exp $
  */
 #include "postgres_fe.h"
 
@@ -252,6 +252,20 @@ format_numeric_locale(const char *my_str)
 }
 
 
+/*
+ * fputnbytes: write exactly n bytes starting at str to stream f
+ *
+ * Do not use fprintf with a %.*s format for this: some C libraries count
+ * %s's precision in characters, others in bytes.  Write errors are ignored.
+ */
+static void
+fputnbytes(FILE *f, const char *str, size_t n)
+{
+	if (n > 0)
+		(void) fwrite(str, 1, n, f);
+}
+
+
 /*************************/
 /* Unaligned text		 */
 /*************************/
@@ -913,14 +927,16 @@ print_aligned_text(const printTableContent *cont, FILE *fout)
 					{
 						/* spaces first */
 						fprintf(fout, "%*s", width_wrap[j] - chars_to_output, "");
-						fprintf(fout, "%.*s", bytes_to_output,
-								this_line->ptr + bytes_output[j]);
+						fputnbytes(fout,
+								   this_line->ptr + bytes_output[j],
+								   bytes_to_output);
 					}
 					else	/* Left aligned cell */
 					{
 						/* spaces second */
-						fprintf(fout, "%.*s", bytes_to_output,
-								this_line->ptr + bytes_output[j]);
+						fputnbytes(fout,
+								   this_line->ptr + bytes_output[j],
+								   bytes_to_output);
 					}
 
 					bytes_output[j] += bytes_to_output;
diff --git a/src/interfaces/ecpg/ecpglib/error.c b/src/interfaces/ecpg/ecpglib/error.c
index ea48f082dca..5451fd29819 100644
--- a/src/interfaces/ecpg/ecpglib/error.c
+++ b/src/interfaces/ecpg/ecpglib/error.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/src/interfaces/ecpg/ecpglib/error.c,v 1.25 2010/03/08 12:15:24 meskes Exp $ */
+/* $PostgreSQL: pgsql/src/interfaces/ecpg/ecpglib/error.c,v 1.26 2010/05/08 16:39:52 tgl Exp $ */
 
 #define POSTGRES_ECPG_INTERNAL
 #include "postgres_fe.h"
@@ -332,6 +332,7 @@ ecpg_raise_backend(int line, PGresult *result, PGconn *conn, int compat)
 	else
 		sqlca->sqlcode = ECPG_PGSQL;
 
+	/* %.*s is safe here as long as sqlstate is all-ASCII */
 	ecpg_log("raising sqlstate %.*s (sqlcode %d): %s\n",
 			 sizeof(sqlca->sqlstate), sqlca->sqlstate, sqlca->sqlcode, sqlca->sqlerrm.sqlerrmc);
 
diff --git a/src/interfaces/ecpg/pgtypeslib/dt_common.c b/src/interfaces/ecpg/pgtypeslib/dt_common.c
index 9fb6357ddaf..dc5fbe7fd96 100644
--- a/src/interfaces/ecpg/pgtypeslib/dt_common.c
+++ b/src/interfaces/ecpg/pgtypeslib/dt_common.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/src/interfaces/ecpg/pgtypeslib/dt_common.c,v 1.51 2009/06/11 14:49:13 momjian Exp $ */
+/* $PostgreSQL: pgsql/src/interfaces/ecpg/pgtypeslib/dt_common.c,v 1.52 2010/05/08 16:39:52 tgl Exp $ */
 
 #include "postgres_fe.h"
 
@@ -855,6 +855,14 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, int *tzp, char **tzn, int style, cha
 			if (tm->tm_year <= 0)
 				sprintf(str + strlen(str), " BC");
 
+			/*
+			 * Note: the uses of %.*s in this function would be unportable
+			 * if the timezone names ever contain non-ASCII characters,
+			 * since some platforms think the string length is measured
+			 * in characters not bytes.  However, all TZ abbreviations in
+			 * the Olson database are plain ASCII.
+			 */
+
 			if (tzp != NULL && tm->tm_isdst >= 0)
 			{
 				if (*tzn != NULL)
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 5176cf56251..096e0d84a7a 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -23,7 +23,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.141 2010/01/02 16:58:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.142 2010/05/08 16:39:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,6 +67,20 @@ static int pqSocketCheck(PGconn *conn, int forRead, int forWrite,
 static int	pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time);
 
 
+/*
+ * fputnbytes: write exactly n bytes starting at str to stream f
+ *
+ * Do not use fprintf with a %.*s format for this: some C libraries count
+ * %s's precision in characters, others in bytes.  Write errors are ignored.
+ */
+static void
+fputnbytes(FILE *f, const char *str, size_t n)
+{
+	if (n > 0)
+		(void) fwrite(str, 1, n, f);
+}
+
+
 /*
  * pqGetc: get 1 character from the connection
  *
@@ -187,8 +201,11 @@ pqGetnchar(char *s, size_t len, PGconn *conn)
 	conn->inCursor += len;
 
 	if (conn->Pfdebug)
-		fprintf(conn->Pfdebug, "From backend (%lu)> %.*s\n",
-				(unsigned long) len, (int) len, s);
+	{
+		fprintf(conn->Pfdebug, "From backend (%lu)> ", (unsigned long) len);
+		fputnbytes(conn->Pfdebug, s, len);
+		fprintf(conn->Pfdebug, "\n");
+	}
 
 	return 0;
 }
@@ -204,7 +221,11 @@ pqPutnchar(const char *s, size_t len, PGconn *conn)
 		return EOF;
 
 	if (conn->Pfdebug)
-		fprintf(conn->Pfdebug, "To backend> %.*s\n", (int) len, s);
+	{
+		fprintf(conn->Pfdebug, "To backend> ");
+		fputnbytes(conn->Pfdebug, s, len);
+		fprintf(conn->Pfdebug, "\n");
+	}
 
 	return 0;
 }
-- 
GitLab