From ed437e2b27c48219a78f3504b0d05c17c2082d02 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 9 May 2010 02:16:00 +0000
Subject: [PATCH] Adjust comments about avoiding use of printf's %.*s. My
 initial impression that glibc was measuring the precision in characters
 (which is what the Linux man page says it does) was incorrect.  It does take
 the precision to be in bytes, but it also tries to truncate the string at a
 character boundary.  The bottom line remains the same: it will mess up if the
 string is not in the encoding it expects, so we need to avoid %.*s anytime
 there's a significant risk of that.  Previous code changes are still good,
 but adjust the comments to reflect this knowledge.  Per research by Hernan
 Gonzalez.

---
 src/backend/parser/scansup.c               |  6 +++---
 src/backend/tsearch/wparser_def.c          |  9 ++++-----
 src/backend/utils/adt/datetime.c           | 10 ++++------
 src/bin/psql/print.c                       |  6 +++---
 src/interfaces/ecpg/pgtypeslib/dt_common.c | 10 ++++------
 src/interfaces/libpq/fe-misc.c             |  6 +++---
 6 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 417c79dd14e..94082f77a04 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.40 2010/05/08 16:39:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.41 2010/05/09 02:15:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -178,8 +178,8 @@ truncate_identifier(char *ident, int len, bool warn)
 		if (warn)
 		{
 			/*
-			 * Cannot use %.*s here because some machines interpret %s's
-			 * precision in characters, others in bytes.
+			 * We avoid using %.*s here because it can misbehave if the data
+			 * is not valid in what libc thinks is the prevailing encoding.
 			 */
 			char	buf[NAMEDATALEN];
 
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index d2e47ceaf5e..cda28103c24 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.31 2010/05/08 16:39:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.32 2010/05/09 02:15:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -323,10 +323,9 @@ TParserInit(char *str, int len)
 
 #ifdef WPARSER_TRACE
 	/*
-	 * Use of %.*s here is not portable when the string contains multibyte
-	 * characters: some machines interpret the length in characters, others
-	 * in bytes.  Since it's only a debugging aid, we haven't bothered to
-	 * fix this.
+	 * Use of %.*s here is a bit risky since it can misbehave if the data
+	 * is not in what libc thinks is the prevailing encoding.  However,
+	 * since this is just a debugging aid, we choose to live with that.
 	 */
 	fprintf(stderr, "parsing \"%.*s\"\n", len, str);
 #endif
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 743ca8345d6..65d2b875f07 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/datetime.c,v 1.211 2010/05/08 16:39:51 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/datetime.c,v 1.212 2010/05/09 02:15:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3741,11 +3741,9 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, int *tzp, char **tzn, int style,
 			AppendTimestampSeconds(str + strlen(str), tm, fsec);
 
 			/*
-			 * Note: the uses of %.*s in this function would be unportable
-			 * if the timezone names ever contain non-ASCII characters,
-			 * since some platforms think the string length is measured
-			 * in characters not bytes.  However, all TZ abbreviations in
-			 * the Olson database are plain ASCII.
+			 * Note: the uses of %.*s in this function would be risky if the
+			 * timezone names ever contain non-ASCII characters.  However, all
+			 * TZ abbreviations in the Olson database are plain ASCII.
 			 */
 
 			if (tzp != NULL && tm->tm_isdst >= 0)
diff --git a/src/bin/psql/print.c b/src/bin/psql/print.c
index 1d73fd5790f..86e8a09a13b 100644
--- a/src/bin/psql/print.c
+++ b/src/bin/psql/print.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2010, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/print.c,v 1.125 2010/05/08 16:39:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/print.c,v 1.126 2010/05/09 02:15:59 tgl Exp $
  */
 #include "postgres_fe.h"
 
@@ -255,8 +255,8 @@ format_numeric_locale(const char *my_str)
 /*
  * fputnbytes: print exactly N bytes to a file
  *
- * Think not to use fprintf with a %.*s format for this.  Some machines
- * believe %s's precision is measured in characters, others in bytes.
+ * We avoid using %.*s here because it can misbehave if the data
+ * is not valid in what libc thinks is the prevailing encoding.
  */
 static void
 fputnbytes(FILE *f, const char *str, size_t n)
diff --git a/src/interfaces/ecpg/pgtypeslib/dt_common.c b/src/interfaces/ecpg/pgtypeslib/dt_common.c
index dc5fbe7fd96..52bb5d2ca17 100644
--- a/src/interfaces/ecpg/pgtypeslib/dt_common.c
+++ b/src/interfaces/ecpg/pgtypeslib/dt_common.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/src/interfaces/ecpg/pgtypeslib/dt_common.c,v 1.52 2010/05/08 16:39:52 tgl Exp $ */
+/* $PostgreSQL: pgsql/src/interfaces/ecpg/pgtypeslib/dt_common.c,v 1.53 2010/05/09 02:15:59 tgl Exp $ */
 
 #include "postgres_fe.h"
 
@@ -856,11 +856,9 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, int *tzp, char **tzn, int style, cha
 				sprintf(str + strlen(str), " BC");
 
 			/*
-			 * Note: the uses of %.*s in this function would be unportable
-			 * if the timezone names ever contain non-ASCII characters,
-			 * since some platforms think the string length is measured
-			 * in characters not bytes.  However, all TZ abbreviations in
-			 * the Olson database are plain ASCII.
+			 * Note: the uses of %.*s in this function would be risky if the
+			 * timezone names ever contain non-ASCII characters.  However, all
+			 * TZ abbreviations in the Olson database are plain ASCII.
 			 */
 
 			if (tzp != NULL && tm->tm_isdst >= 0)
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 096e0d84a7a..f76168ccd44 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -23,7 +23,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.142 2010/05/08 16:39:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.143 2010/05/09 02:16:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -70,8 +70,8 @@ static int	pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time);
 /*
  * fputnbytes: print exactly N bytes to a file
  *
- * Think not to use fprintf with a %.*s format for this.  Some machines
- * believe %s's precision is measured in characters, others in bytes.
+ * We avoid using %.*s here because it can misbehave if the data
+ * is not valid in what libc thinks is the prevailing encoding.
  */
 static void
 fputnbytes(FILE *f, const char *str, size_t n)
-- 
GitLab