From b6159202c99d4021fb078cede90b26f94883143d Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 23 Jun 2017 12:22:06 -0400
Subject: [PATCH] Fix memory leakage in ICU encoding conversion, and other code
 review.

Callers of icu_to_uchar() neglected to pfree the result string when done
with it.  This results in catastrophic memory leaks in varstr_cmp(),
because of our prevailing assumption that btree comparison functions don't
leak memory.  For safety, make all the call sites clean up leaks, though
I suspect that we could get away without it in formatting.c.  I audited
callers of icu_from_uchar() as well, but found no places that seemed to
have a comparable issue.

Add function API specifications for icu_to_uchar() and icu_from_uchar();
the lack of any thought-through specification is perhaps not unrelated
to the existence of this bug in the first place.  Fix icu_to_uchar()
to guarantee a nul-terminated result; although no existing caller appears
to care, the fact that it would have been nul-terminated except in
extreme corner cases seems ideally designed to bite someone on the rear
someday.  Fix ucnv_fromUChars() destCapacity argument --- in the worst
case, that could perhaps have led to a non-nul-terminated result, too.
Fix icu_from_uchar() to have a more reasonable definition of the function
result --- no callers are actually paying attention, so this isn't a live
bug, but it's certainly sloppily designed.  Const-ify icu_from_uchar()'s
input string for consistency.

That is not the end of what needs to be done to these functions, but
it's as much as I have the patience for right now.

Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
---
 src/backend/commands/collationcmds.c | 15 +++++++++---
 src/backend/utils/adt/formatting.c   |  3 +++
 src/backend/utils/adt/pg_locale.c    | 35 ++++++++++++++++++++++++----
 src/backend/utils/adt/varlena.c      | 16 +++++++++++--
 src/include/utils/pg_locale.h        |  2 +-
 5 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index bdfd73906bb..1a62b3e30aa 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -381,6 +381,10 @@ normalize_libc_locale_name(char *new, const char *old)
 
 
 #ifdef USE_ICU
+/*
+ * Get the ICU language tag for a locale name.
+ * The result is a palloc'd string.
+ */
 static char *
 get_icu_language_tag(const char *localename)
 {
@@ -397,7 +401,10 @@ get_icu_language_tag(const char *localename)
 	return pstrdup(buf);
 }
 
-
+/*
+ * Get a comment (specifically, the display name) for an ICU locale.
+ * The result is a palloc'd string.
+ */
 static char *
 get_icu_locale_comment(const char *localename)
 {
@@ -407,10 +414,12 @@ get_icu_locale_comment(const char *localename)
 	char	   *result;
 
 	status = U_ZERO_ERROR;
-	len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
+	len_uchar = uloc_getDisplayName(localename, "en",
+									&displayname[0], sizeof(displayname),
+									&status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
-				(errmsg("could get display name for locale \"%s\": %s",
+				(errmsg("could not get display name for locale \"%s\": %s",
 						localename, u_errorName(status))));
 
 	icu_from_uchar(&result, displayname, len_uchar);
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 31f04ff5ed5..46f45f66541 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1561,6 +1561,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 			len_conv = icu_convert_case(u_strToLower, mylocale,
 										&buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
+			pfree(buff_uchar);
 		}
 		else
 #endif
@@ -1684,6 +1685,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 			len_conv = icu_convert_case(u_strToUpper, mylocale,
 										&buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
+			pfree(buff_uchar);
 		}
 		else
 #endif
@@ -1808,6 +1810,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
 										&buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
+			pfree(buff_uchar);
 		}
 		else
 #endif
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index ad3fe74bbd6..0f5ec954c3a 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1486,6 +1486,18 @@ init_icu_converter(void)
 	icu_converter = conv;
 }
 
+/*
+ * Convert a string in the database encoding into a string of UChars.
+ *
+ * The source string at buff is of length nbytes
+ * (it needn't be nul-terminated)
+ *
+ * *buff_uchar receives a pointer to the palloc'd result string, and
+ * the function's result is the number of UChars generated.
+ *
+ * The result string is nul-terminated, though most callers rely on the
+ * result length instead.
+ */
 int32_t
 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
 {
@@ -1494,18 +1506,30 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
 
 	init_icu_converter();
 
-	len_uchar = 2 * nbytes;		/* max length per docs */
+	len_uchar = 2 * nbytes + 1; /* max length per docs */
 	*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
 	status = U_ZERO_ERROR;
-	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
+	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar,
+							  buff, nbytes, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
 	return len_uchar;
 }
 
+/*
+ * Convert a string of UChars into the database encoding.
+ *
+ * The source string at buff_uchar is of length len_uchar
+ * (it needn't be nul-terminated)
+ *
+ * *result receives a pointer to the palloc'd result string, and the
+ * function's result is the number of bytes generated (not counting nul).
+ *
+ * The result string is nul-terminated.
+ */
 int32_t
-icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
+icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
 {
 	UErrorCode	status;
 	int32_t		len_result;
@@ -1515,13 +1539,14 @@ icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
 	len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
 	*result = palloc(len_result + 1);
 	status = U_ZERO_ERROR;
-	ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status);
+	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+								 buff_uchar, len_uchar, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
 	return len_result;
 }
-#endif
+#endif							/* USE_ICU */
 
 /*
  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2407394b068..ebfb823fb8c 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1569,6 +1569,9 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 					result = ucol_strcoll(mylocale->info.icu.ucol,
 										  uchar1, ulen1,
 										  uchar2, ulen2);
+
+					pfree(uchar1);
+					pfree(uchar2);
 				}
 #else							/* not USE_ICU */
 				/* shouldn't happen */
@@ -2155,6 +2158,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 				result = ucol_strcoll(sss->locale->info.icu.ucol,
 									  uchar1, ulen1,
 									  uchar2, ulen2);
+
+				pfree(uchar1);
+				pfree(uchar2);
 			}
 #else							/* not USE_ICU */
 			/* shouldn't happen */
@@ -2279,7 +2285,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 		Size		bsize;
 #ifdef USE_ICU
 		int32_t		ulen = -1;
-		UChar	   *uchar;
+		UChar	   *uchar = NULL;
 #endif
 
 		/*
@@ -2354,7 +2360,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 												 &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
-								(errmsg("sort key generation failed: %s", u_errorName(status))));
+								(errmsg("sort key generation failed: %s",
+										u_errorName(status))));
 				}
 				else
 					bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
@@ -2394,6 +2401,11 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 		 * okay.  See remarks on bytea case above.)
 		 */
 		memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
+
+#ifdef USE_ICU
+		if (uchar)
+			pfree(uchar);
+#endif
 	}
 
 	/*
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 5e3711f0088..a02d27ba266 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -93,7 +93,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
 
 #ifdef USE_ICU
 extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
+extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
 #endif
 
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
-- 
GitLab