From 5e00913dafba41cec619446e1901f737d335e416 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 13 Mar 2008 18:31:56 +0000
Subject: [PATCH] Fix varstr_cmp's special case for UTF8 encoding on Windows so
 that strings that are reported as "equal" by wcscoll() are checked to see if
 they really are bitwise equal, and are sorted per strcmp() if not.  We made
 this happen a couple of years ago in the regular code path, but it
 unaccountably got left out of the Windows/UTF8 case (probably brain fade on
 my part at the time).  As in the prior set of changes, affected users may
 need to reindex indexes on textual columns.

Backpatch as far as 8.2, which is the oldest release we are still supporting
on Windows.
---
 src/backend/utils/adt/varlena.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 52adbbb163f..75832856b6c 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.162 2008/01/01 19:45:53 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.163 2008/03/13 18:31:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1003,6 +1003,19 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 				ereport(ERROR,
 						(errmsg("could not compare Unicode strings: %m")));
 
+			/*
+			 * In some locales wcscoll() can claim that nonidentical strings
+			 * are equal.  Believing that would be bad news for a number of
+			 * reasons, so we follow Perl's lead and break such ties with a
+			 * bytewise comparison of the UTF-8 input (shorter string first).
+			 */
+			if (result == 0)
+			{
+				result = strncmp(arg1, arg2, Min(len1, len2));
+				if ((result == 0) && (len1 != len2))
+					result = (len1 < len2) ? -1 : 1;
+			}
+
 			if (a1p != a1buf)
 				pfree(a1p);
 			if (a2p != a2buf)
-- 
GitLab