From 572fda27118f7e54c7e4ebb75b48d33896e5f776 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Thu, 8 Mar 2001 00:24:34 +0000 Subject: [PATCH] Modify wchar conversion routines to not fetch the next byte past the end of a counted input string. Marinos Yannikos' recent crash report turns out to be due to applying pg_ascii2wchar_with_len to a TEXT object that is smack up against the end of memory. This is the second just-barely-reproducible bug report I have seen that traces to some bit of code fetching one more byte than it is allowed to. Let's be more careful out there, boys and girls. While at it, I changed the code to not risk a similar crash when there is a truncated multibyte character at the end of an input string. The output in this case might not be the most reasonable output possible; if anyone wants to improve it further, step right up... --- src/backend/utils/mb/mbutils.c | 8 ++--- src/backend/utils/mb/wchar.c | 60 ++++++++++++++++------------------ 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 0d3d8cb69bd..2abae59d62b 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -3,7 +3,7 @@ * client encoding and server internal encoding. 
* (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: mbutils.c,v 1.15 2001/02/10 02:31:27 tgl Exp $ + * $Id: mbutils.c,v 1.16 2001/03/08 00:24:34 tgl Exp $ */ #include "postgres.h" @@ -230,7 +230,7 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit) int len = 0; int l; - while (*mbstr && limit > 0) + while (limit > 0 && *mbstr) { l = pg_mblen(mbstr); limit -= l; @@ -252,7 +252,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit) int clen = 0; int l; - while (*mbstr && len > 0) + while (len > 0 && *mbstr) { l = pg_mblen(mbstr); if ((clen + l) > limit) @@ -267,7 +267,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit) } /* - * fuctions for utils/init + * functions for utils/init */ static int DatabaseEncoding = MULTIBYTE; diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index a4bf1131ad2..6d10cad020a 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multi-byte streams. 
* Tatsuo Ishii - * $Id: wchar.c,v 1.15 2001/02/11 01:59:22 ishii Exp $ + * $Id: wchar.c,v 1.16 2001/03/08 00:24:34 tgl Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -27,7 +27,7 @@ static int pg_ascii2wchar_with_len { int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { *to++ = *from++; len--; @@ -52,23 +52,22 @@ static int pg_euc2wchar_with_len { int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { - if (*from == SS2) + if (*from == SS2 && len >= 2) { from++; - len--; *to = 0xff & *from++; - len--; + len -= 2; } - else if (*from == SS3) + else if (*from == SS3 && len >= 3) { from++; *to = *from++ << 8; *to |= 0x3f & *from++; len -= 3; } - else if (*from & 0x80) + else if ((*from & 0x80) && len >= 2) { *to = *from++ << 8; *to |= *from++; @@ -140,24 +139,23 @@ static int pg_euccn2wchar_with_len { int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { - if (*from == SS2) + if (*from == SS2 && len >= 3) { from++; - len--; *to = 0x3f00 & (*from++ << 8); *to = *from++; - len -= 2; + len -= 3; } - else if (*from == SS3) + else if (*from == SS3 && len >= 3) { from++; *to = *from++ << 8; *to |= 0x3f & *from++; len -= 3; } - else if (*from & 0x80) + else if ((*from & 0x80) && len >= 2) { *to = *from++ << 8; *to |= *from++; @@ -195,25 +193,24 @@ static int pg_euctw2wchar_with_len { int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { - if (*from == SS2) + if (*from == SS2 && len >= 4) { from++; - len--; *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; - len -= 3; + len -= 4; } - else if (*from == SS3) + else if (*from == SS3 && len >= 3) { from++; *to = *from++ << 8; *to |= 0x3f & *from++; len -= 3; } - else if (*from & 0x80) + else if ((*from & 0x80) && len >= 2) { *to = *from++ << 8; *to |= *from++; @@ -261,30 +258,30 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar * to, int len) c3; int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { if ((*from & 0x80) == 0) 
{ *to = *from++; len--; } - else if ((*from & 0xe0) == 0xc0) + else if ((*from & 0xe0) == 0xc0 && len >= 2) { c1 = *from++ & 0x1f; c2 = *from++ & 0x3f; - len -= 2; *to = c1 << 6; *to |= c2; + len -= 2; } - else if ((*from & 0xe0) == 0xe0) + else if ((*from & 0xe0) == 0xe0 && len >= 3) { c1 = *from++ & 0x0f; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; - len -= 3; *to = c1 << 12; *to |= c2 << 6; *to |= c3; + len -= 3; } else { @@ -326,29 +323,29 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar * to, int len) { int cnt = 0; - while (*from && len > 0) + while (len > 0 && *from) { - if (IS_LC1(*from)) + if (IS_LC1(*from) && len >= 2) { *to = *from++ << 16; *to |= *from++; len -= 2; } - else if (IS_LCPRV1(*from)) + else if (IS_LCPRV1(*from) && len >= 3) { from++; *to = *from++ << 16; *to |= *from++; len -= 3; } - else if (IS_LC2(*from)) + else if (IS_LC2(*from) && len >= 3) { *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; len -= 3; } - else if (IS_LCPRV2(*from)) + else if (IS_LCPRV2(*from) && len >= 4) { from++; *to = *from++ << 16; @@ -396,9 +393,10 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar * to, int len) { int cnt = 0; - while (*from && len-- > 0) + while (len > 0 && *from) { *to++ = *from++; + len--; cnt++; } *to = 0; -- GitLab