Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
postgres-lambda-diff
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jakob Huber
postgres-lambda-diff
Commits
4ea4f8bd
Commit
4ea4f8bd
authored
20 years ago
by
Bruce Momjian
Browse files
Options
Downloads
Patches
Plain Diff
Fix for Unicode characters above 0x10000.
John Hansen
parent
917c8bb4
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/backend/utils/mb/wchar.c
+72
-44
72 additions, 44 deletions
src/backend/utils/mb/wchar.c
src/include/mb/pg_wchar.h
+11
-1
11 additions, 1 deletion
src/include/mb/pg_wchar.h
with
83 additions
and
45 deletions
src/backend/utils/mb/wchar.c
+
72
−
44
View file @
4ea4f8bd
/*
/*
* conversion functions between pg_wchar and multibyte streams.
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.3
8
2004/
09/17 21:59:57 petere
Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.3
9
2004/
12/02 22:37:13 momjian
Exp $
*
*
* WIN1250 client encoding updated by Pavel Behal
* WIN1250 client encoding updated by Pavel Behal
*
*
...
@@ -343,6 +343,31 @@ pg_johab_dsplen(const unsigned char *s)
...
@@ -343,6 +343,31 @@ pg_johab_dsplen(const unsigned char *s)
return
(
pg_euc_dsplen
(
s
));
return
(
pg_euc_dsplen
(
s
));
}
}
/*
 * isLegalUTF8
 *
 * Report whether the "len" bytes starting at "source" form exactly one
 * well-formed UTF-8 sequence.
 *
 * source: first (lead) byte of the candidate sequence; may be NULL.
 * len:    number of bytes the caller believes the sequence occupies.
 *
 * Returns false when:
 *	- source is NULL;
 *	- len differs from the length pg_utf_mblen() reports for the lead byte,
 *	  or is outside 1..6;
 *	- any byte after the lead byte is not a continuation byte (0x80..0xBF);
 *	- the lead byte is itself a continuation byte or 0xC0/0xC1 (overlong
 *	  two-byte form), or is 0xFE/0xFF;
 *	- the second byte makes the sequence overlong (after 0xE0, 0xF0, 0xF8,
 *	  0xFC), encodes a UTF-16 surrogate (after 0xED), or exceeds the
 *	  0xF4 0x8F limit.
 * Returns true otherwise.  Five- and six-byte forms are still accepted, as
 * are lead bytes 0xF5..0xF7, so that existing callers see no new rejections
 * beyond the malformed cases listed above.
 */
bool
isLegalUTF8(const UTF8 *source, int len)
{
	UTF8		lo = 0x80;		/* lowest value allowed for the second byte */
	UTF8		hi = 0xBF;		/* highest value allowed for the second byte */
	int			i;

	/* Check the pointer before any arithmetic or dereference on it. */
	if (!source || (pg_utf_mblen(source) != len))
		return false;

	if (len < 1 || len > 6)
		return false;

	/* Bytes three and later must be plain continuation bytes. */
	for (i = len - 1; i >= 2; i--)
	{
		if (source[i] < 0x80 || source[i] > 0xBF)
			return false;
	}

	/*
	 * The second byte is a continuation byte whose permitted range is
	 * narrowed by certain lead bytes.  Applying both bounds for every lead
	 * byte closes the hole where 0xF4 followed by a byte below 0x80 passed.
	 */
	if (len >= 2)
	{
		switch (*source)
		{
			case 0xE0:
				lo = 0xA0;		/* below this is an overlong 3-byte form */
				break;
			case 0xED:
				hi = 0x9F;		/* above this is a surrogate, U+D800..DFFF */
				break;
			case 0xF0:
				lo = 0x90;		/* below this is an overlong 4-byte form */
				break;
			case 0xF4:
				hi = 0x8F;		/* above this exceeds U+10FFFF */
				break;
			case 0xF8:
				lo = 0x88;		/* below this is an overlong 5-byte form */
				break;
			case 0xFC:
				lo = 0x84;		/* below this is an overlong 6-byte form */
				break;
			default:
				break;
		}

		if (source[1] < lo || source[1] > hi)
			return false;
	}

	/* Lead byte: reject continuation bytes and the overlong leads C0/C1. */
	if (*source >= 0x80 && *source < 0xC2)
		return false;

	/* 0xFE and 0xFF never appear in UTF-8. */
	if (*source > 0xFD)
		return false;

	return true;
}
/*
/*
* convert UTF-8 string to pg_wchar (UCS-2)
* convert UTF-8 string to pg_wchar (UCS-2)
* caller should allocate enough space for "to"
* caller should allocate enough space for "to"
...
@@ -398,7 +423,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
...
@@ -398,7 +423,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
* returns the byte length of a UTF-8 word pointed to by s
* returns the byte length of a UTF-8 word pointed to by s
*/
*/
int
int
pg_utf_mblen
(
const
unsigned
char
*
s
)
pg_utf_mblen
(
const
UTF8
*
s
)
{
{
int
len
=
1
;
int
len
=
1
;
...
@@ -406,13 +431,19 @@ pg_utf_mblen(const unsigned char *s)
...
@@ -406,13 +431,19 @@ pg_utf_mblen(const unsigned char *s)
len
=
1
;
len
=
1
;
else
if
((
*
s
&
0xe0
)
==
0xc0
)
else
if
((
*
s
&
0xe0
)
==
0xc0
)
len
=
2
;
len
=
2
;
else
if
((
*
s
&
0x
e
0
)
==
0xe0
)
else
if
((
*
s
&
0x
f
0
)
==
0xe0
)
len
=
3
;
len
=
3
;
else
if
((
*
s
&
0xf8
)
==
0xf0
)
len
=
4
;
else
if
((
*
s
&
0xfc
)
==
0xf8
)
len
=
5
;
else
if
((
*
s
&
0xfe
)
==
0xfc
)
len
=
6
;
return
(
len
);
return
(
len
);
}
}
static
int
static
int
pg_utf_dsplen
(
const
unsigned
char
*
s
)
pg_utf_dsplen
(
const
UTF8
*
s
)
{
{
return
1
;
/* XXX fix me! */
return
1
;
/* XXX fix me! */
}
}
...
@@ -721,7 +752,7 @@ pg_wchar_tbl pg_wchar_table[] = {
...
@@ -721,7 +752,7 @@ pg_wchar_tbl pg_wchar_table[] = {
{
pg_euckr2wchar_with_len
,
pg_euckr_mblen
,
pg_euckr_dsplen
,
3
},
/* 3; PG_EUC_KR */
{
pg_euckr2wchar_with_len
,
pg_euckr_mblen
,
pg_euckr_dsplen
,
3
},
/* 3; PG_EUC_KR */
{
pg_euctw2wchar_with_len
,
pg_euctw_mblen
,
pg_euctw_dsplen
,
3
},
/* 4; PG_EUC_TW */
{
pg_euctw2wchar_with_len
,
pg_euctw_mblen
,
pg_euctw_dsplen
,
3
},
/* 4; PG_EUC_TW */
{
pg_johab2wchar_with_len
,
pg_johab_mblen
,
pg_johab_dsplen
,
3
},
/* 5; PG_JOHAB */
{
pg_johab2wchar_with_len
,
pg_johab_mblen
,
pg_johab_dsplen
,
3
},
/* 5; PG_JOHAB */
{
pg_utf2wchar_with_len
,
pg_utf_mblen
,
pg_utf_dsplen
,
3
},
/* 6; PG_UNICODE */
{
pg_utf2wchar_with_len
,
pg_utf_mblen
,
pg_utf_dsplen
,
6
},
/* 6; PG_UNICODE */
{
pg_mule2wchar_with_len
,
pg_mule_mblen
,
pg_mule_dsplen
,
3
},
/* 7; PG_MULE_INTERNAL */
{
pg_mule2wchar_with_len
,
pg_mule_mblen
,
pg_mule_dsplen
,
3
},
/* 7; PG_MULE_INTERNAL */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 8; PG_LATIN1 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 8; PG_LATIN1 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 9; PG_LATIN2 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 9; PG_LATIN2 */
...
@@ -822,18 +853,15 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
...
@@ -822,18 +853,15 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
while
(
len
>
0
&&
*
mbstr
)
while
(
len
>
0
&&
*
mbstr
)
{
{
/* special UTF-8 check */
if
(
encoding
==
PG_UTF8
&&
(
*
mbstr
&
0xf8
)
==
0xf0
)
{
if
(
noError
)
return
false
;
ereport
(
ERROR
,
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"Unicode characters greater than or equal to 0x10000 are not supported"
)));
}
l
=
pg_mblen
(
mbstr
);
l
=
pg_mblen
(
mbstr
);
/* special UTF-8 check */
if
(
encoding
==
PG_UTF8
)
{
if
(
!
isLegalUTF8
(
mbstr
,
l
))
{
if
(
noError
)
return
false
;
ereport
(
ERROR
,(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"Invalid UNICODE byte sequence detected near character %c"
,
*
mbstr
)));
}
}
else
{
for
(
i
=
1
;
i
<
l
;
i
++
)
for
(
i
=
1
;
i
<
l
;
i
++
)
{
{
/*
/*
...
@@ -863,10 +891,10 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
...
@@ -863,10 +891,10 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
}
}
}
}
}
len
-=
l
;
len
-=
l
;
mbstr
+=
l
;
mbstr
+=
l
;
}
}
return
true
;
return
true
;
}
}
...
...
This diff is collapsed.
Click to expand it.
src/include/mb/pg_wchar.h
+
11
−
1
View file @
4ea4f8bd
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.5
3
2004/12/02 22:
14:38
momjian Exp $ */
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.5
4
2004/12/02 22:
37:14
momjian Exp $ */
#ifndef PG_WCHAR_H
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
#define PG_WCHAR_H
...
@@ -17,6 +17,14 @@
...
@@ -17,6 +17,14 @@
*/
*/
typedef
unsigned
int
pg_wchar
;
typedef
unsigned
int
pg_wchar
;
/*
* The UTF types
*/
typedef
unsigned
int
UTF32
;
/* at least 32 bits */
typedef
unsigned
short
UTF16
;
/* at least 16 bits */
typedef
unsigned
char
UTF8
;
/* typically 8 bits */
/*
/*
* various definitions for EUC
* various definitions for EUC
*/
*/
...
@@ -340,4 +348,6 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
...
@@ -340,4 +348,6 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
extern
void
latin2mic_with_table
(
unsigned
char
*
l
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
latin2mic_with_table
(
unsigned
char
*
l
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
mic2latin_with_table
(
unsigned
char
*
mic
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
mic2latin_with_table
(
unsigned
char
*
mic
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
bool
isLegalUTF8
(
const
UTF8
*
source
,
int
len
);
#endif
/* PG_WCHAR_H */
#endif
/* PG_WCHAR_H */
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment