From 1b658473eaa411cc722f0eeb0c576a5f459ca360 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <peter_e@gmx.net> Date: Sat, 18 Feb 2006 16:15:23 +0000 Subject: [PATCH] Add support for Windows codepages 1253, 1254, 1255, and 1257 and clean up a bunch of the support utilities. In src/backend/utils/mb/Unicode remove nearly duplicate copies of the UCS_to_XXX perl script and replace with one version to handle all generic files. Update the Makefile so that it knows about all the map files. This produces a slight difference in some of the map files, using a uniform naming convention and not mapping the null character. In src/backend/utils/mb/conversion_procs create a master utf8<->win codepage function like the ISO 8859 versions instead of having a separate handler for each conversion. There is an externally visible change in the name of the win1258 to utf8 conversion. According to the documentation notes, it was named incorrectly and this changes it to a standard name. Running the Unicode mapping perl scripts has shown some additional mapping changes in koi8r and iso8859-7. 
--- doc/src/sgml/charset.sgml | 54 +++++- doc/src/sgml/func.sgml | 26 ++- src/backend/utils/mb/README | 12 +- src/backend/utils/mb/Unicode/Makefile | 91 ++++++---- src/backend/utils/mb/Unicode/UCS_to_8859.pl | 110 ------------ src/backend/utils/mb/Unicode/UCS_to_BIG5.pl | 111 ------------- src/backend/utils/mb/Unicode/UCS_to_GBK.pl | 112 ------------- src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl | 111 ------------- src/backend/utils/mb/Unicode/UCS_to_UHC.pl | 111 ------------- src/backend/utils/mb/Unicode/UCS_to_WIN874.pl | 111 ------------- .../utils/mb/Unicode/UCS_to_cyrillic.pl | 112 ------------- .../{UCS_to_WINX.pl => UCS_to_most.pl} | 64 +++++-- .../utils/mb/Unicode/iso8859_7_to_utf8.map | 5 +- .../utils/mb/Unicode/koi8r_to_utf8.map | 16 +- .../utils/mb/Unicode/utf8_to_iso8859_7.map | 7 +- .../utils/mb/Unicode/utf8_to_koi8r.map | 18 +- .../utils/mb/Unicode/utf8_to_win1250.map | 3 +- .../utils/mb/Unicode/utf8_to_win1251.map | 4 +- .../utils/mb/Unicode/utf8_to_win1252.map | 3 +- .../utils/mb/Unicode/utf8_to_win1253.map | 113 +++++++++++++ .../utils/mb/Unicode/utf8_to_win1254.map | 123 ++++++++++++++ .../utils/mb/Unicode/utf8_to_win1255.map | 107 ++++++++++++ .../utils/mb/Unicode/utf8_to_win1257.map | 118 +++++++++++++ .../utils/mb/Unicode/utf8_to_win1258.map | 3 +- .../utils/mb/Unicode/utf8_to_win866.map | 2 +- .../utils/mb/Unicode/utf8_to_win874.map | 3 +- .../utils/mb/Unicode/win1250_to_utf8.map | 7 +- .../utils/mb/Unicode/win1251_to_utf8.map | 5 +- .../utils/mb/Unicode/win1252_to_utf8.map | 7 +- .../utils/mb/Unicode/win1253_to_utf8.map | 113 +++++++++++++ .../utils/mb/Unicode/win1254_to_utf8.map | 123 ++++++++++++++ .../utils/mb/Unicode/win1255_to_utf8.map | 107 ++++++++++++ .../utils/mb/Unicode/win1257_to_utf8.map | 118 +++++++++++++ .../utils/mb/Unicode/win1258_to_utf8.map | 11 +- .../utils/mb/Unicode/win874_to_utf8.map | 35 +--- .../utils/mb/conversion_procs/Makefile | 44 +++-- .../utf8_and_cyrillic/utf8_and_cyrillic.c | 85 +--------- .../Makefile | 6 +- 
.../utf8_and_win/utf8_and_win.c | 156 ++++++++++++++++++ .../utf8_and_win1250/Makefile | 12 -- .../utf8_and_win1250/utf8_and_win1250.c | 69 -------- .../utf8_and_win1252/Makefile | 12 -- .../utf8_and_win1252/utf8_and_win1252.c | 69 -------- .../utf8_and_win1256/Makefile | 12 -- .../utf8_and_win1256/utf8_and_win1256.c | 69 -------- .../utf8_and_win1258/Makefile | 12 -- .../utf8_and_win1258/utf8_and_win1258.c | 68 -------- .../utf8_and_win874/utf8_and_win874.c | 69 -------- src/backend/utils/mb/encnames.c | 38 ++++- src/backend/utils/mb/wchar.c | 16 +- src/bin/initdb/initdb.c | 6 +- src/include/mb/pg_wchar.h | 8 +- src/test/regress/expected/conversion.out | 108 +++++++++++- src/test/regress/sql/conversion.sql | 28 +++- 54 files changed, 1527 insertions(+), 1436 deletions(-) delete mode 100755 src/backend/utils/mb/Unicode/UCS_to_8859.pl delete mode 100755 src/backend/utils/mb/Unicode/UCS_to_BIG5.pl delete mode 100644 src/backend/utils/mb/Unicode/UCS_to_GBK.pl delete mode 100644 src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl delete mode 100644 src/backend/utils/mb/Unicode/UCS_to_UHC.pl delete mode 100644 src/backend/utils/mb/Unicode/UCS_to_WIN874.pl delete mode 100644 src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl rename src/backend/utils/mb/Unicode/{UCS_to_WINX.pl => UCS_to_most.pl} (56%) create mode 100644 src/backend/utils/mb/Unicode/utf8_to_win1253.map create mode 100644 src/backend/utils/mb/Unicode/utf8_to_win1254.map create mode 100644 src/backend/utils/mb/Unicode/utf8_to_win1255.map create mode 100644 src/backend/utils/mb/Unicode/utf8_to_win1257.map create mode 100644 src/backend/utils/mb/Unicode/win1253_to_utf8.map create mode 100644 src/backend/utils/mb/Unicode/win1254_to_utf8.map create mode 100644 src/backend/utils/mb/Unicode/win1255_to_utf8.map create mode 100644 src/backend/utils/mb/Unicode/win1257_to_utf8.map rename src/backend/utils/mb/conversion_procs/{utf8_and_win874 => utf8_and_win}/Makefile (69%) create mode 100644 
src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1250/Makefile delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1256/Makefile delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1258/Makefile delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1258/utf8_and_win1258.c delete mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index aee43acfd11..c25f72a73eb 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75 2005/11/04 23:13:59 petere Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.76 2006/02/18 16:15:21 petere Exp $ --> <chapter id="charset"> <title>Localization</> @@ -571,6 +571,27 @@ initdb --locale=sv_SE <entry>1</entry> <entry></entry> </row> + <row> + <entry><literal>WIN1253</literal></entry> + <entry>Windows CP1253</entry> + <entry>Greek</entry> + <entry>1</entry> + <entry></entry> + </row> + <row> + <entry><literal>WIN1254</literal></entry> + <entry>Windows CP1254</entry> + <entry>Turkish</entry> + <entry>1</entry> + <entry></entry> + </row> + <row> + <entry><literal>WIN1255</literal></entry> + <entry>Windows CP1255</entry> + <entry>Hebrew</entry> + <entry>1</entry> + <entry></entry> + </row> <row> <entry><literal>WIN1256</literal></entry> <entry>Windows CP1256</entry> @@ -578,6 +599,13 @@ initdb --locale=sv_SE <entry>1</entry> <entry></entry> </row> + <row> + 
<entry><literal>WIN1257</literal></entry> + <entry>Windows CP1257</entry> + <entry>Baltic</entry> + <entry>1</entry> + <entry></entry> + </row> <row> <entry><literal>WIN1258</literal></entry> <entry>Windows CP1258</entry> @@ -952,12 +980,36 @@ $ <userinput>psql -l</userinput> <literal>UTF8</literal> </entry> </row> + <row> + <entry><literal>WIN1253</literal></entry> + <entry><emphasis>WIN1253</emphasis>, + <literal>UTF8</literal> + </entry> + </row> + <row> + <entry><literal>WIN1254</literal></entry> + <entry><emphasis>WIN1254</emphasis>, + <literal>UTF8</literal> + </entry> + </row> + <row> + <entry><literal>WIN1255</literal></entry> + <entry><emphasis>WIN1255</emphasis>, + <literal>UTF8</literal> + </entry> + </row> <row> <entry><literal>WIN1256</literal></entry> <entry><emphasis>WIN1256</emphasis>, <literal>UTF8</literal> </entry> </row> + <row> + <entry><literal>WIN1257</literal></entry> + <entry><emphasis>WIN1257</emphasis>, + <literal>UTF8</literal> + </entry> + </row> <row> <entry><literal>WIN1258</literal></entry> <entry><emphasis>WIN1258</emphasis>, diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 8ab070c7c60..8f0975af703 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.306 2006/02/12 04:44:15 momjian Exp $ +$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.307 2006/02/18 16:15:21 petere Exp $ PostgreSQL documentation --> @@ -2172,12 +2172,36 @@ PostgreSQL documentation <entry><literal>WIN1252</literal></entry> </row> + <row> + <entry><literal>utf8_to_windows_1253</literal></entry> + <entry><literal>UTF8</literal></entry> + <entry><literal>WIN1253</literal></entry> + </row> + + <row> + <entry><literal>utf8_to_windows_1254</literal></entry> + <entry><literal>UTF8</literal></entry> + <entry><literal>WIN1254</literal></entry> + </row> + + <row> + <entry><literal>utf8_to_windows_1255</literal></entry> + <entry><literal>UTF8</literal></entry> + 
<entry><literal>WIN1255</literal></entry> + </row> + <row> <entry><literal>utf8_to_windows_1256</literal></entry> <entry><literal>UTF8</literal></entry> <entry><literal>WIN1256</literal></entry> </row> + <row> + <entry><literal>utf8_to_windows_1257</literal></entry> + <entry><literal>UTF8</literal></entry> + <entry><literal>WIN1257</literal></entry> + </row> + <row> <entry><literal>utf8_to_windows_866</literal></entry> <entry><literal>UTF8</literal></entry> diff --git a/src/backend/utils/mb/README b/src/backend/utils/mb/README index 8ee732ca048..d106b3120bc 100644 --- a/src/backend/utils/mb/README +++ b/src/backend/utils/mb/README @@ -1,15 +1,11 @@ -common.c: public functions for both the backend and the frontend. - requires conv.c and wchar.c +encnames.c: public functions for both the backend and the frontend. conv.c: static functions and a public table for code conversion wchar.c: mostly static functions and a public table for mb string and multibyte conversion -mbutilc.c: public functions for the backend only. +mbutils.c: public functions for the backend only. requires conv.c and wchar.c wstrcmp.c: strcmp for mb wstrncmp.c: strncmp for mb -alt.c: a tool to generate KOI8 <--> CP866 conversion table +win866.c: a tool to generate KOI8 <--> CP866 conversion table iso.c: a tool to generate KOI8 <--> ISO8859-5 conversion table -win.c: a tool to generate KOI8 <--> CP1251 conversion table -big5.c: conversion between BIG5 and Mule Internal Code(CNS 116643-1992 - plane 1 and plane 2). 
-utftest.c: test driver for utf2wchar() +win1251.c: a tool to generate KOI8 <--> CP1251 conversion table diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile index ccf5ddedbdf..b98b00d1f99 100644 --- a/src/backend/utils/mb/Unicode/Makefile +++ b/src/backend/utils/mb/Unicode/Makefile @@ -4,7 +4,7 @@ # # Copyright (c) 2001-2005, PostgreSQL Global Development Group # -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.9 2005/03/07 04:30:52 momjian Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.10 2006/02/18 16:15:22 petere Exp $ # #------------------------------------------------------------------------- @@ -12,36 +12,64 @@ subdir = src/backend/utils/mb/Unicode top_builddir = ../../../../.. include $(top_builddir)/src/Makefile.global -ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \ - iso8859_4_to_utf8.map iso8859_5_to_utf8.map \ - utf8_to_iso8859_2.map utf8_to_iso8859_3.map \ - utf8_to_iso8859_4.map utf8_to_iso8859_5.map - - -CYRILLICMAPS=koi8r_to_utf8.map win1251_to_utf8.map win866_to_utf8.map\ - utf8_to_koi8r.map utf8_to_win1251.map utf8_to_win866.map - -MAPS= $(ISO8859MAPS) $(CYRILLICMAPS)\ - big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \ - euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \ - utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \ - utf8_to_euc_kr.map utf8_to_euc_tw.map utf8_to_iso8859_2.map \ - utf8_to_sjis.map gb18030_to_utf8.map utf8_to_gb18030.map - -ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT -CYRILLICTEXTS=cp866.txt cp1251.txt koi8-r.txt - -TEXTS=$(ISO8859TEXTS) $(CYRILLICTEXTS) \ - BIG5.TXT CNS11643.TXT GB2312.TXT \ - JIS0201.TXT JIS0208.TXT JIS0212.TXT \ - OLD5601.TXT SHIFTJIS.TXT ISO10646-GB18030.TXT +ISO8859MAPS = iso8859_2_to_utf8.map utf8_to_iso8859_2.map \ + iso8859_3_to_utf8.map utf8_to_iso8859_3.map \ + iso8859_4_to_utf8.map utf8_to_iso8859_4.map \ + iso8859_5_to_utf8.map utf8_to_iso8859_5.map \ + iso8859_6_to_utf8.map 
utf8_to_iso8859_6.map \ + iso8859_7_to_utf8.map utf8_to_iso8859_7.map \ + iso8859_8_to_utf8.map utf8_to_iso8859_8.map \ + iso8859_9_to_utf8.map utf8_to_iso8859_9.map \ + iso8859_10_to_utf8.map utf8_to_iso8859_10.map \ + iso8859_13_to_utf8.map utf8_to_iso8859_13.map \ + iso8859_14_to_utf8.map utf8_to_iso8859_14.map \ + iso8859_15_to_utf8.map utf8_to_iso8859_15.map \ + iso8859_16_to_utf8.map utf8_to_iso8859_16.map + +WINMAPS = win866_to_utf8.map utf8_to_win866.map \ + win874_to_utf8.map utf8_to_win874.map \ + win1250_to_utf8.map utf8_to_win1250.map \ + win1251_to_utf8.map utf8_to_win1251.map \ + win1252_to_utf8.map utf8_to_win1252.map \ + win1253_to_utf8.map utf8_to_win1253.map \ + win1254_to_utf8.map utf8_to_win1254.map \ + win1255_to_utf8.map utf8_to_win1255.map \ + win1256_to_utf8.map utf8_to_win1256.map \ + win1257_to_utf8.map utf8_to_win1257.map \ + win1258_to_utf8.map utf8_to_win1258.map + +GENERICMAPS = $(ISO8859MAPS) $(WINMAPS) \ + big5_to_utf8.map utf8_to_big5.map \ + johab_to_utf8.map utf8_to_johab.map \ + uhc_to_utf8.map utf8_to_uhc.map \ + gbk_to_utf8.map utf8_to_gbk.map \ + koi8r_to_utf8.map utf8_to_koi8r.map + +SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \ + euc_jp_to_utf8.map utf8_to_euc_jp.map \ + euc_kr_to_utf8.map utf8_to_euc_kr.map \ + euc_tw_to_utf8.map utf8_to_euc_tw.map \ + sjis_to_utf8.map utf8_to_sjis.map \ + gb18030_to_utf8.map utf8_to_gb18030.map + +MAPS = $(GENERICMAPS) $(SPECIALMAPS) + +ISO8859TEXTS = 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT \ + 8859-6.TXT 8859-7.TXT 8859-8.TXT 8859-9.TXT \ + 8859-10.TXT 8859-13.TXT 8859-14.TXT 8859-15.TXT \ + 8859-16.TXT + +WINTEXTS = CP866.TXT CP874.TXT CP1250.TXT CP1251.TXT \ + CP1252.TXT CP1253.TXT CP1254.TXT CP1255.TXT \ + CP1256.TXT CP1257.TXT CP1258.TXT + +GENERICTEXTS = $(ISO8859TEXTS) $(WINTEXTS) \ + KOI8-R.TXT CP936.TXT CP949.TXT JOHAB.TXT BIG5.TXT all: $(MAPS) -$(ISO8859MAPS) : $(ISO8859TEXTS) - ./UCS_to_8859.pl -$(CYRILLICMAPS) : $(CYRILLICTEXTS) - ./UCS_to_cyrillic.pl 
+$(GENERICMAPS) : $(GENERICTEXTS) + ./UCS_to_most.pl euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT ./UCS_to_EUC_JP.pl @@ -49,18 +77,15 @@ euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT euc_cn_to_utf8.map utf8_to_euc_cn.map : GB2312.TXT ./UCS_to_EUC_CN.pl -euc_kr_to_utf8.map utf8_to_euc_kr.map : OLD5601.TXT +euc_kr_to_utf8.map utf8_to_euc_kr.map : KSX1001.TXT ./UCS_to_EUC_KR.pl euc_tw_to_utf8.map utf8_to_euc_tw.map : CNS11643.TXT ./UCS_to_EUC_TW.pl -sjis_to_utf8.map utf8_to_sjis.map : SHIFTJIS.TXT +sjis_to_utf8.map utf8_to_sjis.map : CP932.TXT ./UCS_to_SJIS.pl -big5_to_utf8.map utf8_to_big5.map : BIG5.TXT - ./UCS_to_BIG5.pl - gb18030_to_utf8.map utf8_to_gb18030.map : ISO10646-GB18030.TXT ./UCS_to_GB18030.pl clean: diff --git a/src/backend/utils/mb/Unicode/UCS_to_8859.pl b/src/backend/utils/mb/Unicode/UCS_to_8859.pl deleted file mode 100755 index 131b97129d5..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_8859.pl +++ /dev/null @@ -1,110 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_8859.pl,v 1.8 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> ISO8859 code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain "8859-[2-16].TXT" from the organization's ftp site. 
-# We assume the file include three tab-separated columns: -# ISO/IEC 8859 code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -@charsets = (2,3,4,5,6,7,8,9,10,13,14,15,16); -foreach $charset (@charsets) { - -# -# first, generate UTF8->ISO8859 table -# - $in_file = "8859-${charset}.TXT"; - - open( FILE, $in_file ) || die( "cannot open $in_file" ); - - reset 'array'; - - while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - $array{ $utf } = $code; - } - } - close( FILE ); - - $file = "utf8_to_iso8859_${charset}.map"; - open( FILE, "> $file" ) || die( "cannot open $file" ); - print FILE "static pg_utf_to_local ULmapISO8859_${charset}[ $count ] = {\n"; - - for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } - } - - print FILE "};\n"; - close(FILE); - -# -# then generate ISO885->UTF8 table -# - open( FILE, $in_file ) || die( "cannot open $in_file" ); - - reset 'array'; - - while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if($code >= 0x80){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - $array{ $code } = $utf; - } - } - close( FILE ); - - $file = "iso8859_${charset}_to_utf8.map"; - open( FILE, "> $file" ) || die( "cannot open $file" ); - print FILE "static pg_local_to_utf LUmapISO8859_${charset}[ $count ] = {\n"; - for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, 
$utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } - } - - print FILE "};\n"; - close(FILE); -} diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl deleted file mode 100755 index 72e9f2c7a6e..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl +++ /dev/null @@ -1,111 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl,v 1.7 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> BIG5 code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain OLD5601.TXT from -# the organization's ftp site. -# -# OLD5601.TXT format: -# KSC5601 code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -# first generate UTF-8 --> BIG5 table - -$in_file = "BIG5.TXT"; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - - $array{ $utf } = $code; - } -} -close( FILE ); - -# -# first, generate UTF8 --> BIG5 table -# - -$file = "utf8_to_big5.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n"; - -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } -} - -print FILE "};\n"; -close(FILE); - -# -# then generate EUC_JP --> UTF8 table -# -reset 'array'; 
- -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; - next; - } - $count++; - - $array{ $code } = $utf; - } -} -close( FILE ); - -$file = "big5_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } -} - -print FILE "};\n"; -close(FILE); diff --git a/src/backend/utils/mb/Unicode/UCS_to_GBK.pl b/src/backend/utils/mb/Unicode/UCS_to_GBK.pl deleted file mode 100644 index b8bc7eaaf42..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_GBK.pl +++ /dev/null @@ -1,112 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_GBK.pl,v 1.6 2005/03/07 04:30:52 momjian Exp $ -# -# -# Generate UTF-8 <--> GBK code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain CP936.TXT from -# the organization's ftp site. 
-# -# CP936.TXT format: -# GBK code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -# first generate UTF-8 --> GBK table - -$in_file = "CP936.TXT"; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - - $array{ $utf } = $code; - } -} -close( FILE ); - -# -# first, generate UTF8 --> WIN949 table -# - -$file = "utf8_to_gbk.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_utf_to_local ULmapGBK[ $count ] = {\n"; - -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } -} - -print FILE "};\n"; -close(FILE); - -# -# then generate WIN936 --> UTF8 table -# -reset 'array'; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; - next; - } - $count++; - - $array{ $code } = $utf; - } -} -close( FILE ); - -$file = "gbk_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_local_to_utf LUmapGBK[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } -} - -print FILE "};\n"; -close(FILE); diff --git 
a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl deleted file mode 100644 index 6dbc5174c09..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl +++ /dev/null @@ -1,111 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl,v 1.6 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> JOHAB code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain JOHAB.TXT from -# the organization's ftp site. -# -# JOHAB.TXT format: -# JOHAB code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -# first generate UTF-8 --> JOHAB table - -$in_file = "JOHAB.TXT"; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - - $array{ $utf } = $code; - } -} -close( FILE ); - -# -# first, generate UTF8 --> JOHAB table -# - -$file = "utf8_to_johab.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_utf_to_local ULmapJOHAB[ $count ] = {\n"; - -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } -} - -print FILE "};\n"; -close(FILE); - -# -# then generate JOHAB --> UTF8 table -# -reset 'array'; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, 
$rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; - next; - } - $count++; - - $array{ $code } = $utf; - } -} -close( FILE ); - -$file = "johab_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_local_to_utf LUmapJOHAB[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } -} - -print FILE "};\n"; -close(FILE); diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl deleted file mode 100644 index de518c46765..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl +++ /dev/null @@ -1,111 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_UHC.pl,v 1.6 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> BIG5 code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain OLD5601.TXT from -# the organization's ftp site. 
-# -# CP949.TXT format: -# UHC code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -# first generate UTF-8 --> WIN949 table - -$in_file = "CP949.TXT"; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - - $array{ $utf } = $code; - } -} -close( FILE ); - -# -# first, generate UTF8 --> UHC table -# - -$file = "utf8_to_uhc.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_utf_to_local ULmapUHC[ $count ] = {\n"; - -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } -} - -print FILE "};\n"; -close(FILE); - -# -# then generate UHC --> UTF8 table -# -reset 'array'; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; - next; - } - $count++; - - $array{ $code } = $utf; - } -} -close( FILE ); - -$file = "uhc_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_local_to_utf LUmapUHC[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } -} - -print FILE "};\n"; -close(FILE); diff --git 
a/src/backend/utils/mb/Unicode/UCS_to_WIN874.pl b/src/backend/utils/mb/Unicode/UCS_to_WIN874.pl deleted file mode 100644 index b53ca9f0480..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_WIN874.pl +++ /dev/null @@ -1,111 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_WIN874.pl,v 1.6 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> WIN874 code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain OLD5601.TXT from -# the organization's ftp site. -# -# OLD5601.TXT format: -# KSC5601 code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; - -# first generate UTF-8 --> WIN949 table - -$in_file = "CP874.TXT"; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - - $array{ $utf } = $code; - } -} -close( FILE ); - -# -# first, generate UTF8 --> WIN874 table -# - -$file = "utf8_to_win874.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_utf_to_local ULmapWIN874[ $count ] = {\n"; - -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } -} - -print FILE "};\n"; -close(FILE); - -# -# then generate WIN874 --> UTF8 table -# -reset 'array'; - -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; 
- } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; - next; - } - $count++; - - $array{ $code } = $utf; - } -} -close( FILE ); - -$file = "win874_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); -print FILE "static pg_local_to_utf LUmapWIN874[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } -} - -print FILE "};\n"; -close(FILE); diff --git a/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl b/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl deleted file mode 100644 index f4969594482..00000000000 --- a/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl +++ /dev/null @@ -1,112 +0,0 @@ -#! /usr/bin/perl -# -# Copyright (c) 2001-2005, PostgreSQL Global Development Group -# -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl,v 1.7 2005/03/07 04:30:52 momjian Exp $ -# -# Generate UTF-8 <--> ISO8859 code conversion tables from -# map files provided by Unicode organization. -# Unfortunately it is prohibited by the organization -# to distribute the map files. So if you try to use this script, -# you have to obtain "8859-[2-5].TXT" from the organization's ftp site. 
-# We assume the file include three tab-separated columns: -# ISO/IEC 8859 code in hex -# UCS-2 code in hex -# # and Unicode name (not used in this script) - -require "ucs2utf.pl"; -%filename = ('KOI8R'=>'koi8-r.txt', - 'WIN1251'=>'cp1251.txt', - 'WIN866'=>'cp866.txt'); -@charsets = ('KOI8R','WIN866','WIN1251'); -foreach $charset (@charsets) { - -# -# first, generate UTF8->ISO8859 table -# - $in_file = $filename{$charset}; - - open( FILE, $in_file ) || die( "cannot open $in_file" ); - - reset 'array'; - - while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - $array{ $utf } = $code; - } - } - close( FILE ); - - $file = "utf8_to_${charset}.map"; - open( FILE, "> $file" ) || die( "cannot open $file" ); - print FILE "static pg_utf_to_local ULmap_${charset}[ $count ] = {\n"; - - for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $code; - } - } - - print FILE "};\n"; - close(FILE); - -# -# then generate ISO885->UTF8 table -# - open( FILE, $in_file ) || die( "cannot open $in_file" ); - - reset 'array'; - - while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if($code >= 0x80){ - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; - next; - } - $count++; - $array{ $code } = $utf; - } - } - close( FILE ); - - $file = "${charset}_to_utf8.map"; - open( FILE, "> $file" ) || die( "cannot open $file" ); - print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n"; - for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; - $count--; - if( $count == 
0 ){ - printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { - printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; - } - } - - print FILE "};\n"; - close(FILE); -} diff --git a/src/backend/utils/mb/Unicode/UCS_to_WINX.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl similarity index 56% rename from src/backend/utils/mb/Unicode/UCS_to_WINX.pl rename to src/backend/utils/mb/Unicode/UCS_to_most.pl index 156507ff552..b9c95d89a75 100644 --- a/src/backend/utils/mb/Unicode/UCS_to_WINX.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl @@ -2,28 +2,58 @@ # # Copyright (c) 2001-2005, PostgreSQL Global Development Group # -# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_WINX.pl,v 1.6 2005/03/07 04:30:52 momjian Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.1 2006/02/18 16:15:22 petere Exp $ # -# Generate UTF-8 <--> WINX code conversion tables from +# Generate UTF-8 <--> character code conversion tables from # map files provided by Unicode organization. # Unfortunately it is prohibited by the organization # to distribute the map files. So if you try to use this script, -# you have to obtain "8859-[2-5].TXT" from the organization's ftp site. +# you have to obtain the map files from the organization's ftp site. 
+# ftp://www.unicode.org/Public/MAPPINGS/ # We assume the file include three tab-separated columns: -# ISO/IEC 8859 code in hex +# source character set code in hex # UCS-2 code in hex # # and Unicode name (not used in this script) require "ucs2utf.pl"; -%filename = ('WIN1256'=>'CP1256.TXT', - 'WIN1258'=>'CP1258.TXT', - 'WIN874'=>'CP874.TXT', - 'WIN1250'=>'CP1250.TXT'); -@charsets = ('WIN1256','WIN1258','WIN874','WIN1250'); + +%filename = ( + 'WIN866' => 'CP866.TXT', + 'WIN874' => 'CP874.TXT', + 'WIN1250' => 'CP1250.TXT', + 'WIN1251' => 'CP1251.TXT', + 'WIN1252' => 'CP1252.TXT', + 'WIN1253' => 'CP1253.TXT', + 'WIN1254' => 'CP1254.TXT', + 'WIN1255' => 'CP1255.TXT', + 'WIN1256' => 'CP1256.TXT', + 'WIN1257' => 'CP1257.TXT', + 'WIN1258' => 'CP1258.TXT', + 'ISO8859_2' => '8859-2.TXT', + 'ISO8859_3' => '8859-3.TXT', + 'ISO8859_4' => '8859-4.TXT', + 'ISO8859_5' => '8859-5.TXT', + 'ISO8859_6' => '8859-6.TXT', + 'ISO8859_7' => '8859-7.TXT', + 'ISO8859_8' => '8859-8.TXT', + 'ISO8859_9' => '8859-9.TXT', + 'ISO8859_10' => '8859-10.TXT', + 'ISO8859_13' => '8859-13.TXT', + 'ISO8859_14' => '8859-14.TXT', + 'ISO8859_15' => '8859-15.TXT', + 'ISO8859_16' => '8859-16.TXT', + 'KOI8R' => 'KOI8-R.TXT', + 'GBK' => 'CP936.TXT', + 'UHC' => 'CP949.TXT', + 'JOHAB' => 'JOHAB.TXT', + 'BIG5' => 'BIG5.TXT', +); + +@charsets = keys(%filename); foreach $charset (@charsets) { # -# first, generate UTF8->ISO8859 table +# first, generate UTF8-> charset table # $in_file = $filename{$charset}; @@ -39,7 +69,7 @@ foreach $charset (@charsets) { ( $c, $u, $rest ) = split; $ucs = hex($u); $code = hex($c); - if( $code >= 0x80){ + if( $code >= 0x80 && $ucs >= 0x0080){ $utf = &ucs2utf($ucs); if( $array{ $utf } ne "" ){ printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; @@ -51,9 +81,9 @@ foreach $charset (@charsets) { } close( FILE ); - $file = lower("utf8_to_${charset}.map"); + $file = lc("utf8_to_${charset}.map"); open( FILE, "> $file" ) || die( "cannot open $file" ); - print FILE "static pg_utf_to_local 
ULmap_${charset}[ $count ] = {\n"; + print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n"; for $index ( sort {$a <=> $b} keys( %array ) ){ $code = $array{ $index }; @@ -69,7 +99,7 @@ foreach $charset (@charsets) { close(FILE); # -# then generate ISO885->UTF8 table +# then generate character set code ->UTF8 table # open( FILE, $in_file ) || die( "cannot open $in_file" ); @@ -83,9 +113,9 @@ foreach $charset (@charsets) { ( $c, $u, $rest ) = split; $ucs = hex($u); $code = hex($c); - if($code >= 0x80){ + if($code >= 0x80 && $ucs >= 0x0080){ $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ + if( $array{ $code } ne "" ){ printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; next; } @@ -95,7 +125,7 @@ foreach $charset (@charsets) { } close( FILE ); - $file = lower("${charset}_to_utf8.map"); + $file = lc("${charset}_to_utf8.map"); open( FILE, "> $file" ) || die( "cannot open $file" ); print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n"; for $index ( sort {$a <=> $b} keys( %array ) ){ diff --git a/src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map b/src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map index 36b966dca4b..9f2db13fb2c 100644 --- a/src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map +++ b/src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map @@ -1,4 +1,4 @@ -static pg_local_to_utf LUmapISO8859_7[ 122 ] = { +static pg_local_to_utf LUmapISO8859_7[ 125 ] = { {0x0080, 0xc280}, {0x0081, 0xc281}, {0x0082, 0xc282}, @@ -35,10 +35,13 @@ static pg_local_to_utf LUmapISO8859_7[ 122 ] = { {0x00a1, 0xe28098}, {0x00a2, 0xe28099}, {0x00a3, 0xc2a3}, + {0x00a4, 0xe282ac}, + {0x00a5, 0xe282af}, {0x00a6, 0xc2a6}, {0x00a7, 0xc2a7}, {0x00a8, 0xc2a8}, {0x00a9, 0xc2a9}, + {0x00aa, 0xcdba}, {0x00ab, 0xc2ab}, {0x00ac, 0xc2ac}, {0x00ad, 0xc2ad}, diff --git a/src/backend/utils/mb/Unicode/koi8r_to_utf8.map b/src/backend/utils/mb/Unicode/koi8r_to_utf8.map index e8eaa7095f2..c02aec45fcb 100644 --- a/src/backend/utils/mb/Unicode/koi8r_to_utf8.map +++ 
b/src/backend/utils/mb/Unicode/koi8r_to_utf8.map @@ -35,32 +35,32 @@ static pg_local_to_utf LUmapKOI8R[ 128 ] = { {0x00a1, 0xe29591}, {0x00a2, 0xe29592}, {0x00a3, 0xd191}, - {0x00a4, 0xd194}, + {0x00a4, 0xe29593}, {0x00a5, 0xe29594}, - {0x00a6, 0xd196}, - {0x00a7, 0xd197}, + {0x00a6, 0xe29595}, + {0x00a7, 0xe29596}, {0x00a8, 0xe29597}, {0x00a9, 0xe29598}, {0x00aa, 0xe29599}, {0x00ab, 0xe2959a}, {0x00ac, 0xe2959b}, - {0x00ad, 0xd291}, + {0x00ad, 0xe2959c}, {0x00ae, 0xe2959d}, {0x00af, 0xe2959e}, {0x00b0, 0xe2959f}, {0x00b1, 0xe295a0}, {0x00b2, 0xe295a1}, {0x00b3, 0xd081}, - {0x00b4, 0xd084}, + {0x00b4, 0xe295a2}, {0x00b5, 0xe295a3}, - {0x00b6, 0xd086}, - {0x00b7, 0xd087}, + {0x00b6, 0xe295a4}, + {0x00b7, 0xe295a5}, {0x00b8, 0xe295a6}, {0x00b9, 0xe295a7}, {0x00ba, 0xe295a8}, {0x00bb, 0xe295a9}, {0x00bc, 0xe295aa}, - {0x00bd, 0xd290}, + {0x00bd, 0xe295ab}, {0x00be, 0xe295ac}, {0x00bf, 0xc2a9}, {0x00c0, 0xd18e}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_iso8859_7.map b/src/backend/utils/mb/Unicode/utf8_to_iso8859_7.map index e46ebe18b03..8e4b7d0eb00 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_iso8859_7.map +++ b/src/backend/utils/mb/Unicode/utf8_to_iso8859_7.map @@ -1,4 +1,4 @@ -static pg_utf_to_local ULmapISO8859_7[ 122 ] = { +static pg_utf_to_local ULmapISO8859_7[ 125 ] = { {0xc280, 0x0080}, {0xc281, 0x0081}, {0xc282, 0x0082}, @@ -47,6 +47,7 @@ static pg_utf_to_local ULmapISO8859_7[ 122 ] = { {0xc2b7, 0x00b7}, {0xc2bb, 0x00bb}, {0xc2bd, 0x00bd}, + {0xcdba, 0x00aa}, {0xce84, 0x00b4}, {0xce85, 0x00b5}, {0xce86, 0x00b6}, @@ -120,5 +121,7 @@ static pg_utf_to_local ULmapISO8859_7[ 122 ] = { {0xcf8e, 0x00fe}, {0xe28095, 0x00af}, {0xe28098, 0x00a1}, - {0xe28099, 0x00a2} + {0xe28099, 0x00a2}, + {0xe282ac, 0x00a4}, + {0xe282af, 0x00a5} }; diff --git a/src/backend/utils/mb/Unicode/utf8_to_koi8r.map b/src/backend/utils/mb/Unicode/utf8_to_koi8r.map index 2134ec280f1..97ab485e195 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_koi8r.map +++ 
b/src/backend/utils/mb/Unicode/utf8_to_koi8r.map @@ -1,4 +1,4 @@ -static pg_utf_to_local ULmap_KOI8R[ 128 ] = { +static pg_utf_to_local ULmapKOI8R[ 128 ] = { {0xc2a0, 0x009a}, {0xc2a9, 0x00bf}, {0xc2b0, 0x009c}, @@ -6,9 +6,6 @@ static pg_utf_to_local ULmap_KOI8R[ 128 ] = { {0xc2b7, 0x009e}, {0xc3b7, 0x009f}, {0xd081, 0x00b3}, - {0xd084, 0x00b4}, - {0xd086, 0x00b6}, - {0xd087, 0x00b7}, {0xd090, 0x00e1}, {0xd091, 0x00e2}, {0xd092, 0x00f7}, @@ -74,11 +71,6 @@ static pg_utf_to_local ULmap_KOI8R[ 128 ] = { {0xd18e, 0x00c0}, {0xd18f, 0x00d1}, {0xd191, 0x00a3}, - {0xd194, 0x00a4}, - {0xd196, 0x00a6}, - {0xd197, 0x00a7}, - {0xd290, 0x00bd}, - {0xd291, 0x00ad}, {0xe28899, 0x0095}, {0xe2889a, 0x0096}, {0xe28988, 0x0097}, @@ -100,23 +92,31 @@ static pg_utf_to_local ULmap_KOI8R[ 128 ] = { {0xe29590, 0x00a0}, {0xe29591, 0x00a1}, {0xe29592, 0x00a2}, + {0xe29593, 0x00a4}, {0xe29594, 0x00a5}, + {0xe29595, 0x00a6}, + {0xe29596, 0x00a7}, {0xe29597, 0x00a8}, {0xe29598, 0x00a9}, {0xe29599, 0x00aa}, {0xe2959a, 0x00ab}, {0xe2959b, 0x00ac}, + {0xe2959c, 0x00ad}, {0xe2959d, 0x00ae}, {0xe2959e, 0x00af}, {0xe2959f, 0x00b0}, {0xe295a0, 0x00b1}, {0xe295a1, 0x00b2}, + {0xe295a2, 0x00b4}, {0xe295a3, 0x00b5}, + {0xe295a4, 0x00b6}, + {0xe295a5, 0x00b7}, {0xe295a6, 0x00b8}, {0xe295a7, 0x00b9}, {0xe295a8, 0x00ba}, {0xe295a9, 0x00bb}, {0xe295aa, 0x00bc}, + {0xe295ab, 0x00bd}, {0xe295ac, 0x00be}, {0xe29680, 0x008b}, {0xe29684, 0x008c}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1250.map b/src/backend/utils/mb/Unicode/utf8_to_win1250.map index 3c6f4320981..ef0381dc434 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_win1250.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win1250.map @@ -1,5 +1,4 @@ -static pg_utf_to_local ULmapWIN1250[ 124 ] = { - {0x0000, 0x0081}, +static pg_utf_to_local ULmapWIN1250[ 123 ] = { {0xc2a0, 0x00a0}, {0xc2a4, 0x00a4}, {0xc2a6, 0x00a6}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1251.map b/src/backend/utils/mb/Unicode/utf8_to_win1251.map index 
434c3f88f61..e69fd6573e2 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_win1251.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win1251.map @@ -1,5 +1,4 @@ -static pg_utf_to_local ULmap_WIN1251[ 127 ] = { - {0x0000, 0x0088}, +static pg_utf_to_local ULmapWIN1251[ 127 ] = { {0xc2a0, 0x00a0}, {0xc2a4, 0x00a4}, {0xc2a6, 0x00a6}, @@ -124,6 +123,7 @@ static pg_utf_to_local ULmap_WIN1251[ 127 ] = { {0xe280b0, 0x0089}, {0xe280b9, 0x008b}, {0xe280ba, 0x009b}, + {0xe282ac, 0x0088}, {0xe28496, 0x00b9}, {0xe284a2, 0x0099} }; diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1252.map b/src/backend/utils/mb/Unicode/utf8_to_win1252.map index 6dc3c6c07e9..ba9594b74cf 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_win1252.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win1252.map @@ -1,5 +1,4 @@ -static pg_utf_to_local ULmapWIN1252[ 124 ] = { - {0x0000, 0x0081}, +static pg_utf_to_local ULmapWIN1252[ 123 ] = { {0xc2a0, 0x00a0}, {0xc2a1, 0x00a1}, {0xc2a2, 0x00a2}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1253.map b/src/backend/utils/mb/Unicode/utf8_to_win1253.map new file mode 100644 index 00000000000..a7961e95f12 --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_win1253.map @@ -0,0 +1,113 @@ +static pg_utf_to_local ULmapWIN1253[ 111 ] = { + {0xc2a0, 0x00a0}, + {0xc2a3, 0x00a3}, + {0xc2a4, 0x00a4}, + {0xc2a5, 0x00a5}, + {0xc2a6, 0x00a6}, + {0xc2a7, 0x00a7}, + {0xc2a8, 0x00a8}, + {0xc2a9, 0x00a9}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b2, 0x00b2}, + {0xc2b3, 0x00b3}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2bb, 0x00bb}, + {0xc2bd, 0x00bd}, + {0xc692, 0x0083}, + {0xce84, 0x00b4}, + {0xce85, 0x00a1}, + {0xce86, 0x00a2}, + {0xce88, 0x00b8}, + {0xce89, 0x00b9}, + {0xce8a, 0x00ba}, + {0xce8c, 0x00bc}, + {0xce8e, 0x00be}, + {0xce8f, 0x00bf}, + {0xce90, 0x00c0}, + {0xce91, 0x00c1}, + {0xce92, 0x00c2}, + {0xce93, 0x00c3}, + {0xce94, 0x00c4}, + {0xce95, 
0x00c5}, + {0xce96, 0x00c6}, + {0xce97, 0x00c7}, + {0xce98, 0x00c8}, + {0xce99, 0x00c9}, + {0xce9a, 0x00ca}, + {0xce9b, 0x00cb}, + {0xce9c, 0x00cc}, + {0xce9d, 0x00cd}, + {0xce9e, 0x00ce}, + {0xce9f, 0x00cf}, + {0xcea0, 0x00d0}, + {0xcea1, 0x00d1}, + {0xcea3, 0x00d3}, + {0xcea4, 0x00d4}, + {0xcea5, 0x00d5}, + {0xcea6, 0x00d6}, + {0xcea7, 0x00d7}, + {0xcea8, 0x00d8}, + {0xcea9, 0x00d9}, + {0xceaa, 0x00da}, + {0xceab, 0x00db}, + {0xceac, 0x00dc}, + {0xcead, 0x00dd}, + {0xceae, 0x00de}, + {0xceaf, 0x00df}, + {0xceb0, 0x00e0}, + {0xceb1, 0x00e1}, + {0xceb2, 0x00e2}, + {0xceb3, 0x00e3}, + {0xceb4, 0x00e4}, + {0xceb5, 0x00e5}, + {0xceb6, 0x00e6}, + {0xceb7, 0x00e7}, + {0xceb8, 0x00e8}, + {0xceb9, 0x00e9}, + {0xceba, 0x00ea}, + {0xcebb, 0x00eb}, + {0xcebc, 0x00ec}, + {0xcebd, 0x00ed}, + {0xcebe, 0x00ee}, + {0xcebf, 0x00ef}, + {0xcf80, 0x00f0}, + {0xcf81, 0x00f1}, + {0xcf82, 0x00f2}, + {0xcf83, 0x00f3}, + {0xcf84, 0x00f4}, + {0xcf85, 0x00f5}, + {0xcf86, 0x00f6}, + {0xcf87, 0x00f7}, + {0xcf88, 0x00f8}, + {0xcf89, 0x00f9}, + {0xcf8a, 0x00fa}, + {0xcf8b, 0x00fb}, + {0xcf8c, 0x00fc}, + {0xcf8d, 0x00fd}, + {0xcf8e, 0x00fe}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28095, 0x00af}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + {0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe282ac, 0x0080}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1254.map b/src/backend/utils/mb/Unicode/utf8_to_win1254.map new file mode 100644 index 00000000000..e1afbe8f40d --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_win1254.map @@ -0,0 +1,123 @@ +static pg_utf_to_local ULmapWIN1254[ 121 ] = { + {0xc2a0, 0x00a0}, + {0xc2a1, 0x00a1}, + {0xc2a2, 0x00a2}, + {0xc2a3, 0x00a3}, + {0xc2a4, 0x00a4}, + {0xc2a5, 0x00a5}, + {0xc2a6, 0x00a6}, + {0xc2a7, 0x00a7}, + {0xc2a8, 
0x00a8}, + {0xc2a9, 0x00a9}, + {0xc2aa, 0x00aa}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2af, 0x00af}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b2, 0x00b2}, + {0xc2b3, 0x00b3}, + {0xc2b4, 0x00b4}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2b8, 0x00b8}, + {0xc2b9, 0x00b9}, + {0xc2ba, 0x00ba}, + {0xc2bb, 0x00bb}, + {0xc2bc, 0x00bc}, + {0xc2bd, 0x00bd}, + {0xc2be, 0x00be}, + {0xc2bf, 0x00bf}, + {0xc380, 0x00c0}, + {0xc381, 0x00c1}, + {0xc382, 0x00c2}, + {0xc383, 0x00c3}, + {0xc384, 0x00c4}, + {0xc385, 0x00c5}, + {0xc386, 0x00c6}, + {0xc387, 0x00c7}, + {0xc388, 0x00c8}, + {0xc389, 0x00c9}, + {0xc38a, 0x00ca}, + {0xc38b, 0x00cb}, + {0xc38c, 0x00cc}, + {0xc38d, 0x00cd}, + {0xc38e, 0x00ce}, + {0xc38f, 0x00cf}, + {0xc391, 0x00d1}, + {0xc392, 0x00d2}, + {0xc393, 0x00d3}, + {0xc394, 0x00d4}, + {0xc395, 0x00d5}, + {0xc396, 0x00d6}, + {0xc397, 0x00d7}, + {0xc398, 0x00d8}, + {0xc399, 0x00d9}, + {0xc39a, 0x00da}, + {0xc39b, 0x00db}, + {0xc39c, 0x00dc}, + {0xc39f, 0x00df}, + {0xc3a0, 0x00e0}, + {0xc3a1, 0x00e1}, + {0xc3a2, 0x00e2}, + {0xc3a3, 0x00e3}, + {0xc3a4, 0x00e4}, + {0xc3a5, 0x00e5}, + {0xc3a6, 0x00e6}, + {0xc3a7, 0x00e7}, + {0xc3a8, 0x00e8}, + {0xc3a9, 0x00e9}, + {0xc3aa, 0x00ea}, + {0xc3ab, 0x00eb}, + {0xc3ac, 0x00ec}, + {0xc3ad, 0x00ed}, + {0xc3ae, 0x00ee}, + {0xc3af, 0x00ef}, + {0xc3b1, 0x00f1}, + {0xc3b2, 0x00f2}, + {0xc3b3, 0x00f3}, + {0xc3b4, 0x00f4}, + {0xc3b5, 0x00f5}, + {0xc3b6, 0x00f6}, + {0xc3b7, 0x00f7}, + {0xc3b8, 0x00f8}, + {0xc3b9, 0x00f9}, + {0xc3ba, 0x00fa}, + {0xc3bb, 0x00fb}, + {0xc3bc, 0x00fc}, + {0xc3bf, 0x00ff}, + {0xc49e, 0x00d0}, + {0xc49f, 0x00f0}, + {0xc4b0, 0x00dd}, + {0xc4b1, 0x00fd}, + {0xc592, 0x008c}, + {0xc593, 0x009c}, + {0xc59e, 0x00de}, + {0xc59f, 0x00fe}, + {0xc5a0, 0x008a}, + {0xc5a1, 0x009a}, + {0xc5b8, 0x009f}, + {0xc692, 0x0083}, + {0xcb86, 0x0088}, + {0xcb9c, 0x0098}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + 
{0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe282ac, 0x0080}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1255.map b/src/backend/utils/mb/Unicode/utf8_to_win1255.map new file mode 100644 index 00000000000..9071fe0a61b --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_win1255.map @@ -0,0 +1,107 @@ +static pg_utf_to_local ULmapWIN1255[ 105 ] = { + {0xc2a0, 0x00a0}, + {0xc2a1, 0x00a1}, + {0xc2a2, 0x00a2}, + {0xc2a3, 0x00a3}, + {0xc2a5, 0x00a5}, + {0xc2a6, 0x00a6}, + {0xc2a7, 0x00a7}, + {0xc2a8, 0x00a8}, + {0xc2a9, 0x00a9}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2af, 0x00af}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b2, 0x00b2}, + {0xc2b3, 0x00b3}, + {0xc2b4, 0x00b4}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2b8, 0x00b8}, + {0xc2b9, 0x00b9}, + {0xc2bb, 0x00bb}, + {0xc2bc, 0x00bc}, + {0xc2bd, 0x00bd}, + {0xc2be, 0x00be}, + {0xc2bf, 0x00bf}, + {0xc397, 0x00aa}, + {0xc3b7, 0x00ba}, + {0xc692, 0x0083}, + {0xcb86, 0x0088}, + {0xcb9c, 0x0098}, + {0xd6b0, 0x00c0}, + {0xd6b1, 0x00c1}, + {0xd6b2, 0x00c2}, + {0xd6b3, 0x00c3}, + {0xd6b4, 0x00c4}, + {0xd6b5, 0x00c5}, + {0xd6b6, 0x00c6}, + {0xd6b7, 0x00c7}, + {0xd6b8, 0x00c8}, + {0xd6b9, 0x00c9}, + {0xd6bb, 0x00cb}, + {0xd6bc, 0x00cc}, + {0xd6bd, 0x00cd}, + {0xd6be, 0x00ce}, + {0xd6bf, 0x00cf}, + {0xd780, 0x00d0}, + {0xd781, 0x00d1}, + {0xd782, 0x00d2}, + {0xd783, 0x00d3}, + {0xd790, 0x00e0}, + {0xd791, 0x00e1}, + {0xd792, 0x00e2}, + {0xd793, 0x00e3}, + {0xd794, 0x00e4}, + {0xd795, 0x00e5}, + {0xd796, 0x00e6}, + {0xd797, 0x00e7}, + {0xd798, 0x00e8}, + {0xd799, 0x00e9}, + {0xd79a, 0x00ea}, + {0xd79b, 0x00eb}, + {0xd79c, 0x00ec}, + {0xd79d, 0x00ed}, + {0xd79e, 0x00ee}, + {0xd79f, 0x00ef}, + {0xd7a0, 0x00f0}, + {0xd7a1, 
0x00f1}, + {0xd7a2, 0x00f2}, + {0xd7a3, 0x00f3}, + {0xd7a4, 0x00f4}, + {0xd7a5, 0x00f5}, + {0xd7a6, 0x00f6}, + {0xd7a7, 0x00f7}, + {0xd7a8, 0x00f8}, + {0xd7a9, 0x00f9}, + {0xd7aa, 0x00fa}, + {0xd7b0, 0x00d4}, + {0xd7b1, 0x00d5}, + {0xd7b2, 0x00d6}, + {0xd7b3, 0x00d7}, + {0xd7b4, 0x00d8}, + {0xe2808e, 0x00fd}, + {0xe2808f, 0x00fe}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + {0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe282aa, 0x00a4}, + {0xe282ac, 0x0080}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1257.map b/src/backend/utils/mb/Unicode/utf8_to_win1257.map new file mode 100644 index 00000000000..562678119e9 --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_win1257.map @@ -0,0 +1,118 @@ +static pg_utf_to_local ULmapWIN1257[ 116 ] = { + {0xc2a0, 0x00a0}, + {0xc2a2, 0x00a2}, + {0xc2a3, 0x00a3}, + {0xc2a4, 0x00a4}, + {0xc2a6, 0x00a6}, + {0xc2a7, 0x00a7}, + {0xc2a8, 0x008d}, + {0xc2a9, 0x00a9}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2af, 0x009d}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b2, 0x00b2}, + {0xc2b3, 0x00b3}, + {0xc2b4, 0x00b4}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2b8, 0x008f}, + {0xc2b9, 0x00b9}, + {0xc2bb, 0x00bb}, + {0xc2bc, 0x00bc}, + {0xc2bd, 0x00bd}, + {0xc2be, 0x00be}, + {0xc384, 0x00c4}, + {0xc385, 0x00c5}, + {0xc386, 0x00af}, + {0xc389, 0x00c9}, + {0xc393, 0x00d3}, + {0xc395, 0x00d5}, + {0xc396, 0x00d6}, + {0xc397, 0x00d7}, + {0xc398, 0x00a8}, + {0xc39c, 0x00dc}, + {0xc39f, 0x00df}, + {0xc3a4, 0x00e4}, + {0xc3a5, 0x00e5}, + {0xc3a6, 0x00bf}, + {0xc3a9, 0x00e9}, + {0xc3b3, 0x00f3}, + {0xc3b5, 0x00f5}, + {0xc3b6, 0x00f6}, + {0xc3b7, 0x00f7}, + {0xc3b8, 0x00b8}, + {0xc3bc, 0x00fc}, + 
{0xc480, 0x00c2}, + {0xc481, 0x00e2}, + {0xc484, 0x00c0}, + {0xc485, 0x00e0}, + {0xc486, 0x00c3}, + {0xc487, 0x00e3}, + {0xc48c, 0x00c8}, + {0xc48d, 0x00e8}, + {0xc492, 0x00c7}, + {0xc493, 0x00e7}, + {0xc496, 0x00cb}, + {0xc497, 0x00eb}, + {0xc498, 0x00c6}, + {0xc499, 0x00e6}, + {0xc4a2, 0x00cc}, + {0xc4a3, 0x00ec}, + {0xc4aa, 0x00ce}, + {0xc4ab, 0x00ee}, + {0xc4ae, 0x00c1}, + {0xc4af, 0x00e1}, + {0xc4b6, 0x00cd}, + {0xc4b7, 0x00ed}, + {0xc4bb, 0x00cf}, + {0xc4bc, 0x00ef}, + {0xc581, 0x00d9}, + {0xc582, 0x00f9}, + {0xc583, 0x00d1}, + {0xc584, 0x00f1}, + {0xc585, 0x00d2}, + {0xc586, 0x00f2}, + {0xc58c, 0x00d4}, + {0xc58d, 0x00f4}, + {0xc596, 0x00aa}, + {0xc597, 0x00ba}, + {0xc59a, 0x00da}, + {0xc59b, 0x00fa}, + {0xc5a0, 0x00d0}, + {0xc5a1, 0x00f0}, + {0xc5aa, 0x00db}, + {0xc5ab, 0x00fb}, + {0xc5b2, 0x00d8}, + {0xc5b3, 0x00f8}, + {0xc5b9, 0x00ca}, + {0xc5ba, 0x00ea}, + {0xc5bb, 0x00dd}, + {0xc5bc, 0x00fd}, + {0xc5bd, 0x00de}, + {0xc5be, 0x00fe}, + {0xcb87, 0x008e}, + {0xcb99, 0x00ff}, + {0xcb9b, 0x009e}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + {0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe282ac, 0x0080}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1258.map b/src/backend/utils/mb/Unicode/utf8_to_win1258.map index 84cefd91f3f..7c4629ff91f 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_win1258.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win1258.map @@ -1,5 +1,4 @@ -static pg_utf_to_local ULmapWIN1258[ 120 ] = { - {0x0000, 0x0081}, +static pg_utf_to_local ULmapWIN1258[ 119 ] = { {0xc2a0, 0x00a0}, {0xc2a1, 0x00a1}, {0xc2a2, 0x00a2}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_win866.map b/src/backend/utils/mb/Unicode/utf8_to_win866.map index cac06a82edd..e5767e09307 100644 --- 
a/src/backend/utils/mb/Unicode/utf8_to_win866.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win866.map @@ -1,4 +1,4 @@ -static pg_utf_to_local ULmap_WIN866[ 128 ] = { +static pg_utf_to_local ULmapWIN866[ 128 ] = { {0xc2a0, 0x00ff}, {0xc2a4, 0x00fd}, {0xc2b0, 0x00f8}, diff --git a/src/backend/utils/mb/Unicode/utf8_to_win874.map b/src/backend/utils/mb/Unicode/utf8_to_win874.map index cf0629f5ec8..d765744461f 100644 --- a/src/backend/utils/mb/Unicode/utf8_to_win874.map +++ b/src/backend/utils/mb/Unicode/utf8_to_win874.map @@ -1,5 +1,4 @@ -static pg_utf_to_local ULmapWIN874[ 98 ] = { - {0x0000, 0x0081}, +static pg_utf_to_local ULmapWIN874[ 97 ] = { {0xc2a0, 0x00a0}, {0xe0b881, 0x00a1}, {0xe0b882, 0x00a2}, diff --git a/src/backend/utils/mb/Unicode/win1250_to_utf8.map b/src/backend/utils/mb/Unicode/win1250_to_utf8.map index c66f61f48f7..22f44b7f58a 100644 --- a/src/backend/utils/mb/Unicode/win1250_to_utf8.map +++ b/src/backend/utils/mb/Unicode/win1250_to_utf8.map @@ -1,13 +1,10 @@ -static pg_local_to_utf LUmapWIN1250[ 128 ] = { +static pg_local_to_utf LUmapWIN1250[ 123 ] = { {0x0080, 0xe282ac}, - {0x0081, 0x0000}, {0x0082, 0xe2809a}, - {0x0083, 0x0000}, {0x0084, 0xe2809e}, {0x0085, 0xe280a6}, {0x0086, 0xe280a0}, {0x0087, 0xe280a1}, - {0x0088, 0x0000}, {0x0089, 0xe280b0}, {0x008a, 0xc5a0}, {0x008b, 0xe280b9}, @@ -15,7 +12,6 @@ static pg_local_to_utf LUmapWIN1250[ 128 ] = { {0x008d, 0xc5a4}, {0x008e, 0xc5bd}, {0x008f, 0xc5b9}, - {0x0090, 0x0000}, {0x0091, 0xe28098}, {0x0092, 0xe28099}, {0x0093, 0xe2809c}, @@ -23,7 +19,6 @@ static pg_local_to_utf LUmapWIN1250[ 128 ] = { {0x0095, 0xe280a2}, {0x0096, 0xe28093}, {0x0097, 0xe28094}, - {0x0098, 0x0000}, {0x0099, 0xe284a2}, {0x009a, 0xc5a1}, {0x009b, 0xe280ba}, diff --git a/src/backend/utils/mb/Unicode/win1251_to_utf8.map b/src/backend/utils/mb/Unicode/win1251_to_utf8.map index 855575cd515..cdea6fe4459 100644 --- a/src/backend/utils/mb/Unicode/win1251_to_utf8.map +++ b/src/backend/utils/mb/Unicode/win1251_to_utf8.map @@ -1,4 
+1,4 @@ -static pg_local_to_utf LUmapWIN1251[ 128 ] = { +static pg_local_to_utf LUmapWIN1251[ 127 ] = { {0x0080, 0xd082}, {0x0081, 0xd083}, {0x0082, 0xe2809a}, @@ -7,7 +7,7 @@ static pg_local_to_utf LUmapWIN1251[ 128 ] = { {0x0085, 0xe280a6}, {0x0086, 0xe280a0}, {0x0087, 0xe280a1}, - {0x0088, 0x0000}, + {0x0088, 0xe282ac}, {0x0089, 0xe280b0}, {0x008a, 0xd089}, {0x008b, 0xe280b9}, @@ -23,7 +23,6 @@ static pg_local_to_utf LUmapWIN1251[ 128 ] = { {0x0095, 0xe280a2}, {0x0096, 0xe28093}, {0x0097, 0xe28094}, - {0x0098, 0x0000}, {0x0099, 0xe284a2}, {0x009a, 0xd199}, {0x009b, 0xe280ba}, diff --git a/src/backend/utils/mb/Unicode/win1252_to_utf8.map b/src/backend/utils/mb/Unicode/win1252_to_utf8.map index 636baf859f0..ad849ee011d 100644 --- a/src/backend/utils/mb/Unicode/win1252_to_utf8.map +++ b/src/backend/utils/mb/Unicode/win1252_to_utf8.map @@ -1,6 +1,5 @@ -static pg_local_to_utf LUmapWIN1252[ 128 ] = { +static pg_local_to_utf LUmapWIN1252[ 123 ] = { {0x0080, 0xe282ac}, - {0x0081, 0x0000}, {0x0082, 0xe2809a}, {0x0083, 0xc692}, {0x0084, 0xe2809e}, @@ -12,10 +11,7 @@ static pg_local_to_utf LUmapWIN1252[ 128 ] = { {0x008a, 0xc5a0}, {0x008b, 0xe280b9}, {0x008c, 0xc592}, - {0x008d, 0x0000}, {0x008e, 0xc5bd}, - {0x008f, 0x0000}, - {0x0090, 0x0000}, {0x0091, 0xe28098}, {0x0092, 0xe28099}, {0x0093, 0xe2809c}, @@ -28,7 +24,6 @@ static pg_local_to_utf LUmapWIN1252[ 128 ] = { {0x009a, 0xc5a1}, {0x009b, 0xe280ba}, {0x009c, 0xc593}, - {0x009d, 0x0000}, {0x009e, 0xc5be}, {0x009f, 0xc5b8}, {0x00a0, 0xc2a0}, diff --git a/src/backend/utils/mb/Unicode/win1253_to_utf8.map b/src/backend/utils/mb/Unicode/win1253_to_utf8.map new file mode 100644 index 00000000000..519a435d750 --- /dev/null +++ b/src/backend/utils/mb/Unicode/win1253_to_utf8.map @@ -0,0 +1,113 @@ +static pg_local_to_utf LUmapWIN1253[ 111 ] = { + {0x0080, 0xe282ac}, + {0x0082, 0xe2809a}, + {0x0083, 0xc692}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0089, 0xe280b0}, + {0x008b, 
0xe280b9}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0099, 0xe284a2}, + {0x009b, 0xe280ba}, + {0x00a0, 0xc2a0}, + {0x00a1, 0xce85}, + {0x00a2, 0xce86}, + {0x00a3, 0xc2a3}, + {0x00a4, 0xc2a4}, + {0x00a5, 0xc2a5}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xc2a8}, + {0x00a9, 0xc2a9}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xe28095}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xc2b2}, + {0x00b3, 0xc2b3}, + {0x00b4, 0xce84}, + {0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xce88}, + {0x00b9, 0xce89}, + {0x00ba, 0xce8a}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xce8c}, + {0x00bd, 0xc2bd}, + {0x00be, 0xce8e}, + {0x00bf, 0xce8f}, + {0x00c0, 0xce90}, + {0x00c1, 0xce91}, + {0x00c2, 0xce92}, + {0x00c3, 0xce93}, + {0x00c4, 0xce94}, + {0x00c5, 0xce95}, + {0x00c6, 0xce96}, + {0x00c7, 0xce97}, + {0x00c8, 0xce98}, + {0x00c9, 0xce99}, + {0x00ca, 0xce9a}, + {0x00cb, 0xce9b}, + {0x00cc, 0xce9c}, + {0x00cd, 0xce9d}, + {0x00ce, 0xce9e}, + {0x00cf, 0xce9f}, + {0x00d0, 0xcea0}, + {0x00d1, 0xcea1}, + {0x00d3, 0xcea3}, + {0x00d4, 0xcea4}, + {0x00d5, 0xcea5}, + {0x00d6, 0xcea6}, + {0x00d7, 0xcea7}, + {0x00d8, 0xcea8}, + {0x00d9, 0xcea9}, + {0x00da, 0xceaa}, + {0x00db, 0xceab}, + {0x00dc, 0xceac}, + {0x00dd, 0xcead}, + {0x00de, 0xceae}, + {0x00df, 0xceaf}, + {0x00e0, 0xceb0}, + {0x00e1, 0xceb1}, + {0x00e2, 0xceb2}, + {0x00e3, 0xceb3}, + {0x00e4, 0xceb4}, + {0x00e5, 0xceb5}, + {0x00e6, 0xceb6}, + {0x00e7, 0xceb7}, + {0x00e8, 0xceb8}, + {0x00e9, 0xceb9}, + {0x00ea, 0xceba}, + {0x00eb, 0xcebb}, + {0x00ec, 0xcebc}, + {0x00ed, 0xcebd}, + {0x00ee, 0xcebe}, + {0x00ef, 0xcebf}, + {0x00f0, 0xcf80}, + {0x00f1, 0xcf81}, + {0x00f2, 0xcf82}, + {0x00f3, 0xcf83}, + {0x00f4, 0xcf84}, + {0x00f5, 0xcf85}, + {0x00f6, 0xcf86}, + {0x00f7, 0xcf87}, + {0x00f8, 0xcf88}, + {0x00f9, 0xcf89}, + {0x00fa, 0xcf8a}, + 
{0x00fb, 0xcf8b}, + {0x00fc, 0xcf8c}, + {0x00fd, 0xcf8d}, + {0x00fe, 0xcf8e} +}; diff --git a/src/backend/utils/mb/Unicode/win1254_to_utf8.map b/src/backend/utils/mb/Unicode/win1254_to_utf8.map new file mode 100644 index 00000000000..370e4bc9101 --- /dev/null +++ b/src/backend/utils/mb/Unicode/win1254_to_utf8.map @@ -0,0 +1,123 @@ +static pg_local_to_utf LUmapWIN1254[ 121 ] = { + {0x0080, 0xe282ac}, + {0x0082, 0xe2809a}, + {0x0083, 0xc692}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0088, 0xcb86}, + {0x0089, 0xe280b0}, + {0x008a, 0xc5a0}, + {0x008b, 0xe280b9}, + {0x008c, 0xc592}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0098, 0xcb9c}, + {0x0099, 0xe284a2}, + {0x009a, 0xc5a1}, + {0x009b, 0xe280ba}, + {0x009c, 0xc593}, + {0x009f, 0xc5b8}, + {0x00a0, 0xc2a0}, + {0x00a1, 0xc2a1}, + {0x00a2, 0xc2a2}, + {0x00a3, 0xc2a3}, + {0x00a4, 0xc2a4}, + {0x00a5, 0xc2a5}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xc2a8}, + {0x00a9, 0xc2a9}, + {0x00aa, 0xc2aa}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xc2af}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xc2b2}, + {0x00b3, 0xc2b3}, + {0x00b4, 0xc2b4}, + {0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xc2b8}, + {0x00b9, 0xc2b9}, + {0x00ba, 0xc2ba}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xc2bc}, + {0x00bd, 0xc2bd}, + {0x00be, 0xc2be}, + {0x00bf, 0xc2bf}, + {0x00c0, 0xc380}, + {0x00c1, 0xc381}, + {0x00c2, 0xc382}, + {0x00c3, 0xc383}, + {0x00c4, 0xc384}, + {0x00c5, 0xc385}, + {0x00c6, 0xc386}, + {0x00c7, 0xc387}, + {0x00c8, 0xc388}, + {0x00c9, 0xc389}, + {0x00ca, 0xc38a}, + {0x00cb, 0xc38b}, + {0x00cc, 0xc38c}, + {0x00cd, 0xc38d}, + {0x00ce, 0xc38e}, + {0x00cf, 0xc38f}, + {0x00d0, 0xc49e}, + {0x00d1, 0xc391}, + {0x00d2, 0xc392}, + {0x00d3, 0xc393}, + {0x00d4, 0xc394}, + {0x00d5, 0xc395}, + 
{0x00d6, 0xc396}, + {0x00d7, 0xc397}, + {0x00d8, 0xc398}, + {0x00d9, 0xc399}, + {0x00da, 0xc39a}, + {0x00db, 0xc39b}, + {0x00dc, 0xc39c}, + {0x00dd, 0xc4b0}, + {0x00de, 0xc59e}, + {0x00df, 0xc39f}, + {0x00e0, 0xc3a0}, + {0x00e1, 0xc3a1}, + {0x00e2, 0xc3a2}, + {0x00e3, 0xc3a3}, + {0x00e4, 0xc3a4}, + {0x00e5, 0xc3a5}, + {0x00e6, 0xc3a6}, + {0x00e7, 0xc3a7}, + {0x00e8, 0xc3a8}, + {0x00e9, 0xc3a9}, + {0x00ea, 0xc3aa}, + {0x00eb, 0xc3ab}, + {0x00ec, 0xc3ac}, + {0x00ed, 0xc3ad}, + {0x00ee, 0xc3ae}, + {0x00ef, 0xc3af}, + {0x00f0, 0xc49f}, + {0x00f1, 0xc3b1}, + {0x00f2, 0xc3b2}, + {0x00f3, 0xc3b3}, + {0x00f4, 0xc3b4}, + {0x00f5, 0xc3b5}, + {0x00f6, 0xc3b6}, + {0x00f7, 0xc3b7}, + {0x00f8, 0xc3b8}, + {0x00f9, 0xc3b9}, + {0x00fa, 0xc3ba}, + {0x00fb, 0xc3bb}, + {0x00fc, 0xc3bc}, + {0x00fd, 0xc4b1}, + {0x00fe, 0xc59f}, + {0x00ff, 0xc3bf} +}; diff --git a/src/backend/utils/mb/Unicode/win1255_to_utf8.map b/src/backend/utils/mb/Unicode/win1255_to_utf8.map new file mode 100644 index 00000000000..f5d7454c29f --- /dev/null +++ b/src/backend/utils/mb/Unicode/win1255_to_utf8.map @@ -0,0 +1,107 @@ +static pg_local_to_utf LUmapWIN1255[ 105 ] = { + {0x0080, 0xe282ac}, + {0x0082, 0xe2809a}, + {0x0083, 0xc692}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0088, 0xcb86}, + {0x0089, 0xe280b0}, + {0x008b, 0xe280b9}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0098, 0xcb9c}, + {0x0099, 0xe284a2}, + {0x009b, 0xe280ba}, + {0x00a0, 0xc2a0}, + {0x00a1, 0xc2a1}, + {0x00a2, 0xc2a2}, + {0x00a3, 0xc2a3}, + {0x00a4, 0xe282aa}, + {0x00a5, 0xc2a5}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xc2a8}, + {0x00a9, 0xc2a9}, + {0x00aa, 0xc397}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xc2af}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xc2b2}, + {0x00b3, 0xc2b3}, + {0x00b4, 0xc2b4}, + 
{0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xc2b8}, + {0x00b9, 0xc2b9}, + {0x00ba, 0xc3b7}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xc2bc}, + {0x00bd, 0xc2bd}, + {0x00be, 0xc2be}, + {0x00bf, 0xc2bf}, + {0x00c0, 0xd6b0}, + {0x00c1, 0xd6b1}, + {0x00c2, 0xd6b2}, + {0x00c3, 0xd6b3}, + {0x00c4, 0xd6b4}, + {0x00c5, 0xd6b5}, + {0x00c6, 0xd6b6}, + {0x00c7, 0xd6b7}, + {0x00c8, 0xd6b8}, + {0x00c9, 0xd6b9}, + {0x00cb, 0xd6bb}, + {0x00cc, 0xd6bc}, + {0x00cd, 0xd6bd}, + {0x00ce, 0xd6be}, + {0x00cf, 0xd6bf}, + {0x00d0, 0xd780}, + {0x00d1, 0xd781}, + {0x00d2, 0xd782}, + {0x00d3, 0xd783}, + {0x00d4, 0xd7b0}, + {0x00d5, 0xd7b1}, + {0x00d6, 0xd7b2}, + {0x00d7, 0xd7b3}, + {0x00d8, 0xd7b4}, + {0x00e0, 0xd790}, + {0x00e1, 0xd791}, + {0x00e2, 0xd792}, + {0x00e3, 0xd793}, + {0x00e4, 0xd794}, + {0x00e5, 0xd795}, + {0x00e6, 0xd796}, + {0x00e7, 0xd797}, + {0x00e8, 0xd798}, + {0x00e9, 0xd799}, + {0x00ea, 0xd79a}, + {0x00eb, 0xd79b}, + {0x00ec, 0xd79c}, + {0x00ed, 0xd79d}, + {0x00ee, 0xd79e}, + {0x00ef, 0xd79f}, + {0x00f0, 0xd7a0}, + {0x00f1, 0xd7a1}, + {0x00f2, 0xd7a2}, + {0x00f3, 0xd7a3}, + {0x00f4, 0xd7a4}, + {0x00f5, 0xd7a5}, + {0x00f6, 0xd7a6}, + {0x00f7, 0xd7a7}, + {0x00f8, 0xd7a8}, + {0x00f9, 0xd7a9}, + {0x00fa, 0xd7aa}, + {0x00fd, 0xe2808e}, + {0x00fe, 0xe2808f} +}; diff --git a/src/backend/utils/mb/Unicode/win1257_to_utf8.map b/src/backend/utils/mb/Unicode/win1257_to_utf8.map new file mode 100644 index 00000000000..45d946d57c9 --- /dev/null +++ b/src/backend/utils/mb/Unicode/win1257_to_utf8.map @@ -0,0 +1,118 @@ +static pg_local_to_utf LUmapWIN1257[ 116 ] = { + {0x0080, 0xe282ac}, + {0x0082, 0xe2809a}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0089, 0xe280b0}, + {0x008b, 0xe280b9}, + {0x008d, 0xc2a8}, + {0x008e, 0xcb87}, + {0x008f, 0xc2b8}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0099, 0xe284a2}, + 
{0x009b, 0xe280ba}, + {0x009d, 0xc2af}, + {0x009e, 0xcb9b}, + {0x00a0, 0xc2a0}, + {0x00a2, 0xc2a2}, + {0x00a3, 0xc2a3}, + {0x00a4, 0xc2a4}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xc398}, + {0x00a9, 0xc2a9}, + {0x00aa, 0xc596}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xc386}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xc2b2}, + {0x00b3, 0xc2b3}, + {0x00b4, 0xc2b4}, + {0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xc3b8}, + {0x00b9, 0xc2b9}, + {0x00ba, 0xc597}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xc2bc}, + {0x00bd, 0xc2bd}, + {0x00be, 0xc2be}, + {0x00bf, 0xc3a6}, + {0x00c0, 0xc484}, + {0x00c1, 0xc4ae}, + {0x00c2, 0xc480}, + {0x00c3, 0xc486}, + {0x00c4, 0xc384}, + {0x00c5, 0xc385}, + {0x00c6, 0xc498}, + {0x00c7, 0xc492}, + {0x00c8, 0xc48c}, + {0x00c9, 0xc389}, + {0x00ca, 0xc5b9}, + {0x00cb, 0xc496}, + {0x00cc, 0xc4a2}, + {0x00cd, 0xc4b6}, + {0x00ce, 0xc4aa}, + {0x00cf, 0xc4bb}, + {0x00d0, 0xc5a0}, + {0x00d1, 0xc583}, + {0x00d2, 0xc585}, + {0x00d3, 0xc393}, + {0x00d4, 0xc58c}, + {0x00d5, 0xc395}, + {0x00d6, 0xc396}, + {0x00d7, 0xc397}, + {0x00d8, 0xc5b2}, + {0x00d9, 0xc581}, + {0x00da, 0xc59a}, + {0x00db, 0xc5aa}, + {0x00dc, 0xc39c}, + {0x00dd, 0xc5bb}, + {0x00de, 0xc5bd}, + {0x00df, 0xc39f}, + {0x00e0, 0xc485}, + {0x00e1, 0xc4af}, + {0x00e2, 0xc481}, + {0x00e3, 0xc487}, + {0x00e4, 0xc3a4}, + {0x00e5, 0xc3a5}, + {0x00e6, 0xc499}, + {0x00e7, 0xc493}, + {0x00e8, 0xc48d}, + {0x00e9, 0xc3a9}, + {0x00ea, 0xc5ba}, + {0x00eb, 0xc497}, + {0x00ec, 0xc4a3}, + {0x00ed, 0xc4b7}, + {0x00ee, 0xc4ab}, + {0x00ef, 0xc4bc}, + {0x00f0, 0xc5a1}, + {0x00f1, 0xc584}, + {0x00f2, 0xc586}, + {0x00f3, 0xc3b3}, + {0x00f4, 0xc58d}, + {0x00f5, 0xc3b5}, + {0x00f6, 0xc3b6}, + {0x00f7, 0xc3b7}, + {0x00f8, 0xc5b3}, + {0x00f9, 0xc582}, + {0x00fa, 0xc59b}, + {0x00fb, 0xc5ab}, + {0x00fc, 0xc3bc}, + {0x00fd, 0xc5bc}, + {0x00fe, 0xc5be}, + {0x00ff, 0xcb99} +}; diff --git 
a/src/backend/utils/mb/Unicode/win1258_to_utf8.map b/src/backend/utils/mb/Unicode/win1258_to_utf8.map index 0596d8ba4ff..ed8a9146fc1 100644 --- a/src/backend/utils/mb/Unicode/win1258_to_utf8.map +++ b/src/backend/utils/mb/Unicode/win1258_to_utf8.map @@ -1,6 +1,5 @@ -static pg_local_to_utf LUmapWIN1258[ 128 ] = { +static pg_local_to_utf LUmapWIN1258[ 119 ] = { {0x0080, 0xe282ac}, - {0x0081, 0x0000}, {0x0082, 0xe2809a}, {0x0083, 0xc692}, {0x0084, 0xe2809e}, @@ -9,13 +8,8 @@ static pg_local_to_utf LUmapWIN1258[ 128 ] = { {0x0087, 0xe280a1}, {0x0088, 0xcb86}, {0x0089, 0xe280b0}, - {0x008a, 0x0000}, {0x008b, 0xe280b9}, {0x008c, 0xc592}, - {0x008d, 0x0000}, - {0x008e, 0x0000}, - {0x008f, 0x0000}, - {0x0090, 0x0000}, {0x0091, 0xe28098}, {0x0092, 0xe28099}, {0x0093, 0xe2809c}, @@ -25,11 +19,8 @@ static pg_local_to_utf LUmapWIN1258[ 128 ] = { {0x0097, 0xe28094}, {0x0098, 0xcb9c}, {0x0099, 0xe284a2}, - {0x009a, 0x0000}, {0x009b, 0xe280ba}, {0x009c, 0xc593}, - {0x009d, 0x0000}, - {0x009e, 0x0000}, {0x009f, 0xc5b8}, {0x00a0, 0xc2a0}, {0x00a1, 0xc2a1}, diff --git a/src/backend/utils/mb/Unicode/win874_to_utf8.map b/src/backend/utils/mb/Unicode/win874_to_utf8.map index 694a51bb430..bd4eadbbb6a 100644 --- a/src/backend/utils/mb/Unicode/win874_to_utf8.map +++ b/src/backend/utils/mb/Unicode/win874_to_utf8.map @@ -1,21 +1,6 @@ -static pg_local_to_utf LUmapWIN874[ 128 ] = { +static pg_local_to_utf LUmapWIN874[ 97 ] = { {0x0080, 0xe282ac}, - {0x0081, 0x0000}, - {0x0082, 0x0000}, - {0x0083, 0x0000}, - {0x0084, 0x0000}, {0x0085, 0xe280a6}, - {0x0086, 0x0000}, - {0x0087, 0x0000}, - {0x0088, 0x0000}, - {0x0089, 0x0000}, - {0x008a, 0x0000}, - {0x008b, 0x0000}, - {0x008c, 0x0000}, - {0x008d, 0x0000}, - {0x008e, 0x0000}, - {0x008f, 0x0000}, - {0x0090, 0x0000}, {0x0091, 0xe28098}, {0x0092, 0xe28099}, {0x0093, 0xe2809c}, @@ -23,14 +8,6 @@ static pg_local_to_utf LUmapWIN874[ 128 ] = { {0x0095, 0xe280a2}, {0x0096, 0xe28093}, {0x0097, 0xe28094}, - {0x0098, 0x0000}, - {0x0099, 0x0000}, - {0x009a, 
0x0000}, - {0x009b, 0x0000}, - {0x009c, 0x0000}, - {0x009d, 0x0000}, - {0x009e, 0x0000}, - {0x009f, 0x0000}, {0x00a0, 0xc2a0}, {0x00a1, 0xe0b881}, {0x00a2, 0xe0b882}, @@ -90,10 +67,6 @@ static pg_local_to_utf LUmapWIN874[ 128 ] = { {0x00d8, 0xe0b8b8}, {0x00d9, 0xe0b8b9}, {0x00da, 0xe0b8ba}, - {0x00db, 0x0000}, - {0x00dc, 0x0000}, - {0x00dd, 0x0000}, - {0x00de, 0x0000}, {0x00df, 0xe0b8bf}, {0x00e0, 0xe0b980}, {0x00e1, 0xe0b981}, @@ -122,9 +95,5 @@ static pg_local_to_utf LUmapWIN874[ 128 ] = { {0x00f8, 0xe0b998}, {0x00f9, 0xe0b999}, {0x00fa, 0xe0b99a}, - {0x00fb, 0xe0b99b}, - {0x00fc, 0x0000}, - {0x00fd, 0x0000}, - {0x00fe, 0x0000}, - {0x00ff, 0x0000} + {0x00fb, 0xe0b99b} }; diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile index 40425f1df6f..cf07f42560b 100644 --- a/src/backend/utils/mb/conversion_procs/Makefile +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/mb/conversion_procs # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.15 2005/12/09 21:19:35 petere Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.16 2006/02/18 16:15:22 petere Exp $ # #------------------------------------------------------------------------- @@ -23,8 +23,7 @@ DIRS = \ utf8_and_ascii utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \ utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \ utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \ - utf8_and_sjis utf8_and_win1258 utf8_and_uhc utf8_and_win1250 \ - utf8_and_win1252 utf8_and_win1256 utf8_and_win874 + utf8_and_sjis utf8_and_win utf8_and_uhc # conversion_name source_encoding destination_encoding function object CONVERSIONS = \ @@ -84,12 +83,28 @@ CONVERSIONS = \ utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \ utf8_to_koi8_r UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \ koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \ - 
utf8_to_windows_1251 UTF8 WIN1251 utf8_to_win1251 utf8_and_cyrillic \ - windows_1251_to_utf8 WIN1251 UTF8 win1251_to_utf8 utf8_and_cyrillic \ - utf8_to_windows_1252 UTF8 WIN1252 utf8_to_win1252 utf8_and_win1252 \ - windows_1252_to_utf8 WIN1252 UTF8 win1252_to_utf8 utf8_and_win1252 \ - utf8_to_windows_866 UTF8 WIN866 utf8_to_win866 utf8_and_cyrillic \ - windows_866_to_utf8 WIN866 UTF8 win866_to_utf8 utf8_and_cyrillic \ + utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \ + windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \ + windows_874_to_utf8 WIN874 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1250 UTF8 WIN1250 utf8_to_win utf8_and_win \ + windows_1250_to_utf8 WIN1250 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1251 UTF8 WIN1251 utf8_to_win utf8_and_win \ + windows_1251_to_utf8 WIN1251 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1252 UTF8 WIN1252 utf8_to_win utf8_and_win \ + windows_1252_to_utf8 WIN1252 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1253 UTF8 WIN1253 utf8_to_win utf8_and_win \ + windows_1253_to_utf8 WIN1253 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1254 UTF8 WIN1254 utf8_to_win utf8_and_win \ + windows_1254_to_utf8 WIN1254 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1255 UTF8 WIN1255 utf8_to_win utf8_and_win \ + windows_1255_to_utf8 WIN1255 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1256 UTF8 WIN1256 utf8_to_win utf8_and_win \ + windows_1256_to_utf8 WIN1256 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1257 UTF8 WIN1257 utf8_to_win utf8_and_win \ + windows_1257_to_utf8 WIN1257 UTF8 win_to_utf8 utf8_and_win \ + utf8_to_windows_1258 UTF8 WIN1258 utf8_to_win utf8_and_win \ + windows_1258_to_utf8 WIN1258 UTF8 win_to_utf8 utf8_and_win \ euc_cn_to_utf8 EUC_CN UTF8 euc_cn_to_utf8 utf8_and_euc_cn \ utf8_to_euc_cn UTF8 EUC_CN utf8_to_euc_cn utf8_and_euc_cn \ euc_jp_to_utf8 EUC_JP UTF8 euc_jp_to_utf8 utf8_and_euc_jp \ @@ -134,16 +149,9 
@@ CONVERSIONS = \ utf8_to_johab UTF8 JOHAB utf8_to_johab utf8_and_johab \ sjis_to_utf8 SJIS UTF8 sjis_to_utf8 utf8_and_sjis \ utf8_to_sjis UTF8 SJIS utf8_to_sjis utf8_and_sjis \ - win1258_to_utf8 WIN1258 UTF8 win1258_to_utf8 utf8_and_win1258 \ - utf8_to_win1258 UTF8 WIN1258 utf8_to_win1258 utf8_and_win1258 \ uhc_to_utf8 UHC UTF8 uhc_to_utf8 utf8_and_uhc \ - utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc \ - utf8_to_windows_1250 UTF8 WIN1250 utf8_to_win1250 utf8_and_win1250 \ - windows_1250_to_utf8 WIN1250 UTF8 win1250_to_utf8 utf8_and_win1250 \ - utf8_to_windows_1256 UTF8 WIN1256 utf8_to_win1256 utf8_and_win1256 \ - windows_1256_to_utf8 WIN1256 UTF8 win1256_to_utf8 utf8_and_win1256 \ - utf8_to_windows_874 UTF8 WIN874 utf8_to_win874 utf8_and_win874 \ - windows_874_to_utf8 WIN874 UTF8 win874_to_utf8 utf8_and_win874 + utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc + all: $(SQLSCRIPT) @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c index efab622c94f..f732cce720e 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.12 2005/10/15 02:49:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.13 2006/02/18 16:15:22 petere Exp $ * *------------------------------------------------------------------------- */ @@ -16,24 +16,12 @@ #include "mb/pg_wchar.h" #include "../../Unicode/utf8_to_koi8r.map" #include "../../Unicode/koi8r_to_utf8.map" -#include "../../Unicode/utf8_to_win1251.map" -#include 
"../../Unicode/win1251_to_utf8.map" -#include "../../Unicode/utf8_to_win866.map" -#include "../../Unicode/win866_to_utf8.map" PG_FUNCTION_INFO_V1(utf8_to_koi8r); PG_FUNCTION_INFO_V1(koi8r_to_utf8); -PG_FUNCTION_INFO_V1(utf8_to_win1251); -PG_FUNCTION_INFO_V1(win1251_to_utf8); -PG_FUNCTION_INFO_V1(utf8_to_win866); -PG_FUNCTION_INFO_V1(win866_to_utf8); extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS); extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS); -extern Datum utf8_to_win1251(PG_FUNCTION_ARGS); -extern Datum win1251_to_utf8(PG_FUNCTION_ARGS); -extern Datum utf8_to_win866(PG_FUNCTION_ARGS); -extern Datum win866_to_utf8(PG_FUNCTION_ARGS); /* ---------- * conv_proc( @@ -57,8 +45,8 @@ utf8_to_koi8r(PG_FUNCTION_ARGS) Assert(PG_GETARG_INT32(1) == PG_KOI8R); Assert(len >= 0); - UtfToLocal(src, dest, ULmap_KOI8R, - sizeof(ULmap_KOI8R) / sizeof(pg_utf_to_local), len); + UtfToLocal(src, dest, ULmapKOI8R, + sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), len); PG_RETURN_VOID(); } @@ -80,70 +68,3 @@ koi8r_to_utf8(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -Datum -utf8_to_win1251(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN1251); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmap_WIN1251, - sizeof(ULmap_WIN1251) / sizeof(pg_utf_to_local), len); - - PG_RETURN_VOID(); -} - -Datum -win1251_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN1251); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN1251, - sizeof(LUmapWIN1251) / sizeof(pg_local_to_utf), PG_WIN1251, len); - - PG_RETURN_VOID(); -} - -Datum -utf8_to_win866(PG_FUNCTION_ARGS) -{ - unsigned char *src = 
(unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN866); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmap_WIN866, - sizeof(ULmap_WIN866) / sizeof(pg_utf_to_local), len); - - PG_RETURN_VOID(); -} - -Datum -win866_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN866); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN866, - sizeof(LUmapWIN866) / sizeof(pg_local_to_utf), PG_WIN866, len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win874/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile similarity index 69% rename from src/backend/utils/mb/conversion_procs/utf8_and_win874/Makefile rename to src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile index 0ec317ac5c1..c7c6f18cdd1 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win874/Makefile +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile @@ -1,12 +1,12 @@ #------------------------------------------------------------------------- # -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/Makefile,v 1.3 2003/11/29 22:40:43 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile,v 1.1 2006/02/18 16:15:22 petere Exp $ # #------------------------------------------------------------------------- -subdir = src/backend/utils/mb/conversion_procs/utf8_and_win874 +subdir = src/backend/utils/mb/conversion_procs/utf8_and_win top_builddir = ../../../../../.. 
include $(top_builddir)/src/Makefile.global -NAME := utf8_and_win874 +NAME := utf8_and_win include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c new file mode 100644 index 00000000000..43f878ee0f7 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c @@ -0,0 +1,156 @@ +/*------------------------------------------------------------------------- + * + * WIN <--> UTF8 + * + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.1 2006/02/18 16:15:22 petere Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/utf8_to_win866.map" +#include "../../Unicode/utf8_to_win874.map" +#include "../../Unicode/utf8_to_win1250.map" +#include "../../Unicode/utf8_to_win1251.map" +#include "../../Unicode/utf8_to_win1252.map" +#include "../../Unicode/utf8_to_win1253.map" +#include "../../Unicode/utf8_to_win1254.map" +#include "../../Unicode/utf8_to_win1255.map" +#include "../../Unicode/utf8_to_win1256.map" +#include "../../Unicode/utf8_to_win1257.map" +#include "../../Unicode/utf8_to_win1258.map" +#include "../../Unicode/win866_to_utf8.map" +#include "../../Unicode/win874_to_utf8.map" +#include "../../Unicode/win1250_to_utf8.map" +#include "../../Unicode/win1251_to_utf8.map" +#include "../../Unicode/win1252_to_utf8.map" +#include "../../Unicode/win1253_to_utf8.map" +#include "../../Unicode/win1254_to_utf8.map" +#include "../../Unicode/win1255_to_utf8.map" +#include "../../Unicode/win1256_to_utf8.map" +#include "../../Unicode/win1257_to_utf8.map" +#include "../../Unicode/win1258_to_utf8.map" + 
+PG_FUNCTION_INFO_V1(win_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_win); + +extern Datum win_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_win(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns VOID; + * ---------- + */ + +typedef struct +{ + pg_enc encoding; + pg_local_to_utf *map1; /* to UTF8 map name */ + pg_utf_to_local *map2; /* from UTF8 map name */ + int size1; /* size of map1 */ + int size2; /* size of map2 */ +} pg_conv_map; + +static pg_conv_map maps[] = { + {PG_WIN866, LUmapWIN866, ULmapWIN866, + sizeof(LUmapWIN866) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN866) / sizeof(pg_utf_to_local)}, + {PG_WIN874, LUmapWIN874, ULmapWIN874, + sizeof(LUmapWIN874) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN874) / sizeof(pg_utf_to_local)}, + {PG_WIN1250, LUmapWIN1250, ULmapWIN1250, + sizeof(LUmapWIN1250) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local)}, + {PG_WIN1251, LUmapWIN1251, ULmapWIN1251, + sizeof(LUmapWIN1251) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1251) / sizeof(pg_utf_to_local)}, + {PG_WIN1252, LUmapWIN1252, ULmapWIN1252, + sizeof(LUmapWIN1252) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1252) / sizeof(pg_utf_to_local)}, + {PG_WIN1253, LUmapWIN1253, ULmapWIN1253, + sizeof(LUmapWIN1253) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1253) / sizeof(pg_utf_to_local)}, + {PG_WIN1254, LUmapWIN1254, ULmapWIN1254, + sizeof(LUmapWIN1254) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1254) / sizeof(pg_utf_to_local)}, + {PG_WIN1255, LUmapWIN1255, ULmapWIN1255, + sizeof(LUmapWIN1255) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1255) / sizeof(pg_utf_to_local)}, + {PG_WIN1256, LUmapWIN1256, ULmapWIN1256, + sizeof(LUmapWIN1256) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1256) / 
sizeof(pg_utf_to_local)}, + {PG_WIN1257, LUmapWIN1257, ULmapWIN1257, + sizeof(LUmapWIN1257) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1257) / sizeof(pg_utf_to_local)}, + {PG_WIN1258, LUmapWIN1258, ULmapWIN1258, + sizeof(LUmapWIN1258) / sizeof(pg_local_to_utf), + sizeof(ULmapWIN1258) / sizeof(pg_utf_to_local)}, +}; + +Datum +win_to_utf8(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(0); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + int i; + + Assert(PG_GETARG_INT32(1) == PG_UTF8); + Assert(len >= 0); + + for (i=0;i<sizeof(maps)/sizeof(pg_conv_map);i++) + { + if (encoding == maps[i].encoding) + { + LocalToUtf(src, dest, maps[i].map1, maps[i].size1, encoding, len); + PG_RETURN_VOID(); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding id %d for WIN charsets", encoding))); + + PG_RETURN_VOID(); +} + +Datum +utf8_to_win(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(1); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + int i; + + Assert(PG_GETARG_INT32(0) == PG_UTF8); + Assert(len >= 0); + + for (i=0;i<sizeof(maps)/sizeof(pg_conv_map);i++) + { + if (encoding == maps[i].encoding) + { + UtfToLocal(src, dest, maps[i].map2, maps[i].size2, len); + PG_RETURN_VOID(); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding id %d for WIN charsets", encoding))); + + PG_RETURN_VOID(); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/Makefile deleted file mode 100644 index 67ca71d3868..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -#------------------------------------------------------------------------- -# -# 
$PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/Makefile,v 1.3 2003/11/29 22:40:41 pgsql Exp $ -# -#------------------------------------------------------------------------- -subdir = src/backend/utils/mb/conversion_procs/utf8_and_win1250 -top_builddir = ../../../../../.. -include $(top_builddir)/src/Makefile.global - -NAME := utf8_and_win1250 - -include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c deleted file mode 100644 index 6789ca7aaa5..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c +++ /dev/null @@ -1,69 +0,0 @@ -/*------------------------------------------------------------------------- - * - * WIN1250 and UTF8 - * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c,v 1.13 2005/10/15 02:49:35 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "fmgr.h" -#include "mb/pg_wchar.h" -#include "../../Unicode/utf8_to_win1250.map" -#include "../../Unicode/win1250_to_utf8.map" - -PG_FUNCTION_INFO_V1(utf8_to_win1250); -PG_FUNCTION_INFO_V1(win1250_to_utf8); - -extern Datum utf8_to_win1250(PG_FUNCTION_ARGS); -extern Datum win1250_to_utf8(PG_FUNCTION_ARGS); - -/* ---------- - * conv_proc( - * INTEGER, -- source encoding id - * INTEGER, -- destination encoding id - * CSTRING, -- source string (null terminated C string) - * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; - * ---------- - */ - -Datum -utf8_to_win1250(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned 
char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN1250); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmapWIN1250, - sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local), len); - - PG_RETURN_VOID(); -} - -Datum -win1250_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN1250); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN1250, - sizeof(LUmapWIN1250) / sizeof(pg_local_to_utf), PG_WIN1250, len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile deleted file mode 100644 index 06c85a390d1..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -#------------------------------------------------------------------------- -# -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile,v 1.3 2005/03/14 18:31:22 momjian Exp $ -# -#------------------------------------------------------------------------- -subdir = src/backend/utils/mb/conversion_procs/utf8_and_win1252 -top_builddir = ../../../../../.. 
-include $(top_builddir)/src/Makefile.global - -NAME := utf8_and_win1252 - -include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c deleted file mode 100644 index b4d2b2375a9..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c +++ /dev/null @@ -1,69 +0,0 @@ -/*------------------------------------------------------------------------- - * - * WIN1252 and UTF8 - * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c,v 1.5 2005/10/15 02:49:35 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "fmgr.h" -#include "mb/pg_wchar.h" -#include "../../Unicode/utf8_to_win1252.map" -#include "../../Unicode/win1252_to_utf8.map" - -PG_FUNCTION_INFO_V1(utf8_to_win1252); -PG_FUNCTION_INFO_V1(win1252_to_utf8); - -extern Datum utf8_to_win1252(PG_FUNCTION_ARGS); -extern Datum win1252_to_utf8(PG_FUNCTION_ARGS); - -/* ---------- - * conv_proc( - * INTEGER, -- source encoding id - * INTEGER, -- destination encoding id - * CSTRING, -- source string (null terminated C string) - * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; - * ---------- - */ - -Datum -utf8_to_win1252(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN1252); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmapWIN1252, - sizeof(ULmapWIN1252) / sizeof(pg_utf_to_local), len); - - 
PG_RETURN_VOID(); -} - -Datum -win1252_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN1252); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN1252, - sizeof(LUmapWIN1252) / sizeof(pg_local_to_utf), PG_WIN1252, len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/Makefile deleted file mode 100644 index afefc2c3123..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -#------------------------------------------------------------------------- -# -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/Makefile,v 1.3 2003/11/29 22:40:42 pgsql Exp $ -# -#------------------------------------------------------------------------- -subdir = src/backend/utils/mb/conversion_procs/utf8_and_win1256 -top_builddir = ../../../../../.. 
-include $(top_builddir)/src/Makefile.global - -NAME := utf8_and_win1256 - -include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c deleted file mode 100644 index d6b83d8f837..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c +++ /dev/null @@ -1,69 +0,0 @@ -/*------------------------------------------------------------------------- - * - * WIN1256 and UTF8 - * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c,v 1.13 2005/10/15 02:49:35 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "fmgr.h" -#include "mb/pg_wchar.h" -#include "../../Unicode/utf8_to_win1256.map" -#include "../../Unicode/win1256_to_utf8.map" - -PG_FUNCTION_INFO_V1(utf8_to_win1256); -PG_FUNCTION_INFO_V1(win1256_to_utf8); - -extern Datum utf8_to_win1256(PG_FUNCTION_ARGS); -extern Datum win1256_to_utf8(PG_FUNCTION_ARGS); - -/* ---------- - * conv_proc( - * INTEGER, -- source encoding id - * INTEGER, -- destination encoding id - * CSTRING, -- source string (null terminated C string) - * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; - * ---------- - */ - -Datum -utf8_to_win1256(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN1256); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmapWIN1256, - sizeof(ULmapWIN1256) / sizeof(pg_utf_to_local), len); - - 
PG_RETURN_VOID(); -} - -Datum -win1256_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN1256); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN1256, - sizeof(LUmapWIN1256) / sizeof(pg_local_to_utf), PG_WIN1256, len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1258/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win1258/Makefile deleted file mode 100644 index 53c65f414c7..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1258/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -#------------------------------------------------------------------------- -# -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1258/Makefile,v 1.2 2005/03/07 23:18:06 neilc Exp $ -# -#------------------------------------------------------------------------- -subdir = src/backend/utils/mb/conversion_procs/utf8_and_win1258 -top_builddir = ../../../../../.. 
-include $(top_builddir)/src/Makefile.global - -NAME := utf8_and_win1258 - -include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1258/utf8_and_win1258.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1258/utf8_and_win1258.c deleted file mode 100644 index 7cdcfd3c120..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1258/utf8_and_win1258.c +++ /dev/null @@ -1,68 +0,0 @@ -/*------------------------------------------------------------------------- - * - * WIN1258 <--> UTF8 - * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1258/utf8_and_win1258.c,v 1.3 2005/10/15 02:49:35 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "fmgr.h" -#include "mb/pg_wchar.h" -#include "../../Unicode/win1258_to_utf8.map" -#include "../../Unicode/utf8_to_win1258.map" - -PG_FUNCTION_INFO_V1(win1258_to_utf8); -PG_FUNCTION_INFO_V1(utf8_to_win1258); - -extern Datum win1258_to_utf8(PG_FUNCTION_ARGS); -extern Datum utf8_to_win1258(PG_FUNCTION_ARGS); - -/* ---------- - * conv_proc( - * INTEGER, -- source encoding id - * INTEGER, -- destination encoding id - * CSTRING, -- source string (null terminated C string) - * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; - * ---------- - */ -Datum -win1258_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN1258); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN1258, - sizeof(LUmapWIN1258) / sizeof(pg_local_to_utf), PG_WIN1258, len); - 
- PG_RETURN_VOID(); -} - -Datum -utf8_to_win1258(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN1258); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmapWIN1258, - sizeof(ULmapWIN1258) / sizeof(pg_utf_to_local), len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c b/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c deleted file mode 100644 index 7eda096a9be..00000000000 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c +++ /dev/null @@ -1,69 +0,0 @@ -/*------------------------------------------------------------------------- - * - * WIN874 and UTF8 - * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c,v 1.13 2005/10/15 02:49:35 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "fmgr.h" -#include "mb/pg_wchar.h" -#include "../../Unicode/utf8_to_win874.map" -#include "../../Unicode/win874_to_utf8.map" - -PG_FUNCTION_INFO_V1(utf8_to_win874); -PG_FUNCTION_INFO_V1(win874_to_utf8); - -extern Datum utf8_to_win874(PG_FUNCTION_ARGS); -extern Datum win874_to_utf8(PG_FUNCTION_ARGS); - -/* ---------- - * conv_proc( - * INTEGER, -- source encoding id - * INTEGER, -- destination encoding id - * CSTRING, -- source string (null terminated C string) - * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; - * ---------- - */ - -Datum -utf8_to_win874(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char 
*) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_UTF8); - Assert(PG_GETARG_INT32(1) == PG_WIN874); - Assert(len >= 0); - - UtfToLocal(src, dest, ULmapWIN874, - sizeof(ULmapWIN874) / sizeof(pg_utf_to_local), len); - - PG_RETURN_VOID(); -} - -Datum -win874_to_utf8(PG_FUNCTION_ARGS) -{ - unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); - unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); - int len = PG_GETARG_INT32(4); - - Assert(PG_GETARG_INT32(0) == PG_WIN874); - Assert(PG_GETARG_INT32(1) == PG_UTF8); - Assert(len >= 0); - - LocalToUtf(src, dest, LUmapWIN874, - sizeof(LUmapWIN874) / sizeof(pg_local_to_utf), PG_WIN874, len); - - PG_RETURN_VOID(); -} diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 45427768132..f628fcd679e 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -2,7 +2,7 @@ * Encoding names and routines for work with it. All * in this file is shared bedween FE and BE. 
* - * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.28 2006/02/12 22:32:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.29 2006/02/18 16:15:22 petere Exp $ */ #ifdef FRONTEND #include "postgres_fe.h" @@ -196,9 +196,21 @@ pg_encname pg_encname_tbl[] = { "win1252", PG_WIN1252 }, /* alias for Windows-1252 */ + { + "win1253", PG_WIN1253 + }, /* alias for Windows-1253 */ + { + "win1254", PG_WIN1254 + }, /* alias for Windows-1254 */ + { + "win1255", PG_WIN1255 + }, /* alias for Windows-1255 */ { "win1256", PG_WIN1256 }, /* alias for Windows-1256 */ + { + "win1257", PG_WIN1257 + }, /* alias for Windows-1257 */ { "win1258", PG_WIN1258 }, /* alias for Windows-1258 */ @@ -229,9 +241,21 @@ pg_encname pg_encname_tbl[] = { "windows1252", PG_WIN1252 }, /* Windows-1252; Microsoft */ + { + "windows1253", PG_WIN1253 + }, /* Windows-1253; Microsoft */ + { + "windows1254", PG_WIN1254 + }, /* Windows-1254; Microsoft */ + { + "windows1255", PG_WIN1255 + }, /* Windows-1255; Microsoft */ { "windows1256", PG_WIN1256 }, /* Windows-1256; Microsoft */ + { + "windows1257", PG_WIN1257 + }, /* Windows-1257; Microsoft */ { "windows1258", PG_WIN1258 }, /* Windows-1258; Microsoft */ @@ -358,6 +382,18 @@ pg_enc2name pg_enc2name_tbl[] = { "WIN1250", PG_WIN1250 }, + { + "WIN1253", PG_WIN1253 + }, + { + "WIN1254", PG_WIN1254 + }, + { + "WIN1255", PG_WIN1255 + }, + { + "WIN1257", PG_WIN1257 + }, { "SJIS", PG_SJIS }, diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 2f0725363cc..d996b6c826b 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. 
* Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.53 2006/02/10 00:39:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.54 2006/02/18 16:15:22 petere Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -892,11 +892,15 @@ pg_wchar_tbl pg_wchar_table[] = { {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 26; ISO-8859-7 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 27; ISO-8859-8 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 28; PG_WIN1250 */ - {0, pg_sjis_mblen, pg_sjis_dsplen, 2}, /* 29; PG_SJIS */ - {0, pg_big5_mblen, pg_big5_dsplen, 2}, /* 30; PG_BIG5 */ - {0, pg_gbk_mblen, pg_gbk_dsplen, 2}, /* 31; PG_GBK */ - {0, pg_uhc_mblen, pg_uhc_dsplen, 2}, /* 32; PG_UHC */ - {0, pg_gb18030_mblen, pg_gb18030_dsplen, 2} /* 33; PG_GB18030 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 29; PG_WIN1253 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 30; PG_WIN1254 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 31; PG_WIN1255 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 32; PG_WIN1257 */ + {0, pg_sjis_mblen, pg_sjis_dsplen, 2}, /* 33; PG_SJIS */ + {0, pg_big5_mblen, pg_big5_dsplen, 2}, /* 34; PG_BIG5 */ + {0, pg_gbk_mblen, pg_gbk_dsplen, 2}, /* 35; PG_GBK */ + {0, pg_uhc_mblen, pg_uhc_dsplen, 2}, /* 36; PG_UHC */ + {0, pg_gb18030_mblen, pg_gb18030_dsplen, 2} /* 37; PG_GB18030 */ }; /* returns the byte length of a word for mule internal code */ diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index e1c7ad30ce8..9b35151640c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * Portions taken from FreeBSD. 
* - * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.109 2006/02/12 03:22:18 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.110 2006/02/18 16:15:23 petere Exp $ * *------------------------------------------------------------------------- */ @@ -779,7 +779,11 @@ struct encoding_match encoding_match_list[] = { {PG_LATIN10, "iso885916"}, {PG_WIN1252, "CP1252"}, + {PG_WIN1253, "CP1253"}, + {PG_WIN1254, "CP1254"}, + {PG_WIN1255, "CP1255"}, {PG_WIN1256, "CP1256"}, + {PG_WIN1257, "CP1257"}, {PG_WIN1258, "CP1258"}, #ifdef NOT_VERIFIED {PG_WIN874, "???"}, diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 71cb50386f0..d049f4ecfc3 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.66 2005/12/24 18:23:02 momjian Exp $ */ +/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.67 2006/02/18 16:15:23 petere Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -177,6 +177,10 @@ typedef enum pg_enc PG_ISO_8859_7, /* ISO-8859-7 */ PG_ISO_8859_8, /* ISO-8859-8 */ PG_WIN1250, /* windows-1250 */ + PG_WIN1253, /* windows-1253 */ + PG_WIN1254, /* windows-1254 */ + PG_WIN1255, /* windows-1255 */ + PG_WIN1257, /* windows-1257 */ /* PG_ENCODING_BE_LAST points to the above entry */ /* followings are for client encoding only */ @@ -189,7 +193,7 @@ typedef enum pg_enc } pg_enc; -#define PG_ENCODING_BE_LAST PG_WIN1250 +#define PG_ENCODING_BE_LAST PG_WIN1257 /* * Please use these tests before access to pg_encconv_tbl[] diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index 1130d127faa..00a1ecf6380 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1409,7 +1409,7 @@ SELECT CONVERT('foo', 'UTF8', 'SJIS'); (1 row) -- WIN1258 --> UTF8 -SELECT CONVERT('foo' USING win1258_to_utf8); +SELECT CONVERT('foo' USING windows_1258_to_utf8); convert_using --------------- foo @@ -1422,7 
+1422,7 @@ SELECT CONVERT('foo', 'WIN1258', 'UTF8'); (1 row) -- UTF8 --> WIN1258 -SELECT CONVERT('foo' USING utf8_to_win1258); +SELECT CONVERT('foo' USING utf8_to_windows_1258); convert_using --------------- foo @@ -1538,6 +1538,110 @@ SELECT CONVERT('foo', 'WIN874', 'UTF8'); foo (1 row) +-- UTF8 --> WIN1253 +SELECT CONVERT('foo' USING utf8_to_windows_1253); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'UTF8', 'WIN1253'); + convert +--------- + foo +(1 row) + +-- WIN1253 --> UTF8 +SELECT CONVERT('foo' USING windows_1253_to_utf8); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'WIN1253', 'UTF8'); + convert +--------- + foo +(1 row) + +-- UTF8 --> WIN1254 +SELECT CONVERT('foo' USING utf8_to_windows_1254); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'UTF8', 'WIN1254'); + convert +--------- + foo +(1 row) + +-- WIN1254 --> UTF8 +SELECT CONVERT('foo' USING windows_1254_to_utf8); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'WIN1254', 'UTF8'); + convert +--------- + foo +(1 row) + +-- UTF8 --> WIN1255 +SELECT CONVERT('foo' USING utf8_to_windows_1255); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'UTF8', 'WIN1255'); + convert +--------- + foo +(1 row) + +-- WIN1255 --> UTF8 +SELECT CONVERT('foo' USING windows_1255_to_utf8); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'WIN1255', 'UTF8'); + convert +--------- + foo +(1 row) + +-- UTF8 --> WIN1257 +SELECT CONVERT('foo' USING utf8_to_windows_1257); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'UTF8', 'WIN1257'); + convert +--------- + foo +(1 row) + +-- WIN1257 --> UTF8 +SELECT CONVERT('foo' USING windows_1257_to_utf8); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'WIN1257', 'UTF8'); + convert +--------- + foo +(1 row) + -- -- return to the super user -- diff --git a/src/test/regress/sql/conversion.sql 
b/src/test/regress/sql/conversion.sql index b172d764ad0..64a4ca5650c 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -346,10 +346,10 @@ SELECT CONVERT('foo', 'SJIS', 'UTF8'); SELECT CONVERT('foo' USING utf8_to_sjis); SELECT CONVERT('foo', 'UTF8', 'SJIS'); -- WIN1258 --> UTF8 -SELECT CONVERT('foo' USING win1258_to_utf8); +SELECT CONVERT('foo' USING windows_1258_to_utf8); SELECT CONVERT('foo', 'WIN1258', 'UTF8'); -- UTF8 --> WIN1258 -SELECT CONVERT('foo' USING utf8_to_win1258); +SELECT CONVERT('foo' USING utf8_to_windows_1258); SELECT CONVERT('foo', 'UTF8', 'WIN1258'); -- UHC --> UTF8 SELECT CONVERT('foo' USING uhc_to_utf8); @@ -375,6 +375,30 @@ SELECT CONVERT('foo', 'UTF8', 'WIN874'); -- WIN874 --> UTF8 SELECT CONVERT('foo' USING windows_874_to_utf8); SELECT CONVERT('foo', 'WIN874', 'UTF8'); +-- UTF8 --> WIN1253 +SELECT CONVERT('foo' USING utf8_to_windows_1253); +SELECT CONVERT('foo', 'UTF8', 'WIN1253'); +-- WIN1253 --> UTF8 +SELECT CONVERT('foo' USING windows_1253_to_utf8); +SELECT CONVERT('foo', 'WIN1253', 'UTF8'); +-- UTF8 --> WIN1254 +SELECT CONVERT('foo' USING utf8_to_windows_1254); +SELECT CONVERT('foo', 'UTF8', 'WIN1254'); +-- WIN1254 --> UTF8 +SELECT CONVERT('foo' USING windows_1254_to_utf8); +SELECT CONVERT('foo', 'WIN1254', 'UTF8'); +-- UTF8 --> WIN1255 +SELECT CONVERT('foo' USING utf8_to_windows_1255); +SELECT CONVERT('foo', 'UTF8', 'WIN1255'); +-- WIN1255 --> UTF8 +SELECT CONVERT('foo' USING windows_1255_to_utf8); +SELECT CONVERT('foo', 'WIN1255', 'UTF8'); +-- UTF8 --> WIN1257 +SELECT CONVERT('foo' USING utf8_to_windows_1257); +SELECT CONVERT('foo', 'UTF8', 'WIN1257'); +-- WIN1257 --> UTF8 +SELECT CONVERT('foo' USING windows_1257_to_utf8); +SELECT CONVERT('foo', 'WIN1257', 'UTF8'); -- -- return to the super user -- -- GitLab