diff --git a/doc/README.mb b/doc/README.mb index 4d6c3288af1030d5d178e45e9bf4a6c83c6c0e21..379622eedfcdb1ba8c34619a0036d0acd9580f32 100644 --- a/doc/README.mb +++ b/doc/README.mb @@ -1,4 +1,4 @@ -postgresql 6.4 multi-byte (MB) support README Dec 16 1998 +postgresql 6.5 multi-byte (MB) support README Jan 26 1999 Tatsuo Ishii t-ishii@sra.co.jp @@ -113,6 +113,7 @@ Supported encodings for PGCLIENTENCODING are: EUC_CN Chinese EUC EUC_KR Korean EUC EUC_TW Taiwan EUC + BIG5 Traditional Chinese MULE_INTERNAL Mule internal LATIN1 ISO 8859-1 English and some European languages LATIN2 ISO 8859-2 English and some European languages @@ -169,6 +170,12 @@ Unicode: http://www.unicode.org/ 5. History +Jan 26, 1999 + * Add Big5 support for frontend encoding + (you need to create a database with EUC_TW to use Big5) + * Add regression test case for EUC_TW + (contributed by Jonah Kuo <jonahkuo@mail.ttn.com.tw>) + Dec 15, 1998 * Bugs related to SQL_ASCII support fixed diff --git a/doc/README.mb.jp b/doc/README.mb.jp index 52ae09ec48a15b5b57100e0be817ad20ad69b37f..6030613ec9026c4306feec16913ac203ff42f742 100644 --- a/doc/README.mb.jp +++ b/doc/README.mb.jp @@ -1,4 +1,4 @@ -postgresql 6.4 multi-byte (MB) support README 1998/12/16 $B:n@.(B +postgresql 6.5 multi-byte (MB) support README 1999/1/26 $B:n@.(B $B@P0fC#IW(B t-ishii@sra.co.jp @@ -130,7 +130,7 @@ initdb/createdb/create database $B$K$*$1$k%(%s%3!<%G%#%s%0$N;XDj$K$D$$$F(B $B4D6-JQ?t(B PGCLIENTENCODING $B$,@_Dj$5$l$F$$$k>l9g$O$=$NCM$,M%@h$5$l!"%P%C(B $B%/%(%s%IB&$H0[$J$k%(%s%3!<%G%#%s%0$,;HMQ$G$-$^$9!#@_Dj2DG=$J%(%s%3!<(B - $B%G%#%s%0$O!">e5-$K2C$(!"(BSJIS ($B%7%U%H(BJIS)$B$,;XDj$G$-$^$9!#(B + $B%G%#%s%0$O!">e5-$K2C$(!"(BSJIS ($B%7%U%H(BJIS)$B$H(B Big5 $B$,;XDj$G$-$^$9!#(B $B$A$J$_$K!"(BSJIS $B$O(B JISX0201 $B$N(B 1$B%P%$%H%+%J!"$$$o$f$k!VH>3Q%+%?(B $B%+%J!W$b%5%]!<%H$7$F$$$^$9(B($B7h$7$F!VH>3Q%+%?%+%J!W$N;HMQ$r$*4+(B @@ -206,6 +206,13 @@ initdb/createdb/create database $B$K$*$1$k%(%s%3!<%G%#%s%0$N;XDj$K$D$$$F(B $B2~DjMzNr!'(B + 
1999/1/26 Big5 $B%5%]!<%HDI2C(B + * Big5 $B$,%U%m%s%H%(%s%IB&$N%(%s%3!<%G%#%s%0$H$7$FMxMQ$G$-$k$h(B + $B$&$K$J$j$^$7$?!#$3$N>l9g!"%P%C%/%(%s%IB&$N%(%s%3!<%G%#%s%0$O(B + EUC_TW $B$^$?$O(B MULE_INTERNAL $B$H$7$^$9!#(B + * EUC_TW $B$N(B regression test $B%1!<%9$rDI2C(B + (contributed by Jonah Kuo <jonahkuo@mail.ttn.com.tw>) + 1998/12/16 $BK\%I%-%e%a%s%H=$@5!#(B * Makefile.custom $B$G(B MB=EUC_JP $B$J$I$H@_Dj$9$kJ}K!$O(B 6.4 $B0J9_(B $B%5%]!<%H$5$l$F$$$J$$$N$G:o=|$7$?!#(B diff --git a/src/backend/utils/mb/Makefile b/src/backend/utils/mb/Makefile index 5b7cb745add50a22bd6bceda8d67847a3dcd467e..47180e8004d6536023d8576380c7993ba462b6a5 100644 --- a/src/backend/utils/mb/Makefile +++ b/src/backend/utils/mb/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/mb # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.2 1998/07/26 04:31:03 scrappy Exp $ +# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.3 1999/02/02 18:51:21 momjian Exp $ # #------------------------------------------------------------------------- @@ -16,7 +16,8 @@ ifdef MULTIBYTE CFLAGS+= $(MBFLAGS) endif -OBJS = common.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o +OBJS = common.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o \ + big5.o all: SUBSYS.o diff --git a/src/backend/utils/mb/big5.c b/src/backend/utils/mb/big5.c new file mode 100644 index 0000000000000000000000000000000000000000..105854d24799ee59b8742442f308461871dc5745 --- /dev/null +++ b/src/backend/utils/mb/big5.c @@ -0,0 +1,347 @@ +/* + * conversion between BIG5 and Mule Internal Code (CNS 11643-1992 + * plane 1 and plane 2). + * This program is partially copied from lv(Multilingual file viewer) + * and slightly modified. lv is written and copyrighted by NARITA Tomio + * (nrt@web.ad.jp). 
+ * + * 1999/1/15 Tatsuo Ishii + * + * $Id: big5.c,v 1.1 1999/02/02 18:51:22 momjian Exp $ + */ + +#include "mb/pg_wchar.h" + +typedef struct { + unsigned short code, peer; /* code: first source code point of a range; peer: code point that start maps to (0 = the range has no mapping) */ +} codes_t; + +/* map Big5 Level 1 to CNS 11643-1992 Plane 1; searched by BinarySearchRange(), which reads the entry after the matched one as the range's upper bound, so the final entry only terminates the last range */ +static codes_t big5Level1ToCnsPlane1[ 25 ] = { /* range */ + { 0xA140, 0x2121 }, + { 0xA1F6, 0x2258 }, + { 0xA1F7, 0x2257 }, + { 0xA1F8, 0x2259 }, + { 0xA2AF, 0x2421 }, + { 0xA3C0, 0x4221 }, + { 0xa3e1, 0x0000 }, + { 0xA440, 0x4421 }, + { 0xACFE, 0x5753 }, + { 0xacff, 0x0000 }, + { 0xAD40, 0x5323 }, + { 0xAFD0, 0x5754 }, + { 0xBBC8, 0x6B51 }, + { 0xBE52, 0x6B50 }, + { 0xBE53, 0x6F5C }, + { 0xC1AB, 0x7536 }, + { 0xC2CB, 0x7535 }, + { 0xC2CC, 0x7737 }, + { 0xC361, 0x782E }, + { 0xC3B9, 0x7865 }, + { 0xC3BA, 0x7864 }, + { 0xC3BB, 0x7866 }, + { 0xC456, 0x782D }, + { 0xC457, 0x7962 }, + { 0xc67f, 0x0000 } +}; + +/* map CNS 11643-1992 Plane 1 to Big5 Level 1 (inverse of the table above; same range-start layout with a terminating final entry) */ +static codes_t cnsPlane1ToBig5Level1[ 26 ] = { /* range */ + { 0x2121, 0xA140 }, + { 0x2257, 0xA1F7 }, + { 0x2258, 0xA1F6 }, + { 0x2259, 0xA1F8 }, + { 0x234f, 0x0000 }, + { 0x2421, 0xA2AF }, + { 0x2571, 0x0000 }, + { 0x4221, 0xA3C0 }, + { 0x4242, 0x0000 }, + { 0x4421, 0xA440 }, + { 0x5323, 0xAD40 }, + { 0x5753, 0xACFE }, + { 0x5754, 0xAFD0 }, + { 0x6B50, 0xBE52 }, + { 0x6B51, 0xBBC8 }, + { 0x6F5C, 0xBE53 }, + { 0x7535, 0xC2CB }, + { 0x7536, 0xC1AB }, + { 0x7737, 0xC2CC }, + { 0x782D, 0xC456 }, + { 0x782E, 0xC361 }, + { 0x7864, 0xC3BA }, + { 0x7865, 0xC3B9 }, + { 0x7866, 0xC3BB }, + { 0x7962, 0xC457 }, + { 0x7d4c, 0x0000 } +}; + +/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ +static codes_t big5Level2ToCnsPlane2[ 48 ] = { /* range */ + { 0xC940, 0x2121 }, + { 0xc94a, 0x0000 }, + { 0xC94B, 0x212B }, + { 0xC96C, 0x214D }, + { 0xC9BE, 0x214C }, + { 0xC9BF, 0x217D }, + { 0xC9ED, 0x224E }, + { 0xCAF7, 0x224D }, + { 0xCAF8, 0x2439 }, + { 0xD77A, 0x3F6A }, + { 0xD77B, 0x387E }, + { 0xDBA7, 0x3F6B }, + { 0xDDFC, 0x4176 }, + { 0xDDFD, 0x4424 }, + { 0xE8A3, 0x554C }, + { 0xE976, 0x5723 
}, + { 0xEB5B, 0x5A29 }, + { 0xEBF1, 0x554B }, + { 0xEBF2, 0x5B3F }, + { 0xECDE, 0x5722 }, + { 0xECDF, 0x5C6A }, + { 0xEDAA, 0x5D75 }, + { 0xEEEB, 0x642F }, + { 0xEEEC, 0x6039 }, + { 0xF056, 0x5D74 }, + { 0xF057, 0x6243 }, + { 0xF0CB, 0x5A28 }, + { 0xF0CC, 0x6337 }, + { 0xF163, 0x6430 }, + { 0xF16B, 0x6761 }, + { 0xF16C, 0x6438 }, + { 0xF268, 0x6934 }, + { 0xF269, 0x6573 }, + { 0xF2C3, 0x664E }, + { 0xF375, 0x6762 }, + { 0xF466, 0x6935 }, + { 0xF4B5, 0x664D }, + { 0xF4B6, 0x6962 }, + { 0xF4FD, 0x6A4C }, + { 0xF663, 0x6A4B }, + { 0xF664, 0x6C52 }, + { 0xF977, 0x7167 }, + { 0xF9C4, 0x7166 }, + { 0xF9C5, 0x7234 }, + { 0xF9C6, 0x7240 }, + { 0xF9C7, 0x7235 }, + { 0xF9D2, 0x7241 }, + { 0xf9d6, 0x0000 } +}; + +/* map CNS 11643-1992 Plane 2 to Big5 Level 2; each entry is {first CNS code of a range, Big5 code it maps to}, a 0x0000 target marks a range with no mapping, and the final entry only terminates the last range */ +static codes_t cnsPlane2ToBig5Level2[ 49 ] = { /* range */ + { 0x2121, 0xC940 }, + { 0x212B, 0xC94B }, + { 0x214C, 0xC9BE }, + { 0x214D, 0xC96C }, + { 0x217D, 0xC9BF }, + { 0x224D, 0xCAF7 }, + { 0x224E, 0xC9ED }, + { 0x2439, 0xCAF8 }, + { 0x387E, 0xD77B }, + { 0x3F6A, 0xD77A }, + { 0x3F6B, 0xDBA7 }, + { 0x4424, 0x0000 }, /* NOTE(review): 0x4424 is out of ascending order here (previous start 0x3F6B, next 0x4176) although BinarySearchRange() bisects on .code -- verify against the upstream lv table */ + { 0x4176, 0xDDFC }, + { 0x4177, 0x0000 }, + { 0x4424, 0xDDFD }, + { 0x554B, 0xEBF1 }, + { 0x554C, 0xE8A3 }, + { 0x5722, 0xECDE }, + { 0x5723, 0xE976 }, + { 0x5A28, 0xF0CB }, + { 0x5A29, 0xEB5B }, + { 0x5B3F, 0xEBF2 }, + { 0x5C6A, 0xECDF }, + { 0x5D74, 0xF056 }, + { 0x5D75, 0xEDAA }, + { 0x6039, 0xEEEC }, + { 0x6243, 0xF057 }, + { 0x6337, 0xF0CC }, + { 0x642F, 0xEEEB }, + { 0x6430, 0xF163 }, + { 0x6438, 0xF16C }, + { 0x6573, 0xF269 }, + { 0x664D, 0xF4B5 }, + { 0x664E, 0xF2C3 }, + { 0x6761, 0xF16B }, + { 0x6762, 0xF375 }, + { 0x6934, 0xF268 }, + { 0x6935, 0xF466 }, + { 0x6962, 0xF4B6 }, + { 0x6A4B, 0xF663 }, + { 0x6A4C, 0xF4FD }, + { 0x6C52, 0xF664 }, + { 0x7166, 0xF9C4 }, + { 0x7167, 0xF977 }, + { 0x7234, 0xF9C5 }, + { 0x7235, 0xF9C7 }, + { 0x7240, 0xF9C6 }, + { 0x7241, 0xF9D2 }, + { 0x7245, 0x0000 } +}; + +/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4; each row is {Big5 code, CNS code} and is matched exactly, not by range */ +static unsigned short 
b1c4[][2] = { + {0xC879, 0x2123}, + {0xC87B, 0x2124}, + {0xC87D, 0x212A}, + {0xC8A2, 0x2152} +}; + +/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3; each row is {Big5 code, CNS code}, with the CNS code stored as 7-bit bytes (no 0x8080 high bits) */ +static unsigned short b2c3[][2] = { + {0xF9D6, 0x4337}, + {0xF9D7, 0x4F50}, + {0xF9D8, 0x444E}, + {0xF9D9, 0x504A}, + {0xF9DA, 0x2C5D}, + {0xF9DB, 0x3D7E}, + {0xF9DC, 0x4B5C} +}; + +static unsigned short BinarySearchRange +(codes_t *array, int high, unsigned short code ) /* high: index of the last searchable range start; array[high + 1] is read as an upper bound and must exist. Returns the converted code, or 0 when code falls in no mapped range. */ +{ + int low, mid, distance, tmp; + + low = 0; + mid = high >> 1; + + for( ; low <= high ; mid = ( low + high ) >> 1 ){ + if( ( array[ mid ].code <= code ) && ( array[ mid + 1 ].code > code ) ){ + if( 0 == array[ mid ].peer ) + return 0; + if( code >= 0xa140U ){ + /* big5 to cns (the direction is inferred from the code value itself) */ + tmp = ( ( code & 0xff00 ) - ( array[ mid ].code & 0xff00 ) ) >> 8; + high = code & 0x00ff; + low = array[ mid ].code & 0x00ff; + /* + * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, 0xa1-0xfe + * (radicals: 0x00-0x3e, 0x3f-0x9c) + * big5 radix is 0x9d. [region_low, region_high] + * We should remember big5 has two different regions (above). + * There is a bias for the distance between these regions. + * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is 1.) + * bias = - 0x22. + */ + distance = tmp * 0x9d + high - low + + ( high >= 0xa1 ? ( low >= 0xa1 ? 0 : - 0x22 ) + : ( low >= 0xa1 ? + 0x22 : 0 ) ); + /* + * NOTE: we have to convert the distance into a code point. + * The code point's low_byte is 0x21 plus mod_0x5e. + * First, we extract the mod_0x5e of the starting + * code point, subtracting 0x21, and add distance to it. + * Then we calculate again mod_0x5e of them, and restore + * the final codepoint, adding 0x21. 
+ */ + tmp = ( array[ mid ].peer & 0x00ff ) + distance - 0x21; + tmp = ( array[ mid ].peer & 0xff00 ) + ( ( tmp / 0x5e ) << 8 ) + + 0x21 + tmp % 0x5e; + return tmp; + } else { + /* cns to big5 */ + tmp = ( ( code & 0xff00 ) - ( array[ mid ].code & 0xff00 ) ) >> 8; + /* + * NOTE: ISO 94-character sets range over 0x21-0x7e. + * Its radix is 0x5e. But there is no distance bias like big5. + */ + distance = tmp * 0x5e + + ( (int)( code & 0x00ff ) - (int)( array[ mid ].code & 0x00ff ) ); + /* + * NOTE: Similar to big5 to cns conversion, we extract mod_0x9d and + * restore mod_0x9d into a code point. + */ + low = array[ mid ].peer & 0x00ff; + tmp = low + distance - ( low >= 0xa1 ? 0x62 : 0x40 ); + low = tmp % 0x9d; + tmp = ( array[ mid ].peer & 0xff00 ) + ( ( tmp / 0x9d ) << 8 ) + + ( low > 0x3e ? 0x62 : 0x40 ) + low; + return tmp; + } + } else if( array[ mid ].code > code ){ + high = mid - 1; + } else { + low = mid + 1; + } + } + + return 0; +} + + +unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc ) /* Converts a Big5 code to a CNS 11643 code with the high bit of both bytes set and stores the plane's leading byte in *lc; when there is no mapping, stores 0 in *lc and returns '?'. */ +{ + unsigned short cns = 0; + int i; + + if( big5 < 0xc940U ){ + /* level 1 */ + + for (i=0;i<sizeof(b1c4)/sizeof(b1c4[0]);i++) { /* iterate over rows: dividing by sizeof(unsigned short) counted every element and ran past the end of the array */ + if (b1c4[i][0] == big5) { + *lc = LC_CNS11643_4; + return(b1c4[i][1] | 0x8080U); + } + } + + if( 0 < (cns = BinarySearchRange( big5Level1ToCnsPlane1, 23, big5 )) ) + *lc = LC_CNS11643_1; + } else if( big5 == 0xc94aU ){ + /* level 2 code that is mapped into plane 1 */ + *lc = LC_CNS11643_1; + cns = 0x4442; + } else { + /* level 2 */ + for (i=0;i<sizeof(b2c3)/sizeof(b2c3[0]);i++) { /* iterate over rows, not over individual shorts */ + if (b2c3[i][0] == big5) { + *lc = LC_CNS11643_3; + return(b2c3[i][1] | 0x8080U); /* set the high bits like every other return path, so the bytes are valid inside MIC */ + } + } + + if( 0 < (cns = BinarySearchRange( big5Level2ToCnsPlane2, 46, big5 )) ) + *lc = LC_CNS11643_2; + } + + if( 0 == cns ){ /* no mapping Big5 to CNS 11643-1992 */ + *lc = 0; + return (unsigned short)'?'; + } + + return cns | 0x8080; +} + +unsigned short CNStoBIG5( unsigned short cns, unsigned char lc ) /* Converts a CNS 11643 code (high bits are masked off first) in the plane identified by lc to Big5. */ +{ + int i; + unsigned int big5 = 0; + + cns &= 0x7f7f; + + switch( lc ){ + case 
LC_CNS11643_1: + big5 = BinarySearchRange( cnsPlane1ToBig5Level1, 24, cns ); + break; + case LC_CNS11643_2: + big5 = BinarySearchRange( cnsPlane2ToBig5Level2, 47, cns ); + break; + case LC_CNS11643_3: + for (i=0;i<sizeof(b2c3)/sizeof(b2c3[0]);i++) { /* iterate over rows: dividing by sizeof(unsigned short) counted every element and ran past the end of the array */ + if (b2c3[i][1] == cns) { + return(b2c3[i][0]); + } + } + break; + case LC_CNS11643_4: + for (i=0;i<sizeof(b1c4)/sizeof(b1c4[0]);i++) { /* iterate over rows, not over individual shorts */ + if (b1c4[i][1] == cns) { + return(b1c4[i][0]); + } + } /* not found: falls through to default, leaving big5 == 0 */ + default: + break; + } + return big5; /* 0 means the code could not be converted */ +} diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index 1a4493c8e07dc395d729e63264e0335f9dc9b7db..9aac4e96a921458cb77eab39de07b4d616901091 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -2,7 +2,7 @@ * conversion between client encoding and server internal encoding * (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: conv.c,v 1.4 1998/12/14 04:59:58 momjian Exp $ + * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $ */ #include <stdio.h> #include <string.h> @@ -369,6 +369,94 @@ mic2euc_tw(unsigned char *mic, unsigned char *p, int len) *p = '\0'; } +/* + * Big5 ---> MIC (a Big5 code with no CNS mapping is written as "(hhhh)", its two bytes in hex) + */ +static void +big52mic(unsigned char *big5, unsigned char *p, int len) +{ + unsigned short c1; + unsigned short big5buf, cnsBuf; + unsigned char lc; + char bogusBuf[3]; /* two hex digits plus the terminating NUL that sprintf() appends; 2 bytes overflowed */ + int i; + + while (len > 0 && (c1 = *big5++)) + { + if (c1 <= 0x007fU) { /* ASCII */ + len--; + *p++ = c1; + } else { + len -= 2; + big5buf = c1 << 8; + c1 = *big5++; + big5buf |= c1; + cnsBuf = BIG5toCNS(big5buf, &lc); + if (lc != 0) { + if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4) { + *p++ = 0x9d; /* LCPRV2 */ + } + *p++ = lc; /* Plane No. 
*/ + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } else { /* cannot convert: rewind to the lead byte and dump both bytes in hex */ + big5 -= 2; + *p++ = '('; + for (i=0;i<2;i++) { + sprintf(bogusBuf,"%02x",*big5++); /* NOTE(review): writes two hex digits plus a NUL, so bogusBuf must hold at least 3 bytes */ + *p++ = bogusBuf[0]; + *p++ = bogusBuf[1]; + } + *p++ = ')'; + } + } + } + *p = '\0'; +} + +/* + * MIC ---> Big5 (characters that cannot be converted are handed to printBogusChar()) + */ +static void +mic2big5(unsigned char *mic, unsigned char *p, int len) +{ + int l; + unsigned short c1; + unsigned short big5buf, cnsBuf; + + while (len > 0 && (c1 = *mic)) + { + l = pg_mic_mblen(mic++); + len -= l; + + /* 0x9d means LCPRV2 */ + if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d) + { + if (c1 == 0x9d) { + c1 = *mic++; /* get plane no. */ + } + cnsBuf = (*mic++)<<8; + cnsBuf |= (*mic++) & 0x00ff; + big5buf = CNStoBIG5(cnsBuf, c1); + if (big5buf == 0) { /* cannot convert to Big5! */ + mic -= l; /* rewind to the first byte of this character */ + printBogusChar(&mic, &p); + } else { + *p++ = (big5buf >> 8) & 0x00ff; + *p++ = big5buf & 0x00ff; + } + } + else if (c1 <= 0x7f) /* ASCII */ + { + *p++ = c1; + } else { /* cannot convert to Big5! */ + mic--; /* step back onto the leading byte consumed above */ + printBogusChar(&mic, &p); + } + } + *p = '\0'; +} + /* * LATINn ---> MIC */ @@ -514,5 +602,6 @@ pg_encoding_conv_tbl pg_conv_tbl[] = { {LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */ {LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */ {SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */ + {BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */ {-1, "", 0, 0, 0} /* end mark */ }; diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index e359a53df3d4350b2c673d52f39a00daf6dc7548..f354ccc96a24ff63135167ad62c9b552e6e23019 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -3,14 +3,14 @@ * client encoding and server internal encoding. 
* (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: mbutils.c,v 1.4 1998/09/25 01:46:23 momjian Exp $ */ + * $Id: mbutils.c,v 1.5 1999/02/02 18:51:23 momjian Exp $ */ #include <stdio.h> #include <string.h> #include "mb/pg_wchar.h" -static client_encoding = -1; +static int client_encoding = -1; static void (*client_to_mic) ();/* something to MIC */ static void (*client_from_mic) (); /* MIC to something */ static void (*server_to_mic) ();/* something to MIC */ diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index edd786f29f80470f789a3a203b59c0c514097155..2a1141fbadbf776b58f4f0a7c15b2baf2f41bf68 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multi-byte streams. * Tatsuo Ishii - * $Id: wchar.c,v 1.4 1998/09/01 04:33:26 momjian Exp $ + * $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $ */ #include "mb/pg_wchar.h" @@ -396,6 +396,25 @@ pg_sjis_mblen(const unsigned char *s) return (len); } +/* + * Big5: byte length of the character starting at s (2 when the first byte is above 0x7f, otherwise 1) + */ +static int +pg_big5_mblen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* two-byte Big5 character 
*/ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + pg_wchar_tbl pg_wchar_table[] = { {pg_ascii2wchar_with_len, pg_ascii_mblen}, {pg_eucjp2wchar_with_len, pg_eucjp_mblen}, @@ -429,7 +448,8 @@ pg_wchar_tbl pg_wchar_table[] = { {0, 0}, {0, 0}, {0, 0}, - {0, pg_sjis_mblen} + {0, pg_sjis_mblen}, + {0, pg_big5_mblen} }; /* returns the byte length of a word for mule internal code */ diff --git a/src/bin/psql/psqlHelp.h b/src/bin/psql/psqlHelp.h index ab0696d1ae44f82a4146842e715a6a8e0c33fff8..570f0226e344b1fc8a4230a7b79ae49ec3777d4c 100644 --- a/src/bin/psql/psqlHelp.h +++ b/src/bin/psql/psqlHelp.h @@ -5,7 +5,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: psqlHelp.h,v 1.57 1999/02/02 18:41:17 momjian Exp $ + * $Id: psqlHelp.h,v 1.58 1999/02/02 18:51:24 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -330,7 +330,7 @@ static struct _helpStruct QL_HELP[] = { set GEQO TO 'ON[=#]'|'OFF'\n\ set R_PLANS TO 'ON'|'OFF'\n\ set QUERY_LIMIT TO #\n\ -set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"}, +set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'BIG5'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"}, #else "\ \tSET DateStyle TO 'ISO'|'SQL'|'Postgres'|'European'|'US'|'NonEuropean'\n\ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 62bbe60cb1623b36311f09407200184fcc5c9335..889606361180fcc3442b3de2d18cecf37003a801 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.5 1998/09/25 01:46:25 momjian Exp $ */ +/* $Id: pg_wchar.h,v 1.6 1999/02/02 18:51:25 momjian Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -11,7 +11,7 @@ #define EUC_JP 1 /* EUC for Japanese */ #define EUC_CN 2 /* EUC for Chinese */ #define EUC_KR 3 /* EUC for Korean */ -#define EUC_TW 3 /* EUC for Taiwan */ 
+#define EUC_TW 4 /* EUC for Taiwan */ #define UNICODE 5 /* Unicode UTF-8 */ #define MULE_INTERNAL 6 /* Mule internal code */ #define LATIN1 7 /* ISO-8859 Latin 1 */ @@ -25,6 +25,7 @@ #define LATIN9 15 /* ISO-8859 Latin 9 */ /* followings are for client encoding only */ #define SJIS 32 /* Shift JIS */ +#define BIG5 33 /* Big5 */ #ifdef MULTIBYTE typedef unsigned int pg_wchar; @@ -122,6 +123,8 @@ extern int GetDatabaseEncoding(void); extern void SetDatabaseEncoding(int); extern void SetTemplateEncoding(int); extern int GetTemplateEncoding(void); +extern unsigned short BIG5toCNS(unsigned short, unsigned char *); +extern unsigned short CNStoBIG5(unsigned short, unsigned char); #endif /* MULTIBYTE */ diff --git a/src/interfaces/libpq/Makefile.in b/src/interfaces/libpq/Makefile.in index 471fda1773df1258e5a8ebbc142bd5399e6a0754..b5ad798affcdbcb2fff131ca1025fcea3e31956f 100644 --- a/src/interfaces/libpq/Makefile.in +++ b/src/interfaces/libpq/Makefile.in @@ -6,7 +6,7 @@ # Copyright (c) 1994, Regents of the University of California # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.42 1999/01/17 06:19:34 momjian Exp $ +# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.43 1999/02/02 18:51:29 momjian Exp $ # #------------------------------------------------------------------------- @@ -31,7 +31,7 @@ OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \ dllist.o pqsignal.o ifdef MULTIBYTE -OBJS+= common.o wchar.o conv.o +OBJS+= common.o wchar.o conv.o big5.o endif # If crypt is a separate library, rather than part of libc, @@ -62,6 +62,9 @@ wchar.c: $(SRCDIR)/backend/utils/mb/wchar.c conv.c: $(SRCDIR)/backend/utils/mb/conv.c -$(LN_S) $(SRCDIR)/backend/utils/mb/conv.c . + +big5.c: $(SRCDIR)/backend/utils/mb/big5.c + -$(LN_S) $(SRCDIR)/backend/utils/mb/big5.c . 
endif # The following rules cause dependencies in the backend directory to @@ -142,7 +145,7 @@ beforeinstall-headers: .PHONY: clean clean: rm -f libpq.a $(shlib) $(OBJS) - rm -f dllist.c common.c wchar.c conv.c + rm -f dllist.c common.c wchar.c conv.c big5.c ifeq ($(PORTNAME), win) rm -f pq.def endif diff --git a/src/test/mb/expected/big5.out b/src/test/mb/expected/big5.out new file mode 100644 index 0000000000000000000000000000000000000000..0ca86167ece0fd667b31cd492b3fe7d3741df251 --- /dev/null +++ b/src/test/mb/expected/big5.out @@ -0,0 +1,84 @@ +QUERY: drop table �t�Ӹ��; +QUERY: create table �t�Ӹ�� (��~�O text, ���q���Y varchar, �a�} varchar(16)); +QUERY: create index �t�Ӹ��index1 on �t�Ӹ�� using btree (��~�O); +QUERY: create index �t�Ӹ��index2 on �t�Ӹ�� using hash (���q���Y); +QUERY: insert into �t�Ӹ�� values ('�q���~', '�F�F���', '�_A01��'); +QUERY: insert into �t�Ӹ�� values ('�s�y�~', '�]���������q', '��B10��'); +QUERY: insert into �t�Ӹ�� values ('�\���~', '�����ѥ��������q', '��Z01�E'); +QUERY: vacuum �t�Ӹ��; +QUERY: select * from �t�Ӹ��; +��~�O|���q���Y |�a�} +------+----------------+------- +�q���~|�F�F��� |�_A01�� +�s�y�~|�]���������q |��B10�� +�\���~|�����ѥ��������q|��Z01�E +(3 rows) + +QUERY: select * from �t�Ӹ�� where �a�} = '��Z01�E'; +��~�O|���q���Y |�a�} +------+----------------+------- +�\���~|�����ѥ��������q|��Z01�E +(1 row) + +QUERY: select * from �t�Ӹ�� where �a�} ~* '��z01�E'; +��~�O|���q���Y |�a�} +------+----------------+------- +�\���~|�����ѥ��������q|��Z01�E +(1 row) + +QUERY: select * from �t�Ӹ�� where �a�} like '_Z01_'; +��~�O|���q���Y |�a�} +------+----------------+------- +�\���~|�����ѥ��������q|��Z01�E +(1 row) + +QUERY: select * from �t�Ӹ�� where �a�} like '_Z%'; +��~�O|���q���Y |�a�} +------+----------------+------- +�\���~|�����ѥ��������q|��Z01�E +(1 row) + +QUERY: select * from �t�Ӹ�� where ���q���Y ~ '�F�F��[�H�O��]'; +��~�O|���q���Y|�a�} +------+--------+------- +�q���~|�F�F���|�_A01�� +(1 row) + +QUERY: select * from �t�Ӹ�� where 
���q���Y ~* '�F�F��[�H�O��]'; +��~�O|���q���Y|�a�} +------+--------+------- +�q���~|�F�F���|�_A01�� +(1 row) + +QUERY: select *, character_length(��~�O) from �t�Ӹ��; +��~�O|���q���Y |�a�} |length +------+----------------+-------+------ +�q���~|�F�F��� |�_A01��| 3 +�s�y�~|�]���������q |��B10��| 3 +�\���~|�����ѥ��������q|��Z01�E| 3 +(3 rows) + +QUERY: select *, octet_length(��~�O) from �t�Ӹ��; +��~�O|���q���Y |�a�} |octet_length +------+----------------+-------+------------ +�q���~|�F�F��� |�_A01��| 6 +�s�y�~|�]���������q |��B10��| 6 +�\���~|�����ѥ��������q|��Z01�E| 6 +(3 rows) + +QUERY: select *, position('����' in ���q���Y) from �t�Ӹ��; +��~�O|���q���Y |�a�} |strpos +------+----------------+-------+------ +�q���~|�F�F��� |�_A01��| 0 +�s�y�~|�]���������q |��B10��| 3 +�\���~|�����ѥ��������q|��Z01�E| 5 +(3 rows) + +QUERY: select *, substring(���q���Y from 3 for 6 ) from �t�Ӹ��; +��~�O|���q���Y |�a�} |substr +------+----------------+-------+------------ +�q���~|�F�F��� |�_A01��|��� +�s�y�~|�]���������q |��B10��|�������q +�\���~|�����ѥ��������q|��Z01�E|�ѥ��������q +(3 rows) + diff --git a/src/test/mb/mbregress.sh b/src/test/mb/mbregress.sh index 2ef2eb04124a1cbb6bac7f92c0b66dfd946526b0..9216d3c58be05a763d62ad64baa32ac7e67649e2 100644 --- a/src/test/mb/mbregress.sh +++ b/src/test/mb/mbregress.sh @@ -1,5 +1,5 @@ #! /bin/sh -# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.2 1998/07/26 04:31:38 scrappy Exp $ +# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.3 1999/02/02 18:51:32 momjian Exp $ if echo '\c' | grep -s c >/dev/null 2>&1 then @@ -15,7 +15,7 @@ if [ ! 
-d results ];then fi PSQL="psql -n -e -q" -tests="euc_jp sjis euc_kr euc_cn unicode mule_internal" +tests="euc_jp sjis euc_kr euc_cn euc_tw big5 unicode mule_internal" unset PGCLIENTENCODING for i in $tests do @@ -26,6 +26,11 @@ do export PGCLIENTENCODING $PSQL euc_jp < sql/sjis.sql > results/sjis.out 2>&1 unset PGCLIENTENCODING + elif [ $i = big5 ];then + PGCLIENTENCODING=BIG5 + export PGCLIENTENCODING + $PSQL euc_tw < sql/big5.sql > results/big5.out 2>&1 + unset PGCLIENTENCODING else destroydb $i >/dev/null 2>&1 createdb -E `echo $i|tr "[a-z]" "[A-Z]"` $i diff --git a/src/test/mb/sql/big5.sql b/src/test/mb/sql/big5.sql new file mode 100644 index 0000000000000000000000000000000000000000..4e2d10019962d0b902f1f6d97c51551afdd4a9cf --- /dev/null +++ b/src/test/mb/sql/big5.sql @@ -0,0 +1,20 @@ +drop table �t�Ӹ��; +create table �t�Ӹ�� (��~�O text, ���q���Y varchar, �a�} varchar(16)); +create index �t�Ӹ��index1 on �t�Ӹ�� using btree (��~�O); +create index �t�Ӹ��index2 on �t�Ӹ�� using hash (���q���Y); +insert into �t�Ӹ�� values ('�q���~', '�F�F���', '�_A01��'); +insert into �t�Ӹ�� values ('�s�y�~', '�]���������q', '��B10��'); +insert into �t�Ӹ�� values ('�\���~', '�����ѥ��������q', '��Z01�E'); +vacuum �t�Ӹ��; +select * from �t�Ӹ��; +select * from �t�Ӹ�� where �a�} = '��Z01�E'; +select * from �t�Ӹ�� where �a�} ~* '��z01�E'; +select * from �t�Ӹ�� where �a�} like '_Z01_'; +select * from �t�Ӹ�� where �a�} like '_Z%'; +select * from �t�Ӹ�� where ���q���Y ~ '�F�F��[�H�O��]'; +select * from �t�Ӹ�� where ���q���Y ~* '�F�F��[�H�O��]'; + +select *, character_length(��~�O) from �t�Ӹ��; +select *, octet_length(��~�O) from �t�Ӹ��; +select *, position('����' in ���q���Y) from �t�Ӹ��; +select *, substring(���q���Y from 3 for 6 ) from �t�Ӹ��;