KOI8-U support (was Re: [BUGS] create database warning)
От | Peter Eisentraut |
---|---|
Тема | KOI8-U support (was Re: [BUGS] create database warning) |
Дата | |
Msg-id | 498C7569.3060204@gmx.net обсуждение исходный текст |
Ответы |
Re: KOI8-U support (was Re: [BUGS] create database warning)
|
Список | pgsql-hackers |
Peter Eisentraut wrote: > Mykola Stryebkov wrote: >> template1=# create database inf with encoding 'windows-1251'; >> WARNING: could not determine encoding for locale "uk_UA.KOI8-U": >> codeset is "KOI8-U" >> DETAIL: Please report this to <pgsql-bugs@postgresql.org>. >> CREATE DATABASE >> template1=# >> >> What's wrong with it? > > PostgreSQL does not support the KOI8-U encoding. You could try to use > uk_UA.utf8. We get these complaints about missing KOI8-U support once in a while. Attached is a patch to add KOI8-U support with UTF-8 conversion. It should be enough to help this class of users, but more fancy features such as conversion between KOI8U and KOI8R or KOI8U and various WIN encodings are not there yet. I propose to add this patch to PostgreSQL 8.4. Mykola, if you have an 8.3 or 8.4 source tree lying around, please test this patch. diff -Nur ../cvs-pgsql/doc/src/sgml/charset.sgml ./doc/src/sgml/charset.sgml --- ../cvs-pgsql/doc/src/sgml/charset.sgml 2008-10-02 10:50:13.000000000 +0300 +++ ./doc/src/sgml/charset.sgml 2009-02-06 18:26:09.000000000 +0200 @@ -457,12 +457,20 @@ <entry></entry> </row> <row> - <entry><literal>KOI8</literal></entry> - <entry><acronym>KOI</acronym>8-R(U)</entry> - <entry>Cyrillic</entry> + <entry><literal>KOI8R</literal></entry> + <entry><acronym>KOI</acronym>8-R</entry> + <entry>Cyrillic (Russian)</entry> <entry>Yes</entry> <entry>1</entry> - <entry><literal>KOI8R</></entry> + <entry><literal>KOI8</></entry> + </row> + <row> + <entry><literal>KOI8U</literal></entry> + <entry><acronym>KOI</acronym>8-U</entry> + <entry>Cyrillic (Ukrainian)</entry> + <entry>Yes</entry> + <entry>1</entry> + <entry></entry> </row> <row> <entry><literal>LATIN1</literal></entry> diff -Nur ../cvs-pgsql/src/backend/utils/mb/conversion_procs/Makefile ./src/backend/utils/mb/conversion_procs/Makefile --- ../cvs-pgsql/src/backend/utils/mb/conversion_procs/Makefile 2008-08-23 23:31:37.000000000 +0300 +++ ./src/backend/utils/mb/conversion_procs/Makefile 
2009-02-06 19:03:20.000000000 +0200 @@ -84,6 +84,8 @@ utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \ utf8_to_koi8_r UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \ koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \ + utf8_to_koi8_u UTF8 KOI8U utf8_to_koi8u utf8_and_cyrillic \ + koi8_u_to_utf8 KOI8U UTF8 koi8u_to_utf8 utf8_and_cyrillic \ utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \ windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \ utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \ diff -Nur ../cvs-pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c ./src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c --- ../cvs-pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c 2009-01-30 10:35:27.000000000+0200 +++ ./src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c 2009-02-06 19:20:06.000000000 +0200 @@ -16,15 +16,23 @@ #include "mb/pg_wchar.h" #include "../../Unicode/utf8_to_koi8r.map" #include "../../Unicode/koi8r_to_utf8.map" +#include "../../Unicode/utf8_to_koi8u.map" +#include "../../Unicode/koi8u_to_utf8.map" PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(utf8_to_koi8r); PG_FUNCTION_INFO_V1(koi8r_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_koi8u); +PG_FUNCTION_INFO_V1(koi8u_to_utf8); + extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS); extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS); +extern Datum utf8_to_koi8u(PG_FUNCTION_ARGS); +extern Datum koi8u_to_utf8(PG_FUNCTION_ARGS); + /* ---------- * conv_proc( * INTEGER, -- source encoding id @@ -65,3 +73,33 @@ PG_RETURN_VOID(); } + +Datum +utf8_to_koi8u(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U); + + UtfToLocal(src, dest, ULmapKOI8U, NULL, + sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len); + + 
PG_RETURN_VOID(); +} + +Datum +koi8u_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8); + + LocalToUtf(src, dest, LUmapKOI8U, NULL, + sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len); + + PG_RETURN_VOID(); +} diff -Nur ../cvs-pgsql/src/backend/utils/mb/encnames.c ./src/backend/utils/mb/encnames.c --- ../cvs-pgsql/src/backend/utils/mb/encnames.c 2007-11-15 23:14:40.000000000 +0200 +++ ./src/backend/utils/mb/encnames.c 2009-02-06 18:18:28.000000000 +0200 @@ -123,6 +123,9 @@ "koi8r", PG_KOI8R }, /* KOI8-R; RFC1489 */ { + "koi8u", PG_KOI8U + }, /* KOI8-U; RFC2319 */ + { "latin1", PG_LATIN1 }, /* alias for ISO-8859-1 */ { @@ -366,7 +369,7 @@ "WIN874", PG_WIN874 }, { - "KOI8", PG_KOI8R + "KOI8R", PG_KOI8R }, { "WIN1251", PG_WIN1251 @@ -402,6 +405,9 @@ "WIN1257", PG_WIN1257 }, { + "KOI8U", PG_KOI8U + }, + { "SJIS", PG_SJIS }, { diff -Nur ../cvs-pgsql/src/backend/utils/mb/Unicode/koi8u_to_utf8.map ./src/backend/utils/mb/Unicode/koi8u_to_utf8.map --- ../cvs-pgsql/src/backend/utils/mb/Unicode/koi8u_to_utf8.map 1970-01-01 02:00:00.000000000 +0200 +++ ./src/backend/utils/mb/Unicode/koi8u_to_utf8.map 2009-02-06 19:26:41.000000000 +0200 @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapKOI8U[ 128 ] = { + {0x0080, 0xe29480}, + {0x0081, 0xe29482}, + {0x0082, 0xe2948c}, + {0x0083, 0xe29490}, + {0x0084, 0xe29494}, + {0x0085, 0xe29498}, + {0x0086, 0xe2949c}, + {0x0087, 0xe294a4}, + {0x0088, 0xe294ac}, + {0x0089, 0xe294b4}, + {0x008a, 0xe294bc}, + {0x008b, 0xe29680}, + {0x008c, 0xe29684}, + {0x008d, 0xe29688}, + {0x008e, 0xe2968c}, + {0x008f, 0xe29690}, + {0x0090, 0xe29691}, + {0x0091, 0xe29692}, + {0x0092, 0xe29693}, + {0x0093, 0xe28ca0}, + {0x0094, 0xe296a0}, + {0x0095, 0xe28899}, + {0x0096, 0xe2889a}, + {0x0097, 0xe28988}, + {0x0098, 0xe289a4}, + {0x0099, 0xe289a5}, + {0x009a, 
0xc2a0}, + {0x009b, 0xe28ca1}, + {0x009c, 0xc2b0}, + {0x009d, 0xc2b2}, + {0x009e, 0xc2b7}, + {0x009f, 0xc3b7}, + {0x00a0, 0xe29590}, + {0x00a1, 0xe29591}, + {0x00a2, 0xe29592}, + {0x00a3, 0xd191}, + {0x00a4, 0xd194}, + {0x00a5, 0xe29594}, + {0x00a6, 0xd196}, + {0x00a7, 0xd197}, + {0x00a8, 0xe29597}, + {0x00a9, 0xe29598}, + {0x00aa, 0xe29599}, + {0x00ab, 0xe2959a}, + {0x00ac, 0xe2959b}, + {0x00ad, 0xd291}, + {0x00ae, 0xe2959d}, + {0x00af, 0xe2959e}, + {0x00b0, 0xe2959f}, + {0x00b1, 0xe295a0}, + {0x00b2, 0xe295a1}, + {0x00b3, 0xd081}, + {0x00b4, 0xd084}, + {0x00b5, 0xe295a3}, + {0x00b6, 0xd086}, + {0x00b7, 0xd087}, + {0x00b8, 0xe295a6}, + {0x00b9, 0xe295a7}, + {0x00ba, 0xe295a8}, + {0x00bb, 0xe295a9}, + {0x00bc, 0xe295aa}, + {0x00bd, 0xd290}, + {0x00be, 0xe295ac}, + {0x00bf, 0xc2a9}, + {0x00c0, 0xd18e}, + {0x00c1, 0xd0b0}, + {0x00c2, 0xd0b1}, + {0x00c3, 0xd186}, + {0x00c4, 0xd0b4}, + {0x00c5, 0xd0b5}, + {0x00c6, 0xd184}, + {0x00c7, 0xd0b3}, + {0x00c8, 0xd185}, + {0x00c9, 0xd0b8}, + {0x00ca, 0xd0b9}, + {0x00cb, 0xd0ba}, + {0x00cc, 0xd0bb}, + {0x00cd, 0xd0bc}, + {0x00ce, 0xd0bd}, + {0x00cf, 0xd0be}, + {0x00d0, 0xd0bf}, + {0x00d1, 0xd18f}, + {0x00d2, 0xd180}, + {0x00d3, 0xd181}, + {0x00d4, 0xd182}, + {0x00d5, 0xd183}, + {0x00d6, 0xd0b6}, + {0x00d7, 0xd0b2}, + {0x00d8, 0xd18c}, + {0x00d9, 0xd18b}, + {0x00da, 0xd0b7}, + {0x00db, 0xd188}, + {0x00dc, 0xd18d}, + {0x00dd, 0xd189}, + {0x00de, 0xd187}, + {0x00df, 0xd18a}, + {0x00e0, 0xd0ae}, + {0x00e1, 0xd090}, + {0x00e2, 0xd091}, + {0x00e3, 0xd0a6}, + {0x00e4, 0xd094}, + {0x00e5, 0xd095}, + {0x00e6, 0xd0a4}, + {0x00e7, 0xd093}, + {0x00e8, 0xd0a5}, + {0x00e9, 0xd098}, + {0x00ea, 0xd099}, + {0x00eb, 0xd09a}, + {0x00ec, 0xd09b}, + {0x00ed, 0xd09c}, + {0x00ee, 0xd09d}, + {0x00ef, 0xd09e}, + {0x00f0, 0xd09f}, + {0x00f1, 0xd0af}, + {0x00f2, 0xd0a0}, + {0x00f3, 0xd0a1}, + {0x00f4, 0xd0a2}, + {0x00f5, 0xd0a3}, + {0x00f6, 0xd096}, + {0x00f7, 0xd092}, + {0x00f8, 0xd0ac}, + {0x00f9, 0xd0ab}, + {0x00fa, 0xd097}, + {0x00fb, 0xd0a8}, + 
{0x00fc, 0xd0ad}, + {0x00fd, 0xd0a9}, + {0x00fe, 0xd0a7}, + {0x00ff, 0xd0aa} +}; diff -Nur ../cvs-pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl ./src/backend/utils/mb/Unicode/UCS_to_most.pl --- ../cvs-pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl 2009-01-19 09:59:45.000000000 +0200 +++ ./src/backend/utils/mb/Unicode/UCS_to_most.pl 2009-02-06 18:52:13.000000000 +0200 @@ -43,6 +43,7 @@ 'ISO8859_15' => '8859-15.TXT', 'ISO8859_16' => '8859-16.TXT', 'KOI8R' => 'KOI8-R.TXT', + 'KOI8U' => 'KOI8-U.TXT', 'GBK' => 'CP936.TXT', 'UHC' => 'CP949.TXT', 'JOHAB' => 'JOHAB.TXT', @@ -50,6 +51,7 @@ ); @charsets = keys(filename); +@charsets = @ARGV if scalar(@ARGV); foreach $charset (@charsets) { # diff -Nur ../cvs-pgsql/src/backend/utils/mb/Unicode/utf8_to_koi8u.map ./src/backend/utils/mb/Unicode/utf8_to_koi8u.map --- ../cvs-pgsql/src/backend/utils/mb/Unicode/utf8_to_koi8u.map 1970-01-01 02:00:00.000000000 +0200 +++ ./src/backend/utils/mb/Unicode/utf8_to_koi8u.map 2009-02-06 18:52:18.000000000 +0200 @@ -0,0 +1,130 @@ +static pg_utf_to_local ULmapKOI8U[ 128 ] = { + {0xc2a0, 0x009a}, + {0xc2a9, 0x00bf}, + {0xc2b0, 0x009c}, + {0xc2b2, 0x009d}, + {0xc2b7, 0x009e}, + {0xc3b7, 0x009f}, + {0xd081, 0x00b3}, + {0xd084, 0x00b4}, + {0xd086, 0x00b6}, + {0xd087, 0x00b7}, + {0xd090, 0x00e1}, + {0xd091, 0x00e2}, + {0xd092, 0x00f7}, + {0xd093, 0x00e7}, + {0xd094, 0x00e4}, + {0xd095, 0x00e5}, + {0xd096, 0x00f6}, + {0xd097, 0x00fa}, + {0xd098, 0x00e9}, + {0xd099, 0x00ea}, + {0xd09a, 0x00eb}, + {0xd09b, 0x00ec}, + {0xd09c, 0x00ed}, + {0xd09d, 0x00ee}, + {0xd09e, 0x00ef}, + {0xd09f, 0x00f0}, + {0xd0a0, 0x00f2}, + {0xd0a1, 0x00f3}, + {0xd0a2, 0x00f4}, + {0xd0a3, 0x00f5}, + {0xd0a4, 0x00e6}, + {0xd0a5, 0x00e8}, + {0xd0a6, 0x00e3}, + {0xd0a7, 0x00fe}, + {0xd0a8, 0x00fb}, + {0xd0a9, 0x00fd}, + {0xd0aa, 0x00ff}, + {0xd0ab, 0x00f9}, + {0xd0ac, 0x00f8}, + {0xd0ad, 0x00fc}, + {0xd0ae, 0x00e0}, + {0xd0af, 0x00f1}, + {0xd0b0, 0x00c1}, + {0xd0b1, 0x00c2}, + {0xd0b2, 0x00d7}, + {0xd0b3, 0x00c7}, + {0xd0b4, 
0x00c4}, + {0xd0b5, 0x00c5}, + {0xd0b6, 0x00d6}, + {0xd0b7, 0x00da}, + {0xd0b8, 0x00c9}, + {0xd0b9, 0x00ca}, + {0xd0ba, 0x00cb}, + {0xd0bb, 0x00cc}, + {0xd0bc, 0x00cd}, + {0xd0bd, 0x00ce}, + {0xd0be, 0x00cf}, + {0xd0bf, 0x00d0}, + {0xd180, 0x00d2}, + {0xd181, 0x00d3}, + {0xd182, 0x00d4}, + {0xd183, 0x00d5}, + {0xd184, 0x00c6}, + {0xd185, 0x00c8}, + {0xd186, 0x00c3}, + {0xd187, 0x00de}, + {0xd188, 0x00db}, + {0xd189, 0x00dd}, + {0xd18a, 0x00df}, + {0xd18b, 0x00d9}, + {0xd18c, 0x00d8}, + {0xd18d, 0x00dc}, + {0xd18e, 0x00c0}, + {0xd18f, 0x00d1}, + {0xd191, 0x00a3}, + {0xd194, 0x00a4}, + {0xd196, 0x00a6}, + {0xd197, 0x00a7}, + {0xd290, 0x00bd}, + {0xd291, 0x00ad}, + {0xe28899, 0x0095}, + {0xe2889a, 0x0096}, + {0xe28988, 0x0097}, + {0xe289a4, 0x0098}, + {0xe289a5, 0x0099}, + {0xe28ca0, 0x0093}, + {0xe28ca1, 0x009b}, + {0xe29480, 0x0080}, + {0xe29482, 0x0081}, + {0xe2948c, 0x0082}, + {0xe29490, 0x0083}, + {0xe29494, 0x0084}, + {0xe29498, 0x0085}, + {0xe2949c, 0x0086}, + {0xe294a4, 0x0087}, + {0xe294ac, 0x0088}, + {0xe294b4, 0x0089}, + {0xe294bc, 0x008a}, + {0xe29590, 0x00a0}, + {0xe29591, 0x00a1}, + {0xe29592, 0x00a2}, + {0xe29594, 0x00a5}, + {0xe29597, 0x00a8}, + {0xe29598, 0x00a9}, + {0xe29599, 0x00aa}, + {0xe2959a, 0x00ab}, + {0xe2959b, 0x00ac}, + {0xe2959d, 0x00ae}, + {0xe2959e, 0x00af}, + {0xe2959f, 0x00b0}, + {0xe295a0, 0x00b1}, + {0xe295a1, 0x00b2}, + {0xe295a3, 0x00b5}, + {0xe295a6, 0x00b8}, + {0xe295a7, 0x00b9}, + {0xe295a8, 0x00ba}, + {0xe295a9, 0x00bb}, + {0xe295aa, 0x00bc}, + {0xe295ac, 0x00be}, + {0xe29680, 0x008b}, + {0xe29684, 0x008c}, + {0xe29688, 0x008d}, + {0xe2968c, 0x008e}, + {0xe29690, 0x008f}, + {0xe29691, 0x0090}, + {0xe29692, 0x0091}, + {0xe29693, 0x0092}, + {0xe296a0, 0x0094} +}; diff -Nur ../cvs-pgsql/src/backend/utils/mb/wchar.c ./src/backend/utils/mb/wchar.c --- ../cvs-pgsql/src/backend/utils/mb/wchar.c 2009-01-30 10:35:27.000000000 +0200 +++ ./src/backend/utils/mb/wchar.c 2009-02-06 18:19:48.000000000 +0200 @@ -1373,6 +1373,7 @@ 
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 31; PG_WIN1254 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 32; PG_WIN1255 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 33; PG_WIN1257 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 34; PG_KOI8U */ {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 34; PG_SJIS */ {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 35; PG_BIG5 */ {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 36; PG_GBK */ diff -Nur ../cvs-pgsql/src/include/mb/pg_wchar.h ./src/include/mb/pg_wchar.h --- ../cvs-pgsql/src/include/mb/pg_wchar.h 2009-01-30 10:35:28.000000000 +0200 +++ ./src/include/mb/pg_wchar.h 2009-02-06 19:25:34.000000000 +0200 @@ -202,6 +202,7 @@ PG_WIN1254, /* windows-1254 */ PG_WIN1255, /* windows-1255 */ PG_WIN1257, /* windows-1257 */ + PG_KOI8U, /* KOI8-U */ /* PG_ENCODING_BE_LAST points to the above entry */ /* followings are for client encoding only */ @@ -216,7 +217,7 @@ } pg_enc; -#define PG_ENCODING_BE_LAST PG_WIN1257 +#define PG_ENCODING_BE_LAST PG_KOI8U /* * Please use these tests before access to pg_encconv_tbl[] diff -Nur ../cvs-pgsql/src/port/chklocale.c ./src/port/chklocale.c --- ../cvs-pgsql/src/port/chklocale.c 2009-01-19 10:00:11.000000000 +0200 +++ ./src/port/chklocale.c 2009-02-06 18:20:56.000000000 +0200 @@ -123,6 +123,9 @@ {PG_KOI8R, "KOI8-R"}, {PG_KOI8R, "CP20866"}, + {PG_KOI8U, "KOI8-U"}, + {PG_KOI8U, "CP21866"}, + {PG_WIN866, "CP866"}, {PG_WIN874, "CP874"}, {PG_WIN1250, "CP1250"},
В списке pgsql-hackers по дате отправления: