upper()/lower() truncates the result under Japanese Windows

Поиск
Список
Период
Сортировка
От Hiroshi Inoue
Тема upper()/lower() truncates the result under Japanese Windows
Дата
Msg-id 4944DE4A.8050001@tpf.co.jp
обсуждение исходный текст
Ответы Re: upper()/lower() truncates the result under Japanese Windows  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
Hi,

The upper(), lower(), and initcap() functions truncate their results
under Japanese Windows when, for example, the server encoding is UTF-8
and LC_CTYPE is set to Japanese_Japan.932.

Below is an example.

$ psql
psql (8.4devel)
Type "help" for help.

inoue=# \encoding sjis

inoue=# show server_encoding;
  server_encoding
-----------------
  UTF8
(1 行)

inoue=# show LC_CTYPE;
       lc_ctype
--------------------
  Japanese_Japan.932
(1 行)

inoue=# \set jpnstr '''カタカナ'''
inoue=# select char_length(:jpnstr);
  char_length
-------------
            4
(1 行)

inoue=# select upper(:jpnstr);
  upper
--------
  カタカ
(1 行)

inoue=# select char_length(upper(:jpnstr));
  char_length
-------------
            3
(1 行)

The output of the last command should be 4, not 3.
Attached is a patch to fix the bug.
After applying the patch, the result is:

inoue=# select upper(:jpnstr);
   upper
----------
  カタカナ
(1 行)

inoue=# select char_length(upper(:jpnstr));
  char_length
-------------
            4
(1 行)

regards,
Hiroshi Inoue


Index: formatting.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.151
diff -c -c -r1.151 formatting.c
*** formatting.c    1 Dec 2008 17:11:18 -0000    1.151
--- formatting.c    14 Dec 2008 09:09:00 -0000
***************
*** 1462,1467 ****
--- 1462,1468 ----
      {
          wchar_t        *workspace;
          int            curr_char = 0;
+         size_t        max_len, alloc_size;

          /* Output workspace cannot have more codes than input bytes */
          workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***************
*** 1472,1480 ****
              workspace[curr_char] = towlower(workspace[curr_char]);

          /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
--- 1473,1489 ----
              workspace[curr_char] = towlower(workspace[curr_char]);

          /* Make result large enough; case change might change number of bytes */
! #ifdef    WIN32
!         max_len = pg_database_encoding_max_length();
!         if (MB_CUR_MAX > max_len)
!             max_len = MB_CUR_MAX;
! #else
!         max_len = MB_CUR_MAX;
! #endif
!         alloc_size = curr_char * max_len + 1;
!         result = palloc(alloc_size);

!         wchar2char(result, workspace, alloc_size);
          pfree(workspace);
      }
      else
***************
*** 1510,1515 ****
--- 1519,1525 ----
      {
          wchar_t        *workspace;
          int            curr_char = 0;
+         size_t        max_len, alloc_size;

          /* Output workspace cannot have more codes than input bytes */
          workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***************
*** 1520,1528 ****
              workspace[curr_char] = towupper(workspace[curr_char]);

          /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
--- 1530,1546 ----
              workspace[curr_char] = towupper(workspace[curr_char]);

          /* Make result large enough; case change might change number of bytes */
! #ifdef    WIN32
!         max_len = pg_database_encoding_max_length();
!         if (MB_CUR_MAX > max_len)
!             max_len = MB_CUR_MAX;
! #else
!         max_len = MB_CUR_MAX;
! #endif
!         alloc_size = curr_char * max_len + 1;
!         result = palloc(alloc_size);

!         wchar2char(result, workspace, alloc_size);
          pfree(workspace);
      }
      else
***************
*** 1559,1564 ****
--- 1577,1583 ----
      {
          wchar_t        *workspace;
          int            curr_char = 0;
+         size_t        max_len, alloc_size;

          /* Output workspace cannot have more codes than input bytes */
          workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***************
*** 1575,1583 ****
          }

          /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
--- 1594,1610 ----
          }

          /* Make result large enough; case change might change number of bytes */
! #ifdef    WIN32
!         max_len = pg_database_encoding_max_length();
!         if (MB_CUR_MAX > max_len)
!             max_len = MB_CUR_MAX;
! #else
!         max_len = MB_CUR_MAX;
! #endif
!         alloc_size = curr_char * max_len + 1;
!         result = palloc(alloc_size);

!         wchar2char(result, workspace, alloc_size);
          pfree(workspace);
      }
      else

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Emmanuel Cecchet
Дата:
Сообщение: Re: Sync Rep: First Thoughts on Code
Следующее
От: Jan Urbański
Дата:
Сообщение: Re: Stats target increase vs compute_tsvector_stats()