Re: [GENERAL] trouble with to_char('L')

Поиск
Список
Период
Сортировка
От Bruce Momjian
Тема Re: [GENERAL] trouble with to_char('L')
Дата
Msg-id 201003021814.o22IE1s26092@momjian.us
обсуждение исходный текст
Ответ на Re: [GENERAL] trouble with to_char('L')  (Hiroshi Inoue <inoue@tpf.co.jp>)
Ответы Re: [GENERAL] trouble with to_char('L')  (Takahiro Itagaki <itagaki.takahiro@oss.ntt.co.jp>)
Список pgsql-hackers
Hiroshi Inoue wrote:
> >>>> I need someone with WIN32 experience to review and test this patch.
> >>> I don't understand why cache_locale_time() works on Windows.  It sets
> >>> the LC_CTYPE but does not do any encoding conversion.
> >> Doesn't strftime_win32 do the conversion?
> >
> > Oh, I now see strftime is redefined as a macro in that C file.  Thanks.
> >
> >>> Do month and
> >>> day-of-week names not work either, or do they work and the encoding
> >>> conversion for numeric/money, e.g. Euro, is not necessary?
> >> db_strdup does the conversion.
> >
> > Should we pull the encoding conversion into a separate function and have
> > strftime_win32() and db_strdup() both call it?
>
> We may be able to pull the conversion WideChars => UTF8 =>
> a PG encoding into a function.

OK, I have created a new function, win32_wchar_to_db_encoding(), to
share the conversion from wide characters to the database encoding.
New patch attached.

> BTW both PGLC_localeconv() and cache_locale_time() save the current
>   LC_CTYPE first and restore it just before returning from the functions.
> I'm not sure that is OK when errors occur in the middle of the functions.

Yea, I added a comment questioning if that is a problem.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  PG East:  http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v
retrieving revision 1.53
diff -c -c -r1.53 pg_locale.c
*** src/backend/utils/adt/pg_locale.c    27 Feb 2010 20:20:44 -0000    1.53
--- src/backend/utils/adt/pg_locale.c    2 Mar 2010 18:11:41 -0000
***************
*** 4,10 ****
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
--- 4,10 ----
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
***************
*** 96,101 ****
--- 96,109 ----
  static char *IsoLocaleName(const char *);        /* MSVC specific */
  #endif

+ #ifdef WIN32
+ static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf,
+                                 const size_t wchars, char *dst, size_t dstlen);
+ static char *db_encoding_strdup(const char *item, const char *str);
+ static size_t strftime_win32(char *dst, size_t dstlen, const wchar_t *format,
+                              const struct tm *tm);
+ #endif
+

  /*
   * pg_perm_setlocale
***************
*** 387,392 ****
--- 395,488 ----
  }


+ #ifdef    WIN32
+ /*
+  *    Convert wide character string (UTF16 on Win32) to UTF8, and then
+  *    optionally to the db encoding.
+  */
+ static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf,
+                                 const size_t wchars, char *dst, size_t dstlen)
+ {
+     int    db_encoding = GetDatabaseEncoding();
+     int    utf8len;
+
+     /* Convert wide string (UTF16) to UTF8 */
+     utf8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, wchars, dst, dstlen, NULL, NULL);
+     if (utf8len == 0)
+         /* Does this leave LC_CTYPE set incorrectly? */
+         elog(ERROR,
+             "could not convert string %04x to UTF-8: error %lu", wbuf[0], GetLastError());
+     pfree(wbuf);
+
+     dst[utf8len] = '\0';
+     if (db_encoding != PG_UTF8)
+     {
+         PG_TRY();
+         {
+             char *convstr = pg_do_encoding_conversion(dst, utf8len, PG_UTF8, db_encoding);
+             if (dst != convstr)
+             {
+                 strlcpy(dst, convstr, dstlen);
+                 pfree(convstr);
+             }
+         }
+         PG_CATCH();
+         {
+             FlushErrorState();
+             dst[0] = '\0';
+         }
+         PG_END_TRY();
+     }
+
+     return pg_mbstrlen(dst);
+ }
+
+ /*
+  *    This converts the LC_CTYPE-encoded string returned from the
+  *    locale routines to the database encoding.
+  */
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+     int    db_encoding = GetDatabaseEncoding();
+     size_t    wchars, ilen, wclen, dstlen;
+     int    bytes_per_char;
+     wchar_t    *wbuf;
+     char    *dst;
+
+     if (!str[0])
+         return strdup(str);
+
+     /* allocate wide character string */
+     ilen = strlen(str) + 1;
+     wclen = ilen * sizeof(wchar_t);
+     wbuf = (wchar_t *) palloc(wclen);
+
+     /* Convert multi-byte string using current LC_CTYPE to a wide-character string */
+     wchars = mbstowcs(wbuf, str, ilen);
+     if (wchars == (size_t) -1)
+         elog(ERROR,
+             "could not convert string to wide characters: error %lu", GetLastError());
+
+     /* allocate target string */
+     bytes_per_char = pg_encoding_max_length(PG_UTF8);
+     if (pg_encoding_max_length(db_encoding) > bytes_per_char)
+         bytes_per_char = pg_encoding_max_length(db_encoding);
+     dstlen = wchars * bytes_per_char + 1;
+     if ((dst = malloc(dstlen)) == NULL)
+         elog(ERROR, "could not allocate a destination buffer");
+
+     /* Convert wide string (UTF16) to db encoding */
+     win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen);
+
+     return dst;
+ }
+ #else
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+     return strdup(str);
+ }
+ #endif /* WIN32 */
+
  /*
   * Return the POSIX lconv struct (contains number/money formatting
   * information) with locale information for all categories.
***************
*** 398,403 ****
--- 494,502 ----
      struct lconv *extlconv;
      char       *save_lc_monetary;
      char       *save_lc_numeric;
+ #ifdef    WIN32
+     char       *save_lc_ctype = NULL;
+ #endif

      /* Did we do it already? */
      if (CurrentLocaleConvValid)
***************
*** 413,442 ****
      if (save_lc_numeric)
          save_lc_numeric = pstrdup(save_lc_numeric);

      setlocale(LC_MONETARY, locale_monetary);
      setlocale(LC_NUMERIC, locale_numeric);
!
!     /* Get formatting information */
      extlconv = localeconv();

      /*
!      * Must copy all values since restoring internal settings may overwrite
       * localeconv()'s results.
       */
      CurrentLocaleConv = *extlconv;
!     CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol);
!     CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point);
!     CurrentLocaleConv.grouping = strdup(extlconv->grouping);
!     CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep);
!     CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
!     CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
      CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
!     CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
!     CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign);
!     CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign);
      CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;

!     /* Try to restore internal settings */
      if (save_lc_monetary)
      {
          setlocale(LC_MONETARY, save_lc_monetary);
--- 512,588 ----
      if (save_lc_numeric)
          save_lc_numeric = pstrdup(save_lc_numeric);

+ #ifdef    WIN32
+     /*
+      *    Ideally, the db server encoding and locale settings would
+      *    always match.  Unfortunately, WIN32 does not support UTF-8
+      *    values for setlocale(), even though PostgreSQL runs fine with
+      *    a UTF-8 encoding on Windows:
+      *
+      *        http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
+      *
+      *    Therefore, we must set LC_CTYPE to match LC_NUMERIC and
+      *    LC_MONETARY, call localeconv(), and use mbstowcs() to
+      *    convert the locale-aware string, e.g. Euro symbol, which
+      *    is not in UTF-8 to the server encoding.
+      */
+
+     if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL)
+     {
+         save_lc_ctype = pstrdup(save_lc_ctype);
+         /* Set LC_CTYPE to match LC_MONETARY? */
+         if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0)
+             setlocale(LC_CTYPE, locale_monetary);
+     }
+ #endif
+
      setlocale(LC_MONETARY, locale_monetary);
      setlocale(LC_NUMERIC, locale_numeric);
!     /*
!      *    Get formatting information for LC_MONETARY, and LC_NUMERIC if they
!      *    are the same.
!      */
      extlconv = localeconv();

      /*
!      * Must copy all values since restoring internal settings might overwrite
       * localeconv()'s results.
       */
      CurrentLocaleConv = *extlconv;
!
!     /* The first argument of db_encoding_strdup() is only used on WIN32 */
!     CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol);
!     CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol);
!     CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point);
      CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
!     CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep);
!     CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign);
!     CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign);
      CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;

! #ifdef    WIN32
!     if (save_lc_ctype && pg_strcasecmp(locale_numeric, locale_monetary) != 0)
!     {
!         setlocale(LC_CTYPE, locale_numeric);
!         /* Get formatting information for LC_NUMERIC with matching LC_CTYPE */
!         extlconv = localeconv();
!     }
! #endif
!
!     CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point);
!     CurrentLocaleConv.grouping = strdup(extlconv->grouping);
!     CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep);
!
!     /*
!      *    Restore internal settings
!      */
! #ifdef    WIN32
!     if (save_lc_ctype)
!     {
!         setlocale(LC_CTYPE, save_lc_ctype);
!         pfree(save_lc_ctype);
!     }
! #endif
      if (save_lc_monetary)
      {
          setlocale(LC_MONETARY, save_lc_monetary);
***************
*** 455,483 ****

  #ifdef WIN32
  /*
!  * On win32, strftime() returns the encoding in CP_ACP, which is likely
!  * different from SERVER_ENCODING. This is especially important in Japanese
!  * versions of Windows which will use SJIS encoding, which we don't support
!  * as a server encoding.
!  *
!  * Replace strftime() with a version that gets the string in UTF16 and then
!  * converts it to the appropriate encoding as necessary.
   *
   * Note that this only affects the calls to strftime() in this file, which are
   * used to get the locale-aware strings. Other parts of the backend use
   * pg_strftime(), which isn't locale-aware and does not need to be replaced.
   */
  static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm)
  {
!     size_t        len;
      wchar_t        wbuf[MAX_L10N_DATA];
-     int            encoding;

!     encoding = GetDatabaseEncoding();
!
!     len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
!     if (len == 0)

          /*
           * strftime call failed - return 0 with the contents of dst
--- 601,628 ----

  #ifdef WIN32
  /*
!  * On WIN32, strftime() returns the encoding in CP_ACP (the default
!  * operating system codepage for that computer), which is likely different
!  * from SERVER_ENCODING.  This is especially important in Japanese versions
!  * of Windows which will use SJIS encoding, which we don't support as a
!  * server encoding.
!  *
!  * So, instead of using strftime(), use wcsftime() to return the value in
!  * wide characters (internally UTF16) and then convert it to the appropriate
!  * database encoding.
   *
   * Note that this only affects the calls to strftime() in this file, which are
   * used to get the locale-aware strings. Other parts of the backend use
   * pg_strftime(), which isn't locale-aware and does not need to be replaced.
   */
  static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm *tm)
  {
!     size_t        wchars;
      wchar_t        wbuf[MAX_L10N_DATA];

!     wchars = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
!     if (wchars == 0)

          /*
           * strftime call failed - return 0 with the contents of dst
***************
*** 485,511 ****
           */
          return 0;

!     len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
!     if (len == 0)
!         elog(ERROR,
!              "could not convert string to UTF-8:error %lu", GetLastError());
!
!     dst[len] = '\0';
!     if (encoding != PG_UTF8)
!     {
!         char       *convstr = pg_do_encoding_conversion(dst, len, PG_UTF8, encoding);
!
!         if (dst != convstr)
!         {
!             strlcpy(dst, convstr, dstlen);
!             len = strlen(dst);
!         }
!     }
!
!     return len;
  }

  #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
  #endif   /* WIN32 */


--- 630,641 ----
           */
          return 0;

!     return win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen);
  }

+ /* redefine strftime() */
  #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
+
  #endif   /* WIN32 */


***************
*** 533,542 ****
      elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

  #ifdef WIN32
!     /* set user's value of ctype locale */
      save_lc_ctype = setlocale(LC_CTYPE, NULL);
      if (save_lc_ctype)
          save_lc_ctype = pstrdup(save_lc_ctype);

      setlocale(LC_CTYPE, locale_time);
  #endif
--- 663,674 ----
      elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

  #ifdef WIN32
!     /* See the WIN32 comment near the top of PGLC_localeconv() */
      save_lc_ctype = setlocale(LC_CTYPE, NULL);
      if (save_lc_ctype)
          save_lc_ctype = pstrdup(save_lc_ctype);
+     else
+         save_lc_ctype = pstrdup("");

      setlocale(LC_CTYPE, locale_time);
  #endif

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andrew Dunstan
Дата:
Сообщение: Re: USE_LIBXSLT in MSVC builds
Следующее
От: Bruce Momjian
Дата:
Сообщение: Re: Re: Hot Standby query cancellation and Streaming Replication integration