Re: TM format can mix encodings in to_char()

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: TM format can mix encodings in to_char()
Дата
Msg-id 8523.1555785972@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: TM format can mix encodings in to_char()  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: TM format can mix encodings in to_char()  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
I wrote:
> This is a little bit off, now that I look at it, because it's
> failing to account for the possibility of getting -1 from
> pg_get_encoding_from_locale.  It should probably do what
> pg_bind_textdomain_codeset does:
>     if (encoding < 0)
>         encoding = PG_SQL_ASCII;

Actually, the more I looked at the existing code, the less happy I got
with its error handling.  cache_locale_time() is an absolute failure
in that respect, because it makes no attempt at all to avoid throwing
errors while LC_TIME or LC_CTYPE is set to a transient value.  We
could maybe tolerate LC_TIME being weird, but continuing with a value
of LC_CTYPE that doesn't match the database setting would almost
certainly be disastrous.

PGLC_localeconv had at least thought about the issue, but it ends up
wimping out:

        /*
         * Report it if we failed to restore anything.  Perhaps this should be
         * FATAL, rather than continuing with bad locale settings?
         */
        if (trouble)
            elog(WARNING, "failed to restore old locale");

And it's also oddly willing to keep going even if it couldn't get the
original setlocale settings to begin with.

I think that this code was written with only LC_MONETARY and LC_NUMERIC
in mind, for which there's at least some small argument that continuing
with unwanted values wouldn't be fatal (though IIRC, LC_NUMERIC would
still change the behavior of float8out).  Since we added code to also
change LC_CTYPE on Windows, I think that continuing on after a restore
failure would be disastrous.  And we've not heard any field reports
of this warning anyway, so there's no reason to think we have to support
the case in practice.

Hence, the attached revised patch changes the code to do elog(FATAL)
if it can't restore any locale settings to their previous values,
and it fixes cache_locale_time to not do anything risky while it's
got transient locale settings in place.

I propose to apply and back-patch this; the code's basically the
same in all supported branches.

            regards, tom lane

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 6e33d65..e607a39 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -490,7 +490,6 @@ PGLC_localeconv(void)
     static bool CurrentLocaleConvAllocated = false;
     struct lconv *extlconv;
     struct lconv worklconv;
-    bool        trouble = false;
     char       *save_lc_monetary;
     char       *save_lc_numeric;
 #ifdef WIN32
@@ -522,14 +521,16 @@ PGLC_localeconv(void)
      */
     memset(&worklconv, 0, sizeof(worklconv));

-    /* Save user's values of monetary and numeric locales */
+    /* Save prevailing values of monetary and numeric locales */
     save_lc_monetary = setlocale(LC_MONETARY, NULL);
-    if (save_lc_monetary)
-        save_lc_monetary = pstrdup(save_lc_monetary);
+    if (!save_lc_monetary)
+        elog(ERROR, "setlocale(NULL) failed");
+    save_lc_monetary = pstrdup(save_lc_monetary);

     save_lc_numeric = setlocale(LC_NUMERIC, NULL);
-    if (save_lc_numeric)
-        save_lc_numeric = pstrdup(save_lc_numeric);
+    if (!save_lc_numeric)
+        elog(ERROR, "setlocale(NULL) failed");
+    save_lc_numeric = pstrdup(save_lc_numeric);

 #ifdef WIN32

@@ -554,10 +555,11 @@ PGLC_localeconv(void)
      * UTF16 and convert from that.
      */

-    /* save user's value of ctype locale */
+    /* Save prevailing value of ctype locale */
     save_lc_ctype = setlocale(LC_CTYPE, NULL);
-    if (save_lc_ctype)
-        save_lc_ctype = pstrdup(save_lc_ctype);
+    if (!save_lc_ctype)
+        elog(ERROR, "setlocale(NULL) failed");
+    save_lc_ctype = pstrdup(save_lc_ctype);

     /* Here begins the critical section where we must not throw error */

@@ -601,27 +603,22 @@ PGLC_localeconv(void)
     worklconv.p_sign_posn = extlconv->p_sign_posn;
     worklconv.n_sign_posn = extlconv->n_sign_posn;

-    /* Try to restore internal settings */
-    if (save_lc_monetary)
-    {
-        if (!setlocale(LC_MONETARY, save_lc_monetary))
-            trouble = true;
-    }
-
-    if (save_lc_numeric)
-    {
-        if (!setlocale(LC_NUMERIC, save_lc_numeric))
-            trouble = true;
-    }
-
+    /*
+     * Restore the prevailing locale settings; failure to do so is fatal.
+     * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
+     * but proceeding with the wrong value of LC_CTYPE would certainly be bad
+     * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
+     * are almost certainly "C", there's really no reason that restoring those
+     * should fail.
+     */
 #ifdef WIN32
-    /* Try to restore internal ctype settings */
-    if (save_lc_ctype)
-    {
-        if (!setlocale(LC_CTYPE, save_lc_ctype))
-            trouble = true;
-    }
+    if (!setlocale(LC_CTYPE, save_lc_ctype))
+        elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
 #endif
+    if (!setlocale(LC_MONETARY, save_lc_monetary))
+        elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
+    if (!setlocale(LC_NUMERIC, save_lc_numeric))
+        elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

     /*
      * At this point we've done our best to clean up, and can call functions
@@ -632,21 +629,11 @@ PGLC_localeconv(void)
     {
         int            encoding;

-        /*
-         * Report it if we failed to restore anything.  Perhaps this should be
-         * FATAL, rather than continuing with bad locale settings?
-         */
-        if (trouble)
-            elog(WARNING, "failed to restore old locale");
-
         /* Release the pstrdup'd locale names */
-        if (save_lc_monetary)
-            pfree(save_lc_monetary);
-        if (save_lc_numeric)
-            pfree(save_lc_numeric);
+        pfree(save_lc_monetary);
+        pfree(save_lc_numeric);
 #ifdef WIN32
-        if (save_lc_ctype)
-            pfree(save_lc_ctype);
+        pfree(save_lc_ctype);
 #endif

         /* If any of the preceding strdup calls failed, complain now. */
@@ -657,15 +644,22 @@ PGLC_localeconv(void)

         /*
          * Now we must perform encoding conversion from whatever's associated
-         * with the locale into the database encoding.
+         * with the locales into the database encoding.  If we can't identify
+         * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
+         * use PG_SQL_ASCII, which will result in just validating that the
+         * strings are OK in the database encoding.
          */
         encoding = pg_get_encoding_from_locale(locale_numeric, true);
+        if (encoding < 0)
+            encoding = PG_SQL_ASCII;

         db_encoding_convert(encoding, &worklconv.decimal_point);
         db_encoding_convert(encoding, &worklconv.thousands_sep);
         /* grouping is not text and does not require conversion */

         encoding = pg_get_encoding_from_locale(locale_monetary, true);
+        if (encoding < 0)
+            encoding = PG_SQL_ASCII;

         db_encoding_convert(encoding, &worklconv.int_curr_symbol);
         db_encoding_convert(encoding, &worklconv.currency_symbol);
@@ -758,29 +752,29 @@ strftime_win32(char *dst, size_t dstlen,
 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
 #endif                            /* WIN32 */

-/* Subroutine for cache_locale_time(). */
+/*
+ * Subroutine for cache_locale_time().
+ * Convert the given string from encoding "encoding" to the database
+ * encoding, and store the result at *dst, replacing any previous value.
+ */
 static void
-cache_single_time(char **dst, const char *format, const struct tm *tm)
+cache_single_string(char **dst, const char *src, int encoding)
 {
-    char        buf[MAX_L10N_DATA];
     char       *ptr;
+    char       *olddst;

-    /*
-     * MAX_L10N_DATA is sufficient buffer space for every known locale, and
-     * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
-     * error.)    An implementation might report errors (e.g. ENOMEM) by
-     * returning 0 (or, less plausibly, a negative value) and setting errno.
-     * Report errno just in case the implementation did that, but clear it in
-     * advance of the call so we don't emit a stale, unrelated errno.
-     */
-    errno = 0;
-    if (strftime(buf, MAX_L10N_DATA, format, tm) <= 0)
-        elog(ERROR, "strftime(%s) failed: %m", format);
+    /* Convert the string to the database encoding, or validate it's OK */
+    ptr = pg_any_to_server(src, strlen(src), encoding);

-    ptr = MemoryContextStrdup(TopMemoryContext, buf);
-    if (*dst)
-        pfree(*dst);
-    *dst = ptr;
+    /* Store the string in long-lived storage, replacing any previous value */
+    olddst = *dst;
+    *dst = MemoryContextStrdup(TopMemoryContext, ptr);
+    if (olddst)
+        pfree(olddst);
+
+    /* Might as well clean up any palloc'd conversion result, too */
+    if (ptr != src)
+        pfree(ptr);
 }

 /*
@@ -789,11 +783,14 @@ cache_single_time(char **dst, const char *format, const struct tm *tm)
 void
 cache_locale_time(void)
 {
-    char       *save_lc_time;
+    char        buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
+    char       *bufptr;
     time_t        timenow;
     struct tm  *timeinfo;
+    bool        strftimefail = false;
+    int            encoding;
     int            i;
-
+    char       *save_lc_time;
 #ifdef WIN32
     char       *save_lc_ctype;
 #endif
@@ -804,10 +801,18 @@ cache_locale_time(void)

     elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

-    /* save user's value of time locale */
+    /*
+     * As in PGLC_localeconv(), it's critical that we not throw error while
+     * libc's locale settings have nondefault values.  Hence, we just call
+     * strftime() within the critical section, and then convert and save its
+     * results afterwards.
+     */
+
+    /* Save prevailing value of time locale */
     save_lc_time = setlocale(LC_TIME, NULL);
-    if (save_lc_time)
-        save_lc_time = pstrdup(save_lc_time);
+    if (!save_lc_time)
+        elog(ERROR, "setlocale(NULL) failed");
+    save_lc_time = pstrdup(save_lc_time);

 #ifdef WIN32

@@ -820,10 +825,11 @@ cache_locale_time(void)
      * PGLC_localeconv().
      */

-    /* save user's value of ctype locale */
+    /* Save prevailing value of ctype locale */
     save_lc_ctype = setlocale(LC_CTYPE, NULL);
-    if (save_lc_ctype)
-        save_lc_ctype = pstrdup(save_lc_ctype);
+    if (!save_lc_ctype)
+        elog(ERROR, "setlocale(NULL) failed");
+    save_lc_ctype = pstrdup(save_lc_ctype);

     /* use lc_time to set the ctype */
     setlocale(LC_CTYPE, locale_time);
@@ -831,15 +837,33 @@ cache_locale_time(void)

     setlocale(LC_TIME, locale_time);

+    /* We use times close to current time as data for strftime(). */
     timenow = time(NULL);
     timeinfo = localtime(&timenow);

+    /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
+    bufptr = buf;
+
+    /*
+     * MAX_L10N_DATA is sufficient buffer space for every known locale, and
+     * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
+     * error.)    An implementation might report errors (e.g. ENOMEM) by
+     * returning 0 (or, less plausibly, a negative value) and setting errno.
+     * Report errno just in case the implementation did that, but clear it in
+     * advance of the calls so we don't emit a stale, unrelated errno.
+     */
+    errno = 0;
+
     /* localized days */
     for (i = 0; i < 7; i++)
     {
         timeinfo->tm_wday = i;
-        cache_single_time(&localized_abbrev_days[i], "%a", timeinfo);
-        cache_single_time(&localized_full_days[i], "%A", timeinfo);
+        if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
+            strftimefail = true;
+        bufptr += MAX_L10N_DATA;
+        if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
+            strftimefail = true;
+        bufptr += MAX_L10N_DATA;
     }

     /* localized months */
@@ -847,27 +871,66 @@ cache_locale_time(void)
     {
         timeinfo->tm_mon = i;
         timeinfo->tm_mday = 1;    /* make sure we don't have invalid date */
-        cache_single_time(&localized_abbrev_months[i], "%b", timeinfo);
-        cache_single_time(&localized_full_months[i], "%B", timeinfo);
+        if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
+            strftimefail = true;
+        bufptr += MAX_L10N_DATA;
+        if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
+            strftimefail = true;
+        bufptr += MAX_L10N_DATA;
     }

-    /* try to restore internal settings */
-    if (save_lc_time)
+    /*
+     * Restore the prevailing locale settings; as in PGLC_localeconv(),
+     * failure to do so is fatal.
+     */
+#ifdef WIN32
+    if (!setlocale(LC_CTYPE, save_lc_ctype))
+        elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
+#endif
+    if (!setlocale(LC_TIME, save_lc_time))
+        elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
+
+    /*
+     * At this point we've done our best to clean up, and can throw errors, or
+     * call functions that might throw errors, with a clean conscience.
+     */
+    if (strftimefail)
+        elog(ERROR, "strftime() failed: %m");
+
+    /* Release the pstrdup'd locale names */
+    pfree(save_lc_time);
+#ifdef WIN32
+    pfree(save_lc_ctype);
+#endif
+
+    /*
+     * As in PGLC_localeconv(), we must convert the strings from the encoding
+     * implied by LC_TIME to the database encoding.  If we can't identify the
+     * LC_TIME encoding, just perform encoding validation.
+     */
+    encoding = pg_get_encoding_from_locale(locale_time, true);
+    if (encoding < 0)
+        encoding = PG_SQL_ASCII;
+
+    bufptr = buf;
+
+    /* localized days */
+    for (i = 0; i < 7; i++)
     {
-        if (!setlocale(LC_TIME, save_lc_time))
-            elog(WARNING, "failed to restore old locale");
-        pfree(save_lc_time);
+        cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
+        bufptr += MAX_L10N_DATA;
+        cache_single_string(&localized_full_days[i], bufptr, encoding);
+        bufptr += MAX_L10N_DATA;
     }

-#ifdef WIN32
-    /* try to restore internal ctype settings */
-    if (save_lc_ctype)
+    /* localized months */
+    for (i = 0; i < 12; i++)
     {
-        if (!setlocale(LC_CTYPE, save_lc_ctype))
-            elog(WARNING, "failed to restore old locale");
-        pfree(save_lc_ctype);
+        cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
+        bufptr += MAX_L10N_DATA;
+        cache_single_string(&localized_full_months[i], bufptr, encoding);
+        bufptr += MAX_L10N_DATA;
     }
-#endif

     CurrentLCTimeValid = true;
 }
diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out
index 15b3222..619688f 100644
--- a/src/test/regress/expected/collate.linux.utf8.out
+++ b/src/test/regress/expected/collate.linux.utf8.out
@@ -430,6 +430,18 @@ SELECT relname FROM pg_class WHERE relname ~* '^abc';

 -- to_char
 SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-02-01', 'DD TMMON YYYY');
+   to_char
+-------------
+ 01 ŞUB 2010
+(1 row)
+
+SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR");
+   to_char
+-------------
+ 01 ŞUB 2010
+(1 row)
+
 SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
    to_char
 -------------
diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql
index 4ca02b8..c009fd2 100644
--- a/src/test/regress/sql/collate.linux.utf8.sql
+++ b/src/test/regress/sql/collate.linux.utf8.sql
@@ -169,6 +169,8 @@ SELECT relname FROM pg_class WHERE relname ~* '^abc';
 -- to_char

 SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-02-01', 'DD TMMON YYYY');
+SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR");
 SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
 SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Alexander Korotkov
Дата:
Сообщение: Re: jsonpath
Следующее
От: Robert Haas
Дата:
Сообщение: Re: block-level incremental backup