Re: [HACKERS] Cutting initdb's runtime (Perl question embedded)

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: [HACKERS] Cutting initdb's runtime (Perl question embedded)
Дата
Msg-id 25428.1533925885@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: [HACKERS] Cutting initdb's runtime (Perl question embedded)  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: [HACKERS] Cutting initdb's runtime (Perl question embedded)
Список pgsql-hackers
I wrote:
>> ... I'll
>> take a look at whipping up something that checks /etc/localtime.

> Here's a draft patch.  It seems to do what I expect on a couple of
> different macOS releases as well as recent Fedora.

The cfbot points out that this has suffered bit-rot, so here's a rebased
version --- no substantive changes.

            regards, tom lane

diff --git a/src/bin/initdb/findtimezone.c b/src/bin/initdb/findtimezone.c
index 4c3a91a..6901188 100644
*** a/src/bin/initdb/findtimezone.c
--- b/src/bin/initdb/findtimezone.c
***************
*** 15,20 ****
--- 15,21 ----
  #include <fcntl.h>
  #include <sys/stat.h>
  #include <time.h>
+ #include <unistd.h>

  #include "pgtz.h"

*************** pg_load_tz(const char *name)
*** 126,137 ****
   * On most systems, we rely on trying to match the observable behavior of
   * the C library's localtime() function.  The database zone that matches
   * furthest into the past is the one to use.  Often there will be several
!  * zones with identical rankings (since the Olson database assigns multiple
   * names to many zones).  We break ties arbitrarily by preferring shorter,
   * then alphabetically earlier zone names.
   *
   * Win32's native knowledge about timezones appears to be too incomplete
!  * and too different from the Olson database for the above matching strategy
   * to be of any use. But there is just a limited number of timezones
   * available, so we can rely on a handmade mapping table instead.
   */
--- 127,145 ----
   * On most systems, we rely on trying to match the observable behavior of
   * the C library's localtime() function.  The database zone that matches
   * furthest into the past is the one to use.  Often there will be several
!  * zones with identical rankings (since the IANA database assigns multiple
   * names to many zones).  We break ties arbitrarily by preferring shorter,
   * then alphabetically earlier zone names.
   *
+  * Many modern systems use the IANA database, so if we can determine the
+  * system's idea of which zone it is using and its behavior matches our zone
+  * of the same name, we can skip the rather-expensive search through all the
+  * zones in our database.  This short-circuit path also ensures that we spell
+  * the zone name the same way the system setting does, even in the presence
+  * of multiple aliases for the same zone.
+  *
   * Win32's native knowledge about timezones appears to be too incomplete
!  * and too different from the IANA database for the above matching strategy
   * to be of any use. But there is just a limited number of timezones
   * available, so we can rely on a handmade mapping table instead.
   */
*************** struct tztry
*** 150,155 ****
--- 158,165 ----
      time_t        test_times[MAX_TEST_TIMES];
  };

+ static bool check_system_link_file(const char *linkname, struct tztry *tt,
+                        char *bestzonename);
  static void scan_available_timezones(char *tzdir, char *tzdirsub,
                           struct tztry *tt,
                           int *bestscore, char *bestzonename);
*************** score_timezone(const char *tzname, struc
*** 299,310 ****
      return i;
  }


  /*
   * Try to identify a timezone name (in our terminology) that best matches the
!  * observed behavior of the system timezone library.  We cannot assume that
!  * the system TZ environment setting (if indeed there is one) matches our
!  * terminology, so we ignore it and just look at what localtime() returns.
   */
  static const char *
  identify_system_timezone(void)
--- 309,327 ----
      return i;
  }

+ /*
+  * Test whether tzname matches localtime() perfectly (score == n_test_times)
+  */
+ static bool
+ perfect_timezone_match(const char *tzname, struct tztry *tt)
+ {
+     return (score_timezone(tzname, tt) == tt->n_test_times);
+ }
+

  /*
   * Try to identify a timezone name (in our terminology) that best matches the
!  * observed behavior of the system localtime() function.
   */
  static const char *
  identify_system_timezone(void)
*************** identify_system_timezone(void)
*** 339,345 ****
       * way of doing things, but experience has shown that system-supplied
       * timezone definitions are likely to have DST behavior that is right for
       * the recent past and not so accurate further back. Scoring in this way
!      * allows us to recognize zones that have some commonality with the Olson
       * database, without insisting on exact match. (Note: we probe Thursdays,
       * not Sundays, to avoid triggering DST-transition bugs in localtime
       * itself.)
--- 356,362 ----
       * way of doing things, but experience has shown that system-supplied
       * timezone definitions are likely to have DST behavior that is right for
       * the recent past and not so accurate further back. Scoring in this way
!      * allows us to recognize zones that have some commonality with the IANA
       * database, without insisting on exact match. (Note: we probe Thursdays,
       * not Sundays, to avoid triggering DST-transition bugs in localtime
       * itself.)
*************** identify_system_timezone(void)
*** 374,380 ****
          tt.test_times[tt.n_test_times++] = t;
      }

!     /* Search for the best-matching timezone file */
      strlcpy(tmptzdir, pg_TZDIR(), sizeof(tmptzdir));
      bestscore = -1;
      resultbuf[0] = '\0';
--- 391,408 ----
          tt.test_times[tt.n_test_times++] = t;
      }

!     /*
!      * Try to avoid the brute-force search by seeing if we can recognize the
!      * system's timezone setting directly.
!      *
!      * Currently we just check /etc/localtime; there are other conventions for
!      * this, but that seems to be the only one used on enough platforms to be
!      * worth troubling over.
!      */
!     if (check_system_link_file("/etc/localtime", &tt, resultbuf))
!         return resultbuf;
!
!     /* No luck, so search for the best-matching timezone file */
      strlcpy(tmptzdir, pg_TZDIR(), sizeof(tmptzdir));
      bestscore = -1;
      resultbuf[0] = '\0';
*************** identify_system_timezone(void)
*** 383,389 ****
                               &bestscore, resultbuf);
      if (bestscore > 0)
      {
!         /* Ignore Olson's rather silly "Factory" zone; use GMT instead */
          if (strcmp(resultbuf, "Factory") == 0)
              return NULL;
          return resultbuf;
--- 411,417 ----
                               &bestscore, resultbuf);
      if (bestscore > 0)
      {
!         /* Ignore IANA's rather silly "Factory" zone; use GMT instead */
          if (strcmp(resultbuf, "Factory") == 0)
              return NULL;
          return resultbuf;
*************** identify_system_timezone(void)
*** 472,478 ****

      /*
       * Did not find the timezone.  Fallback to use a GMT zone.  Note that the
!      * Olson timezone database names the GMT-offset zones in POSIX style: plus
       * is west of Greenwich.  It's unfortunate that this is opposite of SQL
       * conventions.  Should we therefore change the names? Probably not...
       */
--- 500,506 ----

      /*
       * Did not find the timezone.  Fallback to use a GMT zone.  Note that the
!      * IANA timezone database names the GMT-offset zones in POSIX style: plus
       * is west of Greenwich.  It's unfortunate that this is opposite of SQL
       * conventions.  Should we therefore change the names? Probably not...
       */
*************** identify_system_timezone(void)
*** 487,492 ****
--- 515,608 ----
  }

  /*
+  * Examine a system-provided symlink file to see if it tells us the timezone.
+  *
+  * Unfortunately, there is little standardization of how the system default
+  * timezone is determined in the absence of a TZ environment setting.
+  * But a common strategy is to create a symlink at a well-known place.
+  * If "linkname" identifies a readable symlink, and the tail of its contents
+  * matches a zone name we know, and the actual behavior of localtime() agrees
+  * with what we think that zone means, then we may use that zone name.
+  *
+  * We insist on a perfect behavioral match, which might not happen if the
+  * system has a different IANA database version than we do; but in that case
+  * it seems best to fall back to the brute-force search.
+  *
+  * linkname is the symlink file location to probe.
+  *
+  * tt tells about the system timezone behavior we need to match.
+  *
+  * If we successfully identify a zone name, store it in *bestzonename and
+  * return true; else return false.  bestzonename must be a buffer of length
+  * TZ_STRLEN_MAX + 1.
+  */
+ static bool
+ check_system_link_file(const char *linkname, struct tztry *tt,
+                        char *bestzonename)
+ {
+ #ifdef HAVE_READLINK
+     char        link_target[MAXPGPATH];
+     int            len;
+     const char *cur_name;
+
+     /*
+      * Try to read the symlink.  On any error (absent, not a symlink, etc),
+      * or if the result fills the buffer leaving no room for a NUL, just fail.
+      */
+     len = readlink(linkname, link_target, sizeof(link_target));
+     if (len < 0 || len >= sizeof(link_target))
+         return false;
+     link_target[len] = '\0';
+
+ #ifdef DEBUG_IDENTIFY_TIMEZONE
+     fprintf(stderr, "symbolic link \"%s\" contains \"%s\"\n",
+             linkname, link_target);
+ #endif
+
+     /*
+      * The symlink is probably of the form "/path/to/zones/zone/name", or
+      * possibly it is a relative path.  Nobody puts their zone DB directly in
+      * the root directory, so we can definitely skip the first component; but
+      * after that it's trial-and-error to identify which path component begins
+      * the zone name.
+      */
+     cur_name = link_target;
+     while (*cur_name)
+     {
+         /* Advance to next segment of path; "+ 1" skips a leading slash */
+         cur_name = strchr(cur_name + 1, '/');
+         if (cur_name == NULL)
+             break;
+         /* If there are consecutive slashes, skip all, as the kernel would */
+         do
+         {
+             cur_name++;
+         } while (*cur_name == '/');
+
+         /*
+          * Test remainder of path to see if it is a matching zone name.
+          * Relative paths might contain ".."; we needn't bother testing if the
+          * first component is that.  Also defend against overlength names.
+          */
+         if (*cur_name && *cur_name != '.' &&
+             strlen(cur_name) <= TZ_STRLEN_MAX &&
+             perfect_timezone_match(cur_name, tt))
+         {
+             /* Success!  Length was checked above, so strcpy cannot overrun */
+             strcpy(bestzonename, cur_name);
+             return true;
+         }
+     }
+
+     /* Couldn't extract a matching zone name */
+     return false;
+ #else
+     /* No symlinks?  Forget it */
+     return false;
+ #endif
+ }
+
+ /*
   * Recursively scan the timezone database looking for the best match to
   * the system timezone behavior.
   *
*************** static const struct
*** 586,592 ****
       * HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time
       * Zones on Windows 10 and Windows 7.
       *
!      * The zones have been matched to Olson timezones by looking at the cities
       * listed in the win32 display name (in the comment here) in most cases.
       */
      {
--- 702,708 ----
       * HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time
       * Zones on Windows 10 and Windows 7.
       *
!      * The zones have been matched to IANA timezones by looking at the cities
       * listed in the win32 display name (in the comment here) in most cases.
       */
      {
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 32746c7..cb8c745 100644
*** a/src/bin/initdb/initdb.c
--- b/src/bin/initdb/initdb.c
*************** static char *pgdata_native;
*** 174,179 ****
--- 174,180 ----
  static int    n_connections = 10;
  static int    n_buffers = 50;
  static const char *dynamic_shared_memory_type = NULL;
+ static const char *default_timezone = NULL;

  /*
   * Warning messages for authentication methods
*************** test_config_settings(void)
*** 1058,1063 ****
--- 1059,1069 ----
          printf("%dMB\n", (n_buffers * (BLCKSZ / 1024)) / 1024);
      else
          printf("%dkB\n", n_buffers * (BLCKSZ / 1024));
+
+     printf(_("selecting default timezone ... "));
+     fflush(stdout);
+     default_timezone = select_default_timezone(share_path);
+     printf("%s\n", default_timezone ? default_timezone : "GMT");
  }

  /*
*************** setup_config(void)
*** 1086,1092 ****
      char      **conflines;
      char        repltok[MAXPGPATH];
      char        path[MAXPGPATH];
-     const char *default_timezone;
      char       *autoconflines[3];

      fputs(_("creating configuration files ... "), stdout);
--- 1092,1097 ----
*************** setup_config(void)
*** 1168,1174 ****
                                "#default_text_search_config = 'pg_catalog.simple'",
                                repltok);

-     default_timezone = select_default_timezone(share_path);
      if (default_timezone)
      {
          snprintf(repltok, sizeof(repltok), "timezone = '%s'",
--- 1173,1178 ----

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Alvaro Herrera
Дата:
Сообщение: Re: Improve behavior of concurrent TRUNCATE
Следующее
От: Tom Lane
Дата:
Сообщение: Re: Allowing printf("%m") only where it actually works