Re: statistics for shared catalogs not updated when autovacuum is off

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: statistics for shared catalogs not updated when autovacuum is off
Дата
Msg-id 13023.1464213041@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: statistics for shared catalogs not updated when autovacuum is off  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: statistics for shared catalogs not updated when autovacuum is off  (Noah Misch <noah@leadboat.com>)
Список pgsql-hackers
I wrote:
> OK.  I'm going to work on replacing the DBWriteRequest structure with
> an OID list, and on improving the comments some more, but I don't
> anticipate further functional changes.

Here's the end result of that.  I went ahead and pushed the original
small patch, concluding that that was fixing a different bug and so
should be a different commit.

            regards, tom lane

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index a6db9cd..ef36193 100644
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 38,44 ****
  #include "access/xact.h"
  #include "catalog/pg_database.h"
  #include "catalog/pg_proc.h"
- #include "lib/ilist.h"
  #include "libpq/ip.h"
  #include "libpq/libpq.h"
  #include "libpq/pqsignal.h"
--- 38,43 ----
*************** static int    localNumBackends = 0;
*** 221,237 ****
  static PgStat_ArchiverStats archiverStats;
  static PgStat_GlobalStats globalStats;

! /* Write request info for each database */
! typedef struct DBWriteRequest
! {
!     Oid            databaseid;        /* OID of the database to write */
!     TimestampTz request_time;    /* timestamp of the last write request */
!     slist_node    next;
! } DBWriteRequest;
!
! /* Latest statistics request times from backends */
! static slist_head last_statrequests = SLIST_STATIC_INIT(last_statrequests);

  static volatile bool need_exit = false;
  static volatile bool got_SIGHUP = false;

--- 220,233 ----
  static PgStat_ArchiverStats archiverStats;
  static PgStat_GlobalStats globalStats;

! /*
!  * List of OIDs of databases we need to write out.  If an entry is InvalidOid,
!  * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
!  * will write both that DB's data and the shared stats.
!  */
! static List *pending_write_requests = NIL;

+ /* Signal handler flags */
  static volatile bool need_exit = false;
  static volatile bool got_SIGHUP = false;

*************** PgstatCollectorMain(int argc, char *argv
*** 3497,3504 ****
      init_ps_display("stats collector process", "", "", "");

      /*
!      * Read in an existing statistics stats file or initialize the stats to
!      * zero.
       */
      pgStatRunningInCollector = true;
      pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
--- 3493,3499 ----
      init_ps_display("stats collector process", "", "", "");

      /*
!      * Read in existing stats files or initialize the stats to zero.
       */
      pgStatRunningInCollector = true;
      pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
*************** PgstatCollectorMain(int argc, char *argv
*** 3544,3551 ****
              }

              /*
!              * Write the stats file if a new request has arrived that is not
!              * satisfied by existing file.
               */
              if (pgstat_write_statsfile_needed())
                  pgstat_write_statsfiles(false, false);
--- 3539,3546 ----
              }

              /*
!              * Write the stats file(s) if a new request has arrived that is
!              * not satisfied by existing file(s).
               */
              if (pgstat_write_statsfile_needed())
                  pgstat_write_statsfiles(false, false);
*************** pgstat_get_tab_entry(PgStat_StatDBEntry
*** 3875,3888 ****
   * pgstat_write_statsfiles() -
   *        Write the global statistics file, as well as requested DB files.
   *
!  *    If writing to the permanent files (happens when the collector is
!  *    shutting down only), remove the temporary files so that backends
!  *    starting up under a new postmaster can't read the old data before
!  *    the new collector is ready.
   *
   *    When 'allDbs' is false, only the requested databases (listed in
!  *    last_statrequests) will be written; otherwise, all databases will be
!  *    written.
   * ----------
   */
  static void
--- 3870,3883 ----
   * pgstat_write_statsfiles() -
   *        Write the global statistics file, as well as requested DB files.
   *
!  *    'permanent' specifies writing to the permanent files not temporary ones.
!  *    When true (happens only when the collector is shutting down), also remove
!  *    the temporary files so that backends starting up under a new postmaster
!  *    can't read old data before the new collector is ready.
   *
   *    When 'allDbs' is false, only the requested databases (listed in
!  *    pending_write_requests) will be written; otherwise, all databases
!  *    will be written.
   * ----------
   */
  static void
*************** pgstat_write_statsfiles(bool permanent,
*** 3942,3956 ****
      while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
      {
          /*
!          * Write out the tables and functions into the DB stat file, if
!          * required.
!          *
!          * We need to do this before the dbentry write, to ensure the
!          * timestamps written to both are consistent.
           */
          if (allDbs || pgstat_db_requested(dbentry->databaseid))
          {
              dbentry->stats_timestamp = globalStats.stats_timestamp;
              pgstat_write_db_statsfile(dbentry, permanent);
          }

--- 3937,3950 ----
      while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
      {
          /*
!          * Write out the table and function stats for this DB into the
!          * appropriate per-DB stat file, if required.
           */
          if (allDbs || pgstat_db_requested(dbentry->databaseid))
          {
+             /* Make DB's timestamp consistent with the global stats */
              dbentry->stats_timestamp = globalStats.stats_timestamp;
+
              pgstat_write_db_statsfile(dbentry, permanent);
          }

*************** pgstat_write_statsfiles(bool permanent,
*** 4003,4029 ****
       * Now throw away the list of requests.  Note that requests sent after we
       * started the write are still waiting on the network socket.
       */
!     if (!slist_is_empty(&last_statrequests))
!     {
!         slist_mutable_iter iter;
!
!         /*
!          * Strictly speaking we should do slist_delete_current() before
!          * freeing each request struct.  We skip that and instead
!          * re-initialize the list header at the end.  Nonetheless, we must use
!          * slist_foreach_modify, not just slist_foreach, since we will free
!          * the node's storage before advancing.
!          */
!         slist_foreach_modify(iter, &last_statrequests)
!         {
!             DBWriteRequest *req;
!
!             req = slist_container(DBWriteRequest, next, iter.cur);
!             pfree(req);
!         }
!
!         slist_init(&last_statrequests);
!     }
  }

  /*
--- 3997,4004 ----
       * Now throw away the list of requests.  Note that requests sent after we
       * started the write are still waiting on the network socket.
       */
!     list_free(pending_write_requests);
!     pending_write_requests = NIL;
  }

  /*
*************** pgstat_write_db_statsfile(PgStat_StatDBE
*** 4162,4174 ****
  /* ----------
   * pgstat_read_statsfiles() -
   *
!  *    Reads in the existing statistics collector files and initializes the
!  *    databases' hash table.  If the permanent file name is requested (which
!  *    only happens in the stats collector itself), also remove the file after
!  *    reading; the in-memory status is now authoritative, and the permanent file
!  *    would be out of date in case somebody else reads it.
   *
!  *    If a deep read is requested, table/function stats are read also, otherwise
   *    the table/function hash tables remain empty.
   * ----------
   */
--- 4137,4156 ----
  /* ----------
   * pgstat_read_statsfiles() -
   *
!  *    Reads in some existing statistics collector files and returns the
!  *    databases hash table that is the top level of the data.
   *
!  *    If 'onlydb' is not InvalidOid, it means we only want data for that DB
!  *    plus the shared catalogs ("DB 0").  We'll still populate the DB hash
!  *    table for all databases, but we don't bother even creating table/function
!  *    hash tables for other databases.
!  *
!  *    'permanent' specifies reading from the permanent files not temporary ones.
!  *    When true (happens only when the collector is starting up), remove the
!  *    files after reading; the in-memory status is now authoritative, and the
!  *    files would be out of date in case somebody else reads them.
!  *
!  *    If a 'deep' read is requested, table/function stats are read, otherwise
   *    the table/function hash tables remain empty.
   * ----------
   */
*************** pgstat_read_statsfiles(Oid onlydb, bool
*** 4305,4312 ****
                  dbentry->functions = NULL;

                  /*
!                  * Don't collect tables if not the requested DB (or the
!                  * shared-table info)
                   */
                  if (onlydb != InvalidOid)
                  {
--- 4287,4294 ----
                  dbentry->functions = NULL;

                  /*
!                  * Don't create tables/functions hashtables for uninteresting
!                  * databases.
                   */
                  if (onlydb != InvalidOid)
                  {
*************** pgstat_read_statsfiles(Oid onlydb, bool
*** 4334,4342 ****

                  /*
                   * If requested, read the data from the database-specific
!                  * file. If there was onlydb specified (!= InvalidOid), we
!                  * would not get here because of a break above. So we don't
!                  * need to recheck.
                   */
                  if (deep)
                      pgstat_read_db_statsfile(dbentry->databaseid,
--- 4316,4322 ----

                  /*
                   * If requested, read the data from the database-specific
!                  * file.  Otherwise we just leave the hashtables empty.
                   */
                  if (deep)
                      pgstat_read_db_statsfile(dbentry->databaseid,
*************** done:
*** 4375,4384 ****
   * pgstat_read_db_statsfile() -
   *
   *    Reads in the existing statistics collector file for the given database,
!  *    and initializes the tables and functions hash tables.
   *
!  *    As pgstat_read_statsfiles, if the permanent file is requested, it is
   *    removed after reading.
   * ----------
   */
  static void
--- 4355,4368 ----
   * pgstat_read_db_statsfile() -
   *
   *    Reads in the existing statistics collector file for the given database,
!  *    filling the passed-in tables and functions hash tables.
   *
!  *    As in pgstat_read_statsfiles, if the permanent file is requested, it is
   *    removed after reading.
+  *
+  *    Note: this code has the ability to skip storing per-table or per-function
+  *    data, if NULL is passed for the corresponding hashtable.  That's not used
+  *    at the moment though.
   * ----------
   */
  static void
*************** pgstat_read_db_statsfile(Oid databaseid,
*** 4448,4454 ****
                  }

                  /*
!                  * Skip if table belongs to a not requested database.
                   */
                  if (tabhash == NULL)
                      break;
--- 4432,4438 ----
                  }

                  /*
!                  * Skip if table data not wanted.
                   */
                  if (tabhash == NULL)
                      break;
*************** pgstat_read_db_statsfile(Oid databaseid,
*** 4482,4488 ****
                  }

                  /*
!                  * Skip if function belongs to a not requested database.
                   */
                  if (funchash == NULL)
                      break;
--- 4466,4472 ----
                  }

                  /*
!                  * Skip if function data not wanted.
                   */
                  if (funchash == NULL)
                      break;
*************** done:
*** 4524,4531 ****
          elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
          unlink(statfile);
      }
-
-     return;
  }

  /* ----------
--- 4508,4513 ----
*************** backend_read_statsfile(void)
*** 4669,4674 ****
--- 4651,4657 ----
  {
      TimestampTz min_ts = 0;
      TimestampTz ref_ts = 0;
+     Oid            inquiry_db;
      int            count;

      /* already read it? */
*************** backend_read_statsfile(void)
*** 4677,4682 ****
--- 4660,4676 ----
      Assert(!pgStatRunningInCollector);

      /*
+      * In a normal backend, we check staleness of the data for our own DB, and
+      * so we send MyDatabaseId in inquiry messages.  In the autovac launcher,
+      * check staleness of the shared-catalog data, and send InvalidOid in
+      * inquiry messages so as not to force writing unnecessary data.
+      */
+     if (IsAutoVacuumLauncherProcess())
+         inquiry_db = InvalidOid;
+     else
+         inquiry_db = MyDatabaseId;
+
+     /*
       * Loop until fresh enough stats file is available or we ran out of time.
       * The stats inquiry message is sent repeatedly in case collector drops
       * it; but not every single time, as that just swamps the collector.
*************** backend_read_statsfile(void)
*** 4689,4695 ****

          CHECK_FOR_INTERRUPTS();

!         ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, &file_ts);

          cur_ts = GetCurrentTimestamp();
          /* Calculate min acceptable timestamp, if we didn't already */
--- 4683,4689 ----

          CHECK_FOR_INTERRUPTS();

!         ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);

          cur_ts = GetCurrentTimestamp();
          /* Calculate min acceptable timestamp, if we didn't already */
*************** backend_read_statsfile(void)
*** 4748,4754 ****
                  pfree(mytime);
              }

!             pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
              break;
          }

--- 4742,4748 ----
                  pfree(mytime);
              }

!             pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
              break;
          }

*************** backend_read_statsfile(void)
*** 4758,4764 ****

          /* Not there or too old, so kick the collector and wait a bit */
          if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
!             pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);

          pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
      }
--- 4752,4758 ----

          /* Not there or too old, so kick the collector and wait a bit */
          if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
!             pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);

          pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
      }
*************** backend_read_statsfile(void)
*** 4770,4776 ****

      /*
       * Autovacuum launcher wants stats about all databases, but a shallow read
!      * is sufficient.
       */
      if (IsAutoVacuumLauncherProcess())
          pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
--- 4764,4771 ----

      /*
       * Autovacuum launcher wants stats about all databases, but a shallow read
!      * is sufficient.  Regular backends want a deep read for just the tables
!      * they can see (MyDatabaseId + shared catalogs).
       */
      if (IsAutoVacuumLauncherProcess())
          pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
*************** pgstat_clear_snapshot(void)
*** 4831,4873 ****
  static void
  pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
  {
-     slist_iter    iter;
-     DBWriteRequest *newreq;
      PgStat_StatDBEntry *dbentry;

      elog(DEBUG2, "received inquiry for database %u", msg->databaseid);

      /*
!      * Find the last write request for this DB.  If it's older than the
!      * request's cutoff time, update it; otherwise there's nothing to do.
       *
       * Note that if a request is found, we return early and skip the below
       * check for clock skew.  This is okay, since the only way for a DB
       * request to be present in the list is that we have been here since the
!      * last write round.
       */
!     slist_foreach(iter, &last_statrequests)
!     {
!         DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
!
!         if (req->databaseid != msg->databaseid)
!             continue;
!
!         if (msg->cutoff_time > req->request_time)
!             req->request_time = msg->cutoff_time;
          return;
-     }
-
-     /*
-      * There's no request for this DB yet, so create one.
-      */
-     newreq = palloc(sizeof(DBWriteRequest));
-
-     newreq->databaseid = msg->databaseid;
-     newreq->request_time = msg->clock_time;
-     slist_push_head(&last_statrequests, &newreq->next);

      /*
       * If the requestor's local clock time is older than stats_timestamp, we
       * should suspect a clock glitch, ie system time going backwards; though
       * the more likely explanation is just delayed message receipt.  It is
--- 4826,4852 ----
  static void
  pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
  {
      PgStat_StatDBEntry *dbentry;

      elog(DEBUG2, "received inquiry for database %u", msg->databaseid);

      /*
!      * If there's already a write request for this DB, there's nothing to do.
       *
       * Note that if a request is found, we return early and skip the below
       * check for clock skew.  This is okay, since the only way for a DB
       * request to be present in the list is that we have been here since the
!      * last write round.  It seems sufficient to check for clock skew once per
!      * write round.
       */
!     if (list_member_oid(pending_write_requests, msg->databaseid))
          return;

      /*
+      * Check to see if we last wrote this database at a time >= the requested
+      * cutoff time.  If so, this is a stale request that was generated before
+      * we updated the DB file, and we don't need to do so again.
+      *
       * If the requestor's local clock time is older than stats_timestamp, we
       * should suspect a clock glitch, ie system time going backwards; though
       * the more likely explanation is just delayed message receipt.  It is
*************** pgstat_recv_inquiry(PgStat_MsgInquiry *m
*** 4876,4882 ****
       * to update the stats file for a long time.
       */
      dbentry = pgstat_get_db_entry(msg->databaseid, false);
!     if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp))
      {
          TimestampTz cur_ts = GetCurrentTimestamp();

--- 4855,4871 ----
       * to update the stats file for a long time.
       */
      dbentry = pgstat_get_db_entry(msg->databaseid, false);
!     if (dbentry == NULL)
!     {
!         /*
!          * We have no data for this DB.  Enter a write request anyway so that
!          * the global stats will get updated.  This is needed to prevent
!          * backend_read_statsfile from waiting for data that we cannot supply,
!          * in the case of a new DB that nobody has yet reported any stats for.
!          * See the behavior of pgstat_read_db_statsfile_timestamp.
!          */
!     }
!     else if (msg->clock_time < dbentry->stats_timestamp)
      {
          TimestampTz cur_ts = GetCurrentTimestamp();

*************** pgstat_recv_inquiry(PgStat_MsgInquiry *m
*** 4897,4907 ****
                   writetime, mytime, dbentry->databaseid);
              pfree(writetime);
              pfree(mytime);
-
-             newreq->request_time = cur_ts;
-             dbentry->stats_timestamp = cur_ts - 1;
          }
      }
  }


--- 4886,4909 ----
                   writetime, mytime, dbentry->databaseid);
              pfree(writetime);
              pfree(mytime);
          }
+         else
+         {
+             /* No, it's just a stale request, ignore it */
+             return;
+         }
+     }
+     else if (msg->cutoff_time <= dbentry->stats_timestamp)
+     {
+         /* Stale request, ignore it */
+         return;
      }
+
+     /*
+      * We need to write this DB, so create a request.
+      */
+     pending_write_requests = lappend_oid(pending_write_requests,
+                                          msg->databaseid);
  }


*************** pgstat_recv_funcpurge(PgStat_MsgFuncpurg
*** 5480,5492 ****
  /* ----------
   * pgstat_write_statsfile_needed() -
   *
!  *    Do we need to write out the files?
   * ----------
   */
  static bool
  pgstat_write_statsfile_needed(void)
  {
!     if (!slist_is_empty(&last_statrequests))
          return true;

      /* Everything was written recently */
--- 5482,5494 ----
  /* ----------
   * pgstat_write_statsfile_needed() -
   *
!  *    Do we need to write out any stats files?
   * ----------
   */
  static bool
  pgstat_write_statsfile_needed(void)
  {
!     if (pending_write_requests != NIL)
          return true;

      /* Everything was written recently */
*************** pgstat_write_statsfile_needed(void)
*** 5502,5526 ****
  static bool
  pgstat_db_requested(Oid databaseid)
  {
-     slist_iter    iter;
-
      /*
       * If any requests are outstanding at all, we should write the stats for
       * shared catalogs (the "database" with OID 0).  This ensures that
       * backends will see up-to-date stats for shared catalogs, even though
       * they send inquiry messages mentioning only their own DB.
       */
!     if (databaseid == InvalidOid && !slist_is_empty(&last_statrequests))
          return true;

      /* Search to see if there's an open request to write this database. */
!     slist_foreach(iter, &last_statrequests)
!     {
!         DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
!
!         if (req->databaseid == databaseid)
!             return true;
!     }

      return false;
  }
--- 5504,5521 ----
  static bool
  pgstat_db_requested(Oid databaseid)
  {
      /*
       * If any requests are outstanding at all, we should write the stats for
       * shared catalogs (the "database" with OID 0).  This ensures that
       * backends will see up-to-date stats for shared catalogs, even though
       * they send inquiry messages mentioning only their own DB.
       */
!     if (databaseid == InvalidOid && pending_write_requests != NIL)
          return true;

      /* Search to see if there's an open request to write this database. */
!     if (list_member_oid(pending_write_requests, databaseid))
!         return true;

      return false;
  }
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 4be09fe..a078b61 100644
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
*************** typedef struct PgStat_MsgDummy
*** 219,225 ****

  /* ----------
   * PgStat_MsgInquiry            Sent by a backend to ask the collector
!  *                                to write the stats file.
   * ----------
   */

--- 219,238 ----

  /* ----------
   * PgStat_MsgInquiry            Sent by a backend to ask the collector
!  *                                to write the stats file(s).
!  *
!  * Ordinarily, an inquiry message prompts writing of the global stats file,
!  * the stats file for shared catalogs, and the stats file for the specified
!  * database.  If databaseid is InvalidOid, only the first two are written.
!  *
!  * New file(s) will be written only if the existing file has a timestamp
!  * older than the specified cutoff_time; this prevents duplicated effort
!  * when multiple requests arrive at nearly the same time, assuming that
!  * backends send requests with cutoff_times a little bit in the past.
!  *
!  * clock_time should be the requestor's current local time; the collector
!  * uses this to check for the system clock going backward, but it has no
!  * effect unless that occurs.  We assume clock_time >= cutoff_time, though.
   * ----------
   */

*************** typedef struct PgStat_MsgInquiry
*** 228,234 ****
      PgStat_MsgHdr m_hdr;
      TimestampTz clock_time;        /* observed local clock time */
      TimestampTz cutoff_time;    /* minimum acceptable file timestamp */
!     Oid            databaseid;        /* requested DB (InvalidOid => all DBs) */
  } PgStat_MsgInquiry;


--- 241,247 ----
      PgStat_MsgHdr m_hdr;
      TimestampTz clock_time;        /* observed local clock time */
      TimestampTz cutoff_time;    /* minimum acceptable file timestamp */
!     Oid            databaseid;        /* requested DB (InvalidOid => shared only) */
  } PgStat_MsgInquiry;



В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andres Freund
Дата:
Сообщение: Re: Is the unfair lwlock behavior intended?
Следующее
От: Alvaro Herrera
Дата:
Сообщение: Re: pg_bsd_indent - improvements around offsetof and sizeof