Re: Instrument checkpoint sync calls

Поиск
Список
Период
Сортировка
От Greg Smith
Тема Re: Instrument checkpoint sync calls
Дата
Msg-id 4D076178.1070109@2ndquadrant.com
обсуждение исходный текст
Ответ на Re: Instrument checkpoint sync calls  (Robert Haas <robertmhaas@gmail.com>)
Ответы Re: Instrument checkpoint sync calls  (Alvaro Herrera <alvherre@commandprompt.com>)
Список pgsql-hackers
Robert Haas wrote:
> I took a look at this and it looks generally good, but I'm wondering
> why md.c is converting the results from an exact value to a floating
> point, only to have xlog.c turn around and convert back to an integer.
>  I think it could just return milliseconds directly, or if you're
> worried about a checkpoint that takes more than 24 days to complete,
> seconds and microseconds.

Attached patch now does something like this, except without the
roll-over concern.  INSTR_TIME_GET_MICROSEC returns a uint64 value.  I
just made that the storage format for all these values until they're
converted for display.  Test output:

LOG:  checkpoint starting: xlog
DEBUG:  checkpoint sync: number=1 file=base/16385/16480 time=10422.859 msec
DEBUG:  checkpoint sync: number=2 file=base/16385/16475_vm time=2896.614
msec
DEBUG:  checkpoint sync: number=3 file=base/16385/16475.1 time=57.836 msec
DEBUG:  checkpoint sync: number=4 file=base/16385/16466 time=20.080 msec
DEBUG:  checkpoint sync: number=5 file=base/16385/16463 time=74.926 msec
DEBUG:  checkpoint sync: number=6 file=base/16385/16482 time=74.263 msec
DEBUG:  checkpoint sync: number=7 file=base/16385/16475_fsm time=7.062 msec
DEBUG:  checkpoint sync: number=8 file=base/16385/16475 time=35.164 msec
LOG:  checkpoint complete: wrote 2143 buffers (52.3%); 0 transaction log
file(s) added, 0 removed, 3 recycled; write=1.213 s, sync=13.589 s,
total=24.744 s; sync files=8, longest=10.422 s, average=1.698 s

This shows the hard truncation used, so 10422.859 msec becomes 10.422
s.  I don't think allowing up to 0.999ms of error there is a problem
given the expected scale.  But since none of the precision is lost until
the end, that could be changed with only touching the final display
formatting conversion of the value.  Following your general idea
further, why throw away any resolution inside of md.c; let xlog.c decide
how to show it.

Note that I also fixed the DEBUG level lines to only show their actual
precision.  Before that was printing 6 digits to the right of the
decimal point each time, the last three of which were always 0.

--
Greg Smith   2ndQuadrant US    greg@2ndQuadrant.com   Baltimore, MD
PostgreSQL Training, Services and Support        www.2ndQuadrant.us
"PostgreSQL 9.0 High Performance": http://www.2ndQuadrant.com/books

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1ed9687..c9778df 100644
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** LogCheckpointEnd(bool restartpoint)
*** 6955,6964 ****
  {
      long        write_secs,
                  sync_secs,
!                 total_secs;
      int            write_usecs,
                  sync_usecs,
!                 total_usecs;

      CheckpointStats.ckpt_end_t = GetCurrentTimestamp();

--- 6955,6969 ----
  {
      long        write_secs,
                  sync_secs,
!                 total_secs,
!                 longest_secs,
!                 average_secs;
      int            write_usecs,
                  sync_usecs,
!                 total_usecs,
!                 longest_usecs,
!                 average_usecs;
!     uint64        average_sync_time;

      CheckpointStats.ckpt_end_t = GetCurrentTimestamp();

*************** LogCheckpointEnd(bool restartpoint)
*** 6974,6991 ****
                          CheckpointStats.ckpt_sync_end_t,
                          &sync_secs, &sync_usecs);

      if (restartpoint)
          elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
               CheckpointStats.ckpt_bufs_written,
               (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
               write_secs, write_usecs / 1000,
               sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000);
      else
          elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
               "%d transaction log file(s) added, %d removed, %d recycled; "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
               CheckpointStats.ckpt_bufs_written,
               (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
               CheckpointStats.ckpt_segs_added,
--- 6979,7017 ----
                          CheckpointStats.ckpt_sync_end_t,
                          &sync_secs, &sync_usecs);

+     /*
+      * Timing values returned from CheckpointStats are in microseconds.
+      * Convert to the second plus microsecond form that TimestampDifference
+      * returns for homogeneous printing.
+      */
+     longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
+     longest_usecs = CheckpointStats.ckpt_longest_sync -
+         (uint64) longest_secs * 1000000;
+
+     average_sync_time = 0;
+     if (CheckpointStats.ckpt_sync_rels > 0)
+         average_sync_time = CheckpointStats.ckpt_agg_sync_time /
+             CheckpointStats.ckpt_sync_rels;
+     average_secs = (long) (average_sync_time / 1000000);
+     average_usecs = average_sync_time - (uint64) average_secs * 1000000;
+
      if (restartpoint)
          elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
!              "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
               CheckpointStats.ckpt_bufs_written,
               (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
               write_secs, write_usecs / 1000,
               sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000,
!              CheckpointStats.ckpt_sync_rels,
!              longest_secs, longest_usecs / 1000,
!              average_secs, average_usecs / 1000);
      else
          elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
               "%d transaction log file(s) added, %d removed, %d recycled; "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
!              "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
               CheckpointStats.ckpt_bufs_written,
               (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
               CheckpointStats.ckpt_segs_added,
*************** LogCheckpointEnd(bool restartpoint)
*** 6993,6999 ****
               CheckpointStats.ckpt_segs_recycled,
               write_secs, write_usecs / 1000,
               sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000);
  }

  /*
--- 7019,7028 ----
               CheckpointStats.ckpt_segs_recycled,
               write_secs, write_usecs / 1000,
               sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000,
!              CheckpointStats.ckpt_sync_rels,
!              longest_secs, longest_usecs / 1000,
!              average_secs, average_usecs / 1000);
  }

  /*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 604db21..64f9027 100644
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 20,25 ****
--- 20,26 ----

  #include "catalog/catalog.h"
  #include "miscadmin.h"
+ #include "portability/instr_time.h"
  #include "postmaster/bgwriter.h"
  #include "storage/fd.h"
  #include "storage/bufmgr.h"
*************** mdsync(void)
*** 927,932 ****
--- 928,940 ----
      PendingOperationEntry *entry;
      int            absorb_counter;

+     /* Statistics on sync times */
+     int processed = 0;
+     instr_time sync_start, sync_end, sync_diff;
+     uint64 elapsed;
+     uint64 longest = 0;
+     uint64 total_elapsed = 0;
+
      /*
       * This is only called during checkpoints, and checkpoints should only
       * occur in processes that have created a pendingOpsTable.
*************** mdsync(void)
*** 1069,1077 ****
--- 1077,1107 ----
                  seg = _mdfd_getseg(reln, entry->tag.forknum,
                                entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
                                     false, EXTENSION_RETURN_NULL);
+
+                 if (log_checkpoints)
+                     INSTR_TIME_SET_CURRENT(sync_start);
+                 else
+                     INSTR_TIME_SET_ZERO(sync_start);
+
                  if (seg != NULL &&
                      FileSync(seg->mdfd_vfd) >= 0)
+                 {
+                     if (log_checkpoints && (! INSTR_TIME_IS_ZERO(sync_start)))
+                     {
+                         INSTR_TIME_SET_CURRENT(sync_end);
+                         sync_diff = sync_end;
+                         INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+                         elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+                         if (elapsed > longest)
+                             longest = elapsed;
+                         total_elapsed += elapsed;
+                         processed++;
+                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
+                             processed, FilePathName(seg->mdfd_vfd), (double) elapsed / 1000);
+                     }
+
                      break;        /* success; break out of retry loop */
+                 }

                  /*
                   * XXX is there any point in allowing more than one retry?
*************** mdsync(void)
*** 1113,1118 ****
--- 1143,1153 ----
              elog(ERROR, "pendingOpsTable corrupted");
      }                            /* end loop over hashtable entries */

+     /* Return sync performance metrics for report at checkpoint end */
+     CheckpointStats.ckpt_sync_rels = processed;
+     CheckpointStats.ckpt_longest_sync = longest;
+     CheckpointStats.ckpt_agg_sync_time = total_elapsed;
+
      /* Flag successful completion of mdsync */
      mdsync_in_progress = false;
  }
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index fa7ae2a..484cb6c 100644
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
*************** typedef struct CheckpointStatsData
*** 257,262 ****
--- 257,269 ----
      int            ckpt_segs_added;    /* # of new xlog segments created */
      int            ckpt_segs_removed;        /* # of xlog segments deleted */
      int            ckpt_segs_recycled;        /* # of xlog segments recycled */
+
+     int            ckpt_sync_rels;        /* # of relations synced */
+     uint64        ckpt_longest_sync;    /* Longest sync for one relation */
+     uint64        ckpt_agg_sync_time;    /* The sum of all the individual sync
+                                        times, which is not necessarily the
+                                        same as the total elapsed time for
+                                        the entire sync phase. */
  } CheckpointStatsData;

  extern CheckpointStatsData CheckpointStats;

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Peter Eisentraut
Дата:
Сообщение: Re: hstores in pl/python
Следующее
От: Florian Pflug
Дата:
Сообщение: Re: CommitFest wrap-up