Re: Hot standby, recovery infra

From: Heikki Linnakangas
Subject: Re: Hot standby, recovery infra
Date:
Msg-id: 498B440A.1030101@enterprisedb.com
In response to: Re: Hot standby, recovery infra  (Simon Riggs <simon@2ndQuadrant.com>)
Responses: Re: Hot standby, recovery infra
List: pgsql-hackers
Ok, here's another version. Major changes since last patch:

- Startup checkpoint is once again performed after recovery is finished,
before allowing (read-write) connections. This is because we couldn't
solve the problem of re-entering recovery after a crash that happens
before the first online checkpoint.

- minSafeStartPoint is gone, and its functionality has been folded into
minRecoveryPoint; the semantics were really the same. There might have
been some debugging value in keeping the backup stop time around, but
it's in the backup label file in the base backup anyway.

- minRecoveryPoint is now updated in XLogFlush, instead of when a file
is restored from archive.
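
In other words (simplified from the XLogFlush hunk in the patch below),
during replay we never flush WAL ourselves; we just make sure that a
subsequent crash will replay at least up to the record being flushed:

    if (IsRecoveryProcessingMode())
    {
        /* can't flush WAL during replay; bump minRecoveryPoint instead */
        UpdateMinRecoveryPoint(record);
        return;
    }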

- log_restartpoints is gone. Use log_checkpoints in postgresql.conf
instead.
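
If you still want restartpoints to show up in the log, set this in
postgresql.conf and restartpoints are logged the same way checkpoints
are:

    log_checkpoints = on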

Outstanding issues:

- If bgwriter is performing a restartpoint when recovery ends, the
startup checkpoint will be queued up behind the restartpoint. And since
it uses the same smoothing logic as checkpoints, it can take quite some
time for that to finish. The original patch had some code to hurry up
the restartpoint by signaling the bgwriter if
LWLockConditionalAcquire(CheckpointLock) fails, but there's a race
condition with that if a restartpoint starts right after that check. We
could let the bgwriter do the checkpoint too, and wait for it, but
bgwriter might not be running yet, and we'd have to allow bgwriter to
write WAL while disallowing it for all other processes, which seems
quite complex. It seems we need something like the
LWLockConditionalAcquire approach, but built into CreateCheckPoint, to
eliminate the race condition.
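
To make that a bit more concrete, I'm thinking of something along these
lines at the top of CreateCheckPoint (just an untested sketch; the
"hurry up" call is hypothetical and not part of this patch):

    if (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE))
    {
        /*
         * A restartpoint is in progress. Ask bgwriter to skip the
         * checkpoint smoothing and finish it ASAP (hypothetical call),
         * then wait for it to release the lock. If the conditional
         * acquire succeeds instead, we hold the lock and no
         * restartpoint can start behind our back.
         */
        RequestImmediateRestartpointCompletion();   /* hypothetical */
        LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
    }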

- If you perform a fast shutdown while the startup process is waiting
for the restore command, the startup process sometimes throws a FATAL
error, which escalates into an immediate shutdown. That leads to
different messages in the logs, and skips the shutdown restartpoint
that we now otherwise perform.

- It's not clear to me whether the rest of the xlog flushing related
functions (XLogBackgroundFlush, XLogNeedsFlush and XLogAsyncCommitFlush)
need to work during recovery, or what they should do.

I'll continue working on those outstanding items.

--
   Heikki Linnakangas
   EnterpriseDB   http://www.enterprisedb.com
*** src/backend/access/transam/xlog.c
--- src/backend/access/transam/xlog.c
***************
*** 36,41 ****
--- 36,42 ----
  #include "catalog/pg_control.h"
  #include "catalog/pg_type.h"
  #include "funcapi.h"
+ #include "libpq/pqsignal.h"
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "postmaster/bgwriter.h"
***************
*** 47,52 ****
--- 48,54 ----
  #include "storage/smgr.h"
  #include "storage/spin.h"
  #include "utils/builtins.h"
+ #include "utils/flatfiles.h"
  #include "utils/guc.h"
  #include "utils/ps_status.h"
  #include "pg_trace.h"
***************
*** 119,130 **** CheckpointStatsData CheckpointStats;
   */
  TimeLineID    ThisTimeLineID = 0;

! /* Are we doing recovery from XLOG? */
  bool        InRecovery = false;

  /* Are we recovering using offline XLOG archives? */
  static bool InArchiveRecovery = false;

  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;

--- 121,146 ----
   */
  TimeLineID    ThisTimeLineID = 0;

! /*
!  * Are we doing recovery from XLOG?
!  *
!  * This is only ever true in the startup process, when it's replaying WAL.
!  * It's used in functions that need to act differently when called from a
!  * redo function (e.g skip WAL logging).  To check whether the system is in
!  * recovery regardless of what process you're running in, use
!  * IsRecoveryProcessingMode().
!  */
  bool        InRecovery = false;

  /* Are we recovering using offline XLOG archives? */
  static bool InArchiveRecovery = false;

+ /*
+  * Local copy of shared RecoveryProcessingMode variable. True actually
+  * means "not known, need to check the shared state"
+  */
+ static bool LocalRecoveryProcessingMode = true;
+
  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;

***************
*** 133,139 **** static char *recoveryRestoreCommand = NULL;
  static bool recoveryTarget = false;
  static bool recoveryTargetExact = false;
  static bool recoveryTargetInclusive = true;
- static bool recoveryLogRestartpoints = false;
  static TransactionId recoveryTargetXid;
  static TimestampTz recoveryTargetTime;
  static TimestampTz recoveryLastXTime = 0;
--- 149,154 ----
***************
*** 242,250 **** static XLogRecPtr RedoRecPtr;
   * ControlFileLock: must be held to read/update control file or create
   * new log file.
   *
!  * CheckpointLock: must be held to do a checkpoint (ensures only one
!  * checkpointer at a time; currently, with all checkpoints done by the
!  * bgwriter, this is just pro forma).
   *
   *----------
   */
--- 257,264 ----
   * ControlFileLock: must be held to read/update control file or create
   * new log file.
   *
!  * CheckpointLock: must be held to do a checkpoint or restartpoint (ensures
!  * only one checkpointer at a time)
   *
   *----------
   */
***************
*** 313,318 **** typedef struct XLogCtlData
--- 327,351 ----
      int            XLogCacheBlck;    /* highest allocated xlog buffer index */
      TimeLineID    ThisTimeLineID;

+     /*
+      * SharedRecoveryProcessingMode indicates if we're still in crash or
+      * archive recovery. It's checked by IsRecoveryProcessingMode()
+      */
+     bool        SharedRecoveryProcessingMode;
+
+     /*
+      * During recovery, we keep a copy of the latest checkpoint record
+      * here. It's used by the background writer when it wants to create
+      * a restartpoint.
+      *
+      * is info_lck spinlock a bit too light-weight to protect these?
+      */
+     XLogRecPtr    lastCheckPointRecPtr;
+     CheckPoint    lastCheckPoint;
+
+     /* end+1 of the last record replayed (or being replayed) */
+     XLogRecPtr    replayEndRecPtr;
+
      slock_t        info_lck;        /* locks shared variables shown above */
  } XLogCtlData;

***************
*** 387,395 **** static XLogRecPtr ReadRecPtr;    /* start of last record read */
--- 420,435 ----
  static XLogRecPtr EndRecPtr;    /* end+1 of last record read */
  static XLogRecord *nextRecord = NULL;
  static TimeLineID lastPageTLI = 0;
+ static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
+ static bool    updateMinRecoveryPoint = true;

  static bool InRedo = false;

+ /*
+  * Flag set by interrupt handlers for later service in the redo loop.
+  */
+ static volatile sig_atomic_t shutdown_requested = false;
+

  static void XLogArchiveNotify(const char *xlog);
  static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
***************
*** 420,425 **** static void PreallocXlogFiles(XLogRecPtr endptr);
--- 460,466 ----
  static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
  static void ValidateXLOGDirectoryStructure(void);
  static void CleanupBackupHistory(void);
+ static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
  static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
  static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
***************
*** 484,489 **** XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
--- 525,534 ----
      bool        doPageWrites;
      bool        isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);

+     /* cross-check on whether we should be here or not */
+     if (IsRecoveryProcessingMode())
+         elog(FATAL, "cannot make new WAL entries during recovery");
+
      /* info's high bits are reserved for use by me */
      if (info & XLR_INFO_MASK)
          elog(PANIC, "invalid xlog info mask %02X", info);
***************
*** 1718,1723 **** XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
--- 1763,1817 ----
  }

  /*
+  * Advance minRecoveryPoint in control file.
+  *
+  * If we crash during recovery, we must reach this point again before the
+  * database is consistent. If minRecoveryPoint is already greater than or
+  * equal to 'lsn', it is not updated.
+  */
+ static void
+ UpdateMinRecoveryPoint(XLogRecPtr lsn)
+ {
+     /* Quick check using our local copy of the variable */
+     if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+         return;
+
+     LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+
+     /* update local copy */
+     minRecoveryPoint = ControlFile->minRecoveryPoint;
+
+     /*
+      * An invalid minRecoveryPoint means that we need to recover all the WAL,
+      * ie. crash recovery. Don't update the control file in that case.
+      */
+     if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
+         updateMinRecoveryPoint = false;
+     else if (XLByteLT(minRecoveryPoint, lsn))
+     {
+         /* use volatile pointer to prevent code rearrangement */
+         volatile XLogCtlData *xlogctl = XLogCtl;
+
+         /*
+          * To avoid having to update the control file too often, we update
+          * it all the way to the last record being replayed, even though 'lsn'
+          * would suffice for correctness.
+          */
+         SpinLockAcquire(&xlogctl->info_lck);
+         minRecoveryPoint = xlogctl->replayEndRecPtr;
+         SpinLockRelease(&xlogctl->info_lck);
+
+         /* update control file */
+         ControlFile->minRecoveryPoint = minRecoveryPoint;
+         UpdateControlFile();
+
+         elog(DEBUG2, "updated min recovery point to %X/%X",
+              minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
+     }
+     LWLockRelease(ControlFileLock);
+ }
+
+ /*
   * Ensure that all XLOG data through the given position is flushed to disk.
   *
   * NOTE: this differs from XLogWrite mainly in that the WALWriteLock is not
***************
*** 1729,1737 **** XLogFlush(XLogRecPtr record)
      XLogRecPtr    WriteRqstPtr;
      XLogwrtRqst WriteRqst;

!     /* Disabled during REDO */
!     if (InRedo)
          return;

      /* Quick exit if already known flushed */
      if (XLByteLE(record, LogwrtResult.Flush))
--- 1823,1837 ----
      XLogRecPtr    WriteRqstPtr;
      XLogwrtRqst WriteRqst;

!     /*
!      * During REDO, we don't try to flush the WAL, but update minRecoveryPoint
!      * instead.
!      */
!     if (IsRecoveryProcessingMode())
!     {
!         UpdateMinRecoveryPoint(record);
          return;
+     }

      /* Quick exit if already known flushed */
      if (XLByteLE(record, LogwrtResult.Flush))
***************
*** 1818,1826 **** XLogFlush(XLogRecPtr record)
       * the bad page is encountered again during recovery then we would be
       * unable to restart the database at all!  (This scenario has actually
       * happened in the field several times with 7.1 releases. Note that we
!      * cannot get here while InRedo is true, but if the bad page is brought in
!      * and marked dirty during recovery then CreateCheckPoint will try to
!      * flush it at the end of recovery.)
       *
       * The current approach is to ERROR under normal conditions, but only
       * WARNING during recovery, so that the system can be brought up even if
--- 1918,1926 ----
       * the bad page is encountered again during recovery then we would be
       * unable to restart the database at all!  (This scenario has actually
       * happened in the field several times with 7.1 releases. Note that we
!      * cannot get here while IsRecoveryProcessingMode(), but if the bad page is
!      * brought in and marked dirty during recovery then if a checkpoint were
!      * performed at the end of recovery it will try to flush it.
       *
       * The current approach is to ERROR under normal conditions, but only
       * WARNING during recovery, so that the system can be brought up even if
***************
*** 2677,2687 **** RestoreArchivedFile(char *path, const char *xlogfname,
--- 2777,2799 ----
       * those it's a good bet we should have gotten it too.  Aborting on other
       * signals such as SIGTERM seems a good idea as well.
       *
+      * However, if we were requested to terminate, we don't really care what
+      * happened to the restore command, so we just exit cleanly. In fact,
+      * the restore command most likely received the SIGTERM too, and we don't
+      * want to complain about that.
+      *
       * Per the Single Unix Spec, shells report exit status > 128 when a called
       * command died on a signal.  Also, 126 and 127 are used to report
       * problems such as an unfindable command; treat those as fatal errors
       * too.
       */
+     if (shutdown_requested && InRedo)
+     {
+         /* XXX: Is EndRecPtr always the right value here? */
+         UpdateMinRecoveryPoint(EndRecPtr);
+         proc_exit(0);
+     }
+
      signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;

      ereport(signaled ? FATAL : DEBUG2,
***************
*** 4590,4607 **** readRecoveryCommandFile(void)
              ereport(LOG,
                      (errmsg("recovery_target_inclusive = %s", tok2)));
          }
-         else if (strcmp(tok1, "log_restartpoints") == 0)
-         {
-             /*
-              * does nothing if a recovery_target is not also set
-              */
-             if (!parse_bool(tok2, &recoveryLogRestartpoints))
-                   ereport(ERROR,
-                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                       errmsg("parameter \"log_restartpoints\" requires a Boolean value")));
-             ereport(LOG,
-                     (errmsg("log_restartpoints = %s", tok2)));
-         }
          else
              ereport(FATAL,
                      (errmsg("unrecognized recovery parameter \"%s\"",
--- 4702,4707 ----
***************
*** 4883,4889 **** StartupXLOG(void)
      XLogRecPtr    RecPtr,
                  LastRec,
                  checkPointLoc,
!                 minRecoveryLoc,
                  EndOfLog;
      uint32        endLogId;
      uint32        endLogSeg;
--- 4983,4989 ----
      XLogRecPtr    RecPtr,
                  LastRec,
                  checkPointLoc,
!                 backupStopLoc,
                  EndOfLog;
      uint32        endLogId;
      uint32        endLogSeg;
***************
*** 4891,4896 **** StartupXLOG(void)
--- 4991,4998 ----
      uint32        freespace;
      TransactionId oldestActiveXID;

+     XLogCtl->SharedRecoveryProcessingMode = true;
+
      /*
       * Read control file and check XLOG status looks valid.
       *
***************
*** 4970,4976 **** StartupXLOG(void)
                          recoveryTargetTLI,
                          ControlFile->checkPointCopy.ThisTimeLineID)));

!     if (read_backup_label(&checkPointLoc, &minRecoveryLoc))
      {
          /*
           * When a backup_label file is present, we want to roll forward from
--- 5072,5078 ----
                          recoveryTargetTLI,
                          ControlFile->checkPointCopy.ThisTimeLineID)));

!     if (read_backup_label(&checkPointLoc, &backupStopLoc))
      {
          /*
           * When a backup_label file is present, we want to roll forward from
***************
*** 5108,5118 **** StartupXLOG(void)
          ControlFile->prevCheckPoint = ControlFile->checkPoint;
          ControlFile->checkPoint = checkPointLoc;
          ControlFile->checkPointCopy = checkPoint;
!         if (minRecoveryLoc.xlogid != 0 || minRecoveryLoc.xrecoff != 0)
!             ControlFile->minRecoveryPoint = minRecoveryLoc;
          ControlFile->time = (pg_time_t) time(NULL);
          UpdateControlFile();

          /*
           * If there was a backup label file, it's done its job and the info
           * has now been propagated into pg_control.  We must get rid of the
--- 5210,5232 ----
          ControlFile->prevCheckPoint = ControlFile->checkPoint;
          ControlFile->checkPoint = checkPointLoc;
          ControlFile->checkPointCopy = checkPoint;
!         if (backupStopLoc.xlogid != 0 || backupStopLoc.xrecoff != 0)
!         {
!             if (XLByteLT(ControlFile->minRecoveryPoint, backupStopLoc))
!                 ControlFile->minRecoveryPoint = backupStopLoc;
!         }
          ControlFile->time = (pg_time_t) time(NULL);
+         /* No need to hold ControlFileLock yet, we aren't up far enough */
          UpdateControlFile();

+         /* update our local copy of minRecoveryPoint */
+         minRecoveryPoint = ControlFile->minRecoveryPoint;
+
+         /*
+          * Reset pgstat data, because it may be invalid after recovery.
+          */
+         pgstat_reset_all();
+
          /*
           * If there was a backup label file, it's done its job and the info
           * has now been propagated into pg_control.  We must get rid of the
***************
*** 5157,5168 **** StartupXLOG(void)
          {
              bool        recoveryContinue = true;
              bool        recoveryApply = true;
              ErrorContextCallback errcontext;

              InRedo = true;
!             ereport(LOG,
!                     (errmsg("redo starts at %X/%X",
!                             ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));

              /*
               * main redo apply loop
--- 5271,5306 ----
          {
              bool        recoveryContinue = true;
              bool        recoveryApply = true;
+             bool        reachedMinRecoveryPoint = false;
              ErrorContextCallback errcontext;
+             /* use volatile pointer to prevent code rearrangement */
+             volatile XLogCtlData *xlogctl = XLogCtl;
+
+             /* Update shared copy of replayEndRecPtr */
+             SpinLockAcquire(&xlogctl->info_lck);
+             xlogctl->replayEndRecPtr = ReadRecPtr;
+             SpinLockRelease(&xlogctl->info_lck);

              InRedo = true;
!
!             if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
!                 ereport(LOG,
!                         (errmsg("redo starts at %X/%X",
!                                 ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
!             else
!                 ereport(LOG,
!                         (errmsg("redo starts at %X/%X, consistency will be reached at %X/%X",
!                         ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
!                         minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
!
!             /*
!              * Let postmaster know we've started redo now.
!              *
!              * After this point, we can no longer assume that there's no other
!              * processes running concurrently.
!              */
!             if (InArchiveRecovery && IsUnderPostmaster)
!                 SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);

              /*
               * main redo apply loop
***************
*** 5189,5194 **** StartupXLOG(void)
--- 5327,5361 ----
  #endif

                  /*
+                  * Process any requests or signals received recently.
+                  */
+                 if (shutdown_requested)
+                 {
+                     /*
+                      * We were requested to exit without finishing recovery.
+                      */
+                     UpdateMinRecoveryPoint(ReadRecPtr);
+                     proc_exit(0);
+                 }
+
+                 /*
+                  * Have we reached our safe starting point? If so, we can
+                  * tell postmaster that the database is consistent now.
+                  */
+                 if (!reachedMinRecoveryPoint &&
+                      XLByteLE(minRecoveryPoint, EndRecPtr))
+                 {
+                     reachedMinRecoveryPoint = true;
+                     if (InArchiveRecovery)
+                     {
+                         ereport(LOG,
+                                 (errmsg("consistent recovery state reached")));
+                         if (IsUnderPostmaster)
+                             SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
+                     }
+                 }
+
+                 /*
                   * Have we reached our recovery target?
                   */
                  if (recoveryStopsHere(record, &recoveryApply))
***************
*** 5213,5218 **** StartupXLOG(void)
--- 5380,5390 ----
                      TransactionIdAdvance(ShmemVariableCache->nextXid);
                  }

+                 /* Update shared copy of replayEndRecPtr */
+                 SpinLockAcquire(&xlogctl->info_lck);
+                 xlogctl->replayEndRecPtr = EndRecPtr;
+                 SpinLockRelease(&xlogctl->info_lck);
+
                  RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);

                  /* Pop the error context stack */
***************
*** 5256,5269 **** StartupXLOG(void)
       * Complain if we did not roll forward far enough to render the backup
       * dump consistent.
       */
!     if (XLByteLT(EndOfLog, ControlFile->minRecoveryPoint))
      {
          if (reachedStopPoint)    /* stopped because of stop request */
              ereport(FATAL,
!                     (errmsg("requested recovery stop point is before end time of backup dump")));
          else    /* ran off end of WAL */
              ereport(FATAL,
!                     (errmsg("WAL ends before end time of backup dump")));
      }

      /*
--- 5428,5441 ----
       * Complain if we did not roll forward far enough to render the backup
       * dump consistent.
       */
!     if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint))
      {
          if (reachedStopPoint)    /* stopped because of stop request */
              ereport(FATAL,
!                     (errmsg("requested recovery stop point is before consistent recovery point")));
          else    /* ran off end of WAL */
              ereport(FATAL,
!                     (errmsg("WAL ended before a consistent state was reached")));
      }

      /*
***************
*** 5358,5363 **** StartupXLOG(void)
--- 5530,5541 ----
      /* Pre-scan prepared transactions to find out the range of XIDs present */
      oldestActiveXID = PrescanPreparedTransactions();

+     /*
+      * Allow writing WAL for us. But not for other backends! That's done
+      * after writing the shutdown checkpoint and finishing recovery.
+      */
+     LocalRecoveryProcessingMode = false;
+
      if (InRecovery)
      {
          int            rmid;
***************
*** 5378,5388 **** StartupXLOG(void)
          XLogCheckInvalidPages();

          /*
-          * Reset pgstat data, because it may be invalid after recovery.
-          */
-         pgstat_reset_all();
-
-         /*
           * Perform a checkpoint to update all our recovery activity to disk.
           *
           * Note that we write a shutdown checkpoint rather than an on-line
--- 5556,5561 ----
***************
*** 5404,5415 **** StartupXLOG(void)
       */
      InRecovery = false;

      ControlFile->state = DB_IN_PRODUCTION;
      ControlFile->time = (pg_time_t) time(NULL);
      UpdateControlFile();

      /* start the archive_timeout timer running */
!     XLogCtl->Write.lastSegSwitchTime = ControlFile->time;

      /* initialize shared-memory copy of latest checkpoint XID/epoch */
      XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
--- 5577,5590 ----
       */
      InRecovery = false;

+     LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
      ControlFile->state = DB_IN_PRODUCTION;
      ControlFile->time = (pg_time_t) time(NULL);
      UpdateControlFile();
+     LWLockRelease(ControlFileLock);

      /* start the archive_timeout timer running */
!     XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);

      /* initialize shared-memory copy of latest checkpoint XID/epoch */
      XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
***************
*** 5444,5449 **** StartupXLOG(void)
--- 5619,5663 ----
          readRecordBuf = NULL;
          readRecordBufSize = 0;
      }
+
+     /*
+      * All done. Allow others to write WAL.
+      */
+     XLogCtl->SharedRecoveryProcessingMode = false;
+ }
+
+ /*
+  * Is the system still in recovery?
+  *
+  * As a side-effect, we initialize the local TimeLineID and RedoRecPtr
+  * variables the first time we see that recovery is finished.
+  */
+ bool
+ IsRecoveryProcessingMode(void)
+ {
+     /*
+      * We check shared state each time only until we leave recovery mode.
+      * We can't re-enter recovery, so we rely on the local state variable
+      * after that.
+      */
+     if (!LocalRecoveryProcessingMode)
+         return false;
+     else
+     {
+         /* use volatile pointer to prevent code rearrangement */
+         volatile XLogCtlData *xlogctl = XLogCtl;
+
+         LocalRecoveryProcessingMode = xlogctl->SharedRecoveryProcessingMode;
+
+         /*
+          * Initialize TimeLineID and RedoRecPtr the first time we see that
+          * recovery is finished.
+          */
+         if (!LocalRecoveryProcessingMode)
+             InitXLOGAccess();
+
+         return LocalRecoveryProcessingMode;
+     }
  }

  /*
***************
*** 5575,5580 **** InitXLOGAccess(void)
--- 5789,5796 ----
  {
      /* ThisTimeLineID doesn't change so we need no lock to copy it */
      ThisTimeLineID = XLogCtl->ThisTimeLineID;
+     Assert(ThisTimeLineID != 0);
+
      /* Use GetRedoRecPtr to copy the RedoRecPtr safely */
      (void) GetRedoRecPtr();
  }
***************
*** 5686,5692 **** ShutdownXLOG(int code, Datum arg)
      ereport(LOG,
              (errmsg("shutting down")));

!     CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
      ShutdownCLOG();
      ShutdownSUBTRANS();
      ShutdownMultiXact();
--- 5902,5911 ----
      ereport(LOG,
              (errmsg("shutting down")));

!     if (IsRecoveryProcessingMode())
!         CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
!     else
!         CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
      ShutdownCLOG();
      ShutdownSUBTRANS();
      ShutdownMultiXact();
***************
*** 5699,5707 **** ShutdownXLOG(int code, Datum arg)
   * Log start of a checkpoint.
   */
  static void
! LogCheckpointStart(int flags)
  {
!     elog(LOG, "checkpoint starting:%s%s%s%s%s%s",
           (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
           (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
           (flags & CHECKPOINT_FORCE) ? " force" : "",
--- 5918,5937 ----
   * Log start of a checkpoint.
   */
  static void
! LogCheckpointStart(int flags, bool restartpoint)
  {
!     char *msg;
!
!     /*
!      * XXX: This is hopelessly untranslatable. We could call gettext_noop
!      * for the main message, but what about all the flags?
!      */
!     if (restartpoint)
!         msg = "restartpoint starting:%s%s%s%s%s%s";
!     else
!         msg = "checkpoint starting:%s%s%s%s%s%s";
!
!     elog(LOG, msg,
           (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
           (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
           (flags & CHECKPOINT_FORCE) ? " force" : "",
***************
*** 5714,5720 **** LogCheckpointStart(int flags)
   * Log end of a checkpoint.
   */
  static void
! LogCheckpointEnd(void)
  {
      long        write_secs,
                  sync_secs,
--- 5944,5950 ----
   * Log end of a checkpoint.
   */
  static void
! LogCheckpointEnd(bool restartpoint)
  {
      long        write_secs,
                  sync_secs,
***************
*** 5737,5753 **** LogCheckpointEnd(void)
                          CheckpointStats.ckpt_sync_end_t,
                          &sync_secs, &sync_usecs);

!     elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
!          "%d transaction log file(s) added, %d removed, %d recycled; "
!          "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
!          CheckpointStats.ckpt_bufs_written,
!          (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
!          CheckpointStats.ckpt_segs_added,
!          CheckpointStats.ckpt_segs_removed,
!          CheckpointStats.ckpt_segs_recycled,
!          write_secs, write_usecs / 1000,
!          sync_secs, sync_usecs / 1000,
!          total_secs, total_usecs / 1000);
  }

  /*
--- 5967,5992 ----
                          CheckpointStats.ckpt_sync_end_t,
                          &sync_secs, &sync_usecs);

!     if (restartpoint)
!         elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
!              CheckpointStats.ckpt_bufs_written,
!              (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
!              write_secs, write_usecs / 1000,
!              sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000);
!     else
!         elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
!              "%d transaction log file(s) added, %d removed, %d recycled; "
!              "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
!              CheckpointStats.ckpt_bufs_written,
!              (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
!              CheckpointStats.ckpt_segs_added,
!              CheckpointStats.ckpt_segs_removed,
!              CheckpointStats.ckpt_segs_recycled,
!              write_secs, write_usecs / 1000,
!              sync_secs, sync_usecs / 1000,
!              total_secs, total_usecs / 1000);
  }

  /*
***************
*** 5778,5788 **** CreateCheckPoint(int flags)
      TransactionId *inCommitXids;
      int            nInCommit;

      /*
       * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
-      * (This is just pro forma, since in the present system structure there is
-      * only one process that is allowed to issue checkpoints at any given
-      * time.)
       */
      LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);

--- 6017,6028 ----
      TransactionId *inCommitXids;
      int            nInCommit;

+     /* shouldn't happen */
+     if (IsRecoveryProcessingMode())
+         elog(ERROR, "can't create a checkpoint during recovery");
+
      /*
       * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
       */
      LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);

***************
*** 5803,5811 **** CreateCheckPoint(int flags)
--- 6043,6053 ----

      if (shutdown)
      {
+         LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
          ControlFile->state = DB_SHUTDOWNING;
          ControlFile->time = (pg_time_t) time(NULL);
          UpdateControlFile();
+         LWLockRelease(ControlFileLock);
      }

      /*
***************
*** 5909,5915 **** CreateCheckPoint(int flags)
       * to log anything if we decided to skip the checkpoint.
       */
      if (log_checkpoints)
!         LogCheckpointStart(flags);

      TRACE_POSTGRESQL_CHECKPOINT_START(flags);

--- 6151,6157 ----
       * to log anything if we decided to skip the checkpoint.
       */
      if (log_checkpoints)
!         LogCheckpointStart(flags, false);

      TRACE_POSTGRESQL_CHECKPOINT_START(flags);

***************
*** 6068,6074 **** CreateCheckPoint(int flags)
       * Truncate pg_subtrans if possible.  We can throw away all data before
       * the oldest XMIN of any running transaction.    No future transaction will
       * attempt to reference any pg_subtrans entry older than that (see Asserts
!      * in subtrans.c).    During recovery, though, we mustn't do this because
       * StartupSUBTRANS hasn't been called yet.
       */
      if (!InRecovery)
--- 6310,6316 ----
       * Truncate pg_subtrans if possible.  We can throw away all data before
       * the oldest XMIN of any running transaction.    No future transaction will
       * attempt to reference any pg_subtrans entry older than that (see Asserts
!      * in subtrans.c).  During recovery, though, we mustn't do this because
       * StartupSUBTRANS hasn't been called yet.
       */
      if (!InRecovery)
***************
*** 6076,6082 **** CreateCheckPoint(int flags)

      /* All real work is done, but log before releasing lock. */
      if (log_checkpoints)
!         LogCheckpointEnd();

          TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
                                  NBuffers, CheckpointStats.ckpt_segs_added,
--- 6318,6324 ----

      /* All real work is done, but log before releasing lock. */
      if (log_checkpoints)
!         LogCheckpointEnd(false);

          TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
                                  NBuffers, CheckpointStats.ckpt_segs_added,
***************
*** 6104,6135 **** CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
  }

  /*
!  * Set a recovery restart point if appropriate
!  *
!  * This is similar to CreateCheckPoint, but is used during WAL recovery
!  * to establish a point from which recovery can roll forward without
!  * replaying the entire recovery log.  This function is called each time
!  * a checkpoint record is read from XLOG; it must determine whether a
!  * restartpoint is needed or not.
   */
  static void
  RecoveryRestartPoint(const CheckPoint *checkPoint)
  {
-     int            elapsed_secs;
      int            rmid;
!
!     /*
!      * Do nothing if the elapsed time since the last restartpoint is less than
!      * half of checkpoint_timeout.    (We use a value less than
!      * checkpoint_timeout so that variations in the timing of checkpoints on
!      * the master, or speed of transmission of WAL segments to a slave, won't
!      * make the slave skip a restartpoint once it's synced with the master.)
!      * Checking true elapsed time keeps us from doing restartpoints too often
!      * while rapidly scanning large amounts of WAL.
!      */
!     elapsed_secs = (pg_time_t) time(NULL) - ControlFile->time;
!     if (elapsed_secs < CheckPointTimeout / 2)
!         return;

      /*
       * Is it safe to checkpoint?  We must ask each of the resource managers
--- 6346,6362 ----
  }

  /*
!  * This is used during WAL recovery to establish a point from which recovery
!  * can roll forward without replaying the entire recovery log.  This function
!  * is called each time a checkpoint record is read from XLOG. It is stored
!  * in shared memory, so that it can be used as a restartpoint later on.
   */
  static void
  RecoveryRestartPoint(const CheckPoint *checkPoint)
  {
      int            rmid;
!     /* use volatile pointer to prevent code rearrangement */
!     volatile XLogCtlData *xlogctl = XLogCtl;

      /*
       * Is it safe to checkpoint?  We must ask each of the resource managers
***************
*** 6151,6178 **** RecoveryRestartPoint(const CheckPoint *checkPoint)
      }

      /*
!      * OK, force data out to disk
       */
!     CheckPointGuts(checkPoint->redo, CHECKPOINT_IMMEDIATE);

      /*
!      * Update pg_control so that any subsequent crash will restart from this
!      * checkpoint.    Note: ReadRecPtr gives the XLOG address of the checkpoint
!      * record itself.
       */
      ControlFile->prevCheckPoint = ControlFile->checkPoint;
!     ControlFile->checkPoint = ReadRecPtr;
!     ControlFile->checkPointCopy = *checkPoint;
      ControlFile->time = (pg_time_t) time(NULL);
      UpdateControlFile();

!     ereport((recoveryLogRestartpoints ? LOG : DEBUG2),
              (errmsg("recovery restart point at %X/%X",
!                     checkPoint->redo.xlogid, checkPoint->redo.xrecoff)));
      if (recoveryLastXTime)
!         ereport((recoveryLogRestartpoints ? LOG : DEBUG2),
!                 (errmsg("last completed transaction was at log time %s",
!                         timestamptz_to_str(recoveryLastXTime))));
  }

  /*
--- 6378,6487 ----
      }

      /*
!      * Copy the checkpoint record to shared memory, so that bgwriter can
!      * use it the next time it wants to perform a restartpoint.
!      */
!     SpinLockAcquire(&xlogctl->info_lck);
!     XLogCtl->lastCheckPointRecPtr = ReadRecPtr;
!     memcpy(&XLogCtl->lastCheckPoint, checkPoint, sizeof(CheckPoint));
!     SpinLockRelease(&xlogctl->info_lck);
! }
!
! /*
!  * This is similar to CreateCheckPoint, but is used during WAL recovery
!  * to establish a point from which recovery can roll forward without
!  * replaying the entire recovery log.
!  */
! void
! CreateRestartPoint(int flags)
! {
!     XLogRecPtr lastCheckPointRecPtr;
!     CheckPoint lastCheckPoint;
!     /* use volatile pointer to prevent code rearrangement */
!     volatile XLogCtlData *xlogctl = XLogCtl;
!
!     /*
!      * Acquire CheckpointLock to ensure only one restartpoint or checkpoint
!      * happens at a time.
       */
!     LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
!
!     /* Get a local copy of the last checkpoint record. */
!     SpinLockAcquire(&xlogctl->info_lck);
!     lastCheckPointRecPtr = xlogctl->lastCheckPointRecPtr;
!     memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint));
!     SpinLockRelease(&xlogctl->info_lck);

      /*
!      * If the last checkpoint record we've replayed is already our last
!      * restartpoint, we're done.
       */
+     if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
+         XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
+     {
+         ereport(DEBUG2,
+                 (errmsg("skipping restartpoint, already performed at %X/%X",
+                         lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+         LWLockRelease(CheckpointLock);
+         return;
+     }
+
+     /*
+      * Check that we're still in recovery mode. It's ok if we exit recovery
+      * mode after this check, the restart point is valid anyway.
+      */
+     if (!IsRecoveryProcessingMode())
+     {
+         ereport(DEBUG2,
+                 (errmsg("skipping restartpoint, recovery has already ended")));
+         LWLockRelease(CheckpointLock);
+         return;
+     }
+
+     if (log_checkpoints)
+     {
+         /*
+          * Prepare to accumulate statistics.
+          */
+         MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
+         CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
+
+         LogCheckpointStart(flags, true);
+     }
+
+     CheckPointGuts(lastCheckPoint.redo, flags);
+
+     /*
+      * Update pg_control, using current time
+      */
+     LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
      ControlFile->prevCheckPoint = ControlFile->checkPoint;
!     ControlFile->checkPoint = lastCheckPointRecPtr;
!     ControlFile->checkPointCopy = lastCheckPoint;
      ControlFile->time = (pg_time_t) time(NULL);
      UpdateControlFile();
+     LWLockRelease(ControlFileLock);
+
+     /*
+      * Currently, there is no need to truncate pg_subtrans during recovery.
+      * If we did do that, we will need to have called StartupSUBTRANS()
+      * already and then TruncateSUBTRANS() would go here.
+      */
+
+     /* All real work is done, but log before releasing lock. */
+     if (log_checkpoints)
+         LogCheckpointEnd(true);

!     ereport((log_checkpoints ? LOG : DEBUG2),
              (errmsg("recovery restart point at %X/%X",
!                     lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
!
      if (recoveryLastXTime)
!         ereport((log_checkpoints ? LOG : DEBUG2),
!             (errmsg("last completed transaction was at log time %s",
!                     timestamptz_to_str(recoveryLastXTime))));
!
!     LWLockRelease(CheckpointLock);
  }

  /*
***************
*** 6238,6243 **** RequestXLogSwitch(void)
--- 6547,6555 ----

  /*
   * XLOG resource manager's routines
+  *
+  * Definitions of message info are in include/catalog/pg_control.h,
+  * though not all messages relate to control file processing.
   */
  void
  xlog_redo(XLogRecPtr lsn, XLogRecord *record)
***************
*** 6284,6292 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record)
                                   (int) checkPoint.ThisTimeLineID))
                  ereport(PANIC,
                          (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
!                                 checkPoint.ThisTimeLineID, ThisTimeLineID)));
!             /* Following WAL records should be run with new TLI */
!             ThisTimeLineID = checkPoint.ThisTimeLineID;
          }

          RecoveryRestartPoint(&checkPoint);
--- 6596,6604 ----
                                   (int) checkPoint.ThisTimeLineID))
                  ereport(PANIC,
                          (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
!                                checkPoint.ThisTimeLineID, ThisTimeLineID)));
!            /* Following WAL records should be run with new TLI */
!            ThisTimeLineID = checkPoint.ThisTimeLineID;
          }

          RecoveryRestartPoint(&checkPoint);
***************
*** 7227,7229 **** CancelBackup(void)
--- 7539,7627 ----
      }
  }

+ /* ------------------------------------------------------
+  *  Startup Process main entry point and signal handlers
+  * ------------------------------------------------------
+  */
+
+ /*
+  * startupproc_quickdie() occurs when signalled SIGQUIT by the postmaster.
+  *
+  * Some backend has bought the farm,
+  * so we need to stop what we're doing and exit.
+  */
+ static void
+ startupproc_quickdie(SIGNAL_ARGS)
+ {
+     PG_SETMASK(&BlockSig);
+
+     /*
+      * DO NOT proc_exit() -- we're here because shared memory may be
+      * corrupted, so we don't want to try to clean up our transaction. Just
+      * nail the windows shut and get out of town.
+      *
+      * Note we do exit(2) not exit(0).    This is to force the postmaster into a
+      * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
+      * backend.  This is necessary precisely because we don't clean up our
+      * shared memory state.
+      */
+     exit(2);
+ }
+
+
+ /* SIGTERM: set flag to abort redo and exit */
+ static void
+ StartupProcShutdownHandler(SIGNAL_ARGS)
+ {
+     shutdown_requested = true;
+ }
+
+ /* Main entry point for startup process */
+ void
+ StartupProcessMain(void)
+ {
+     /*
+      * If possible, make this process a group leader, so that the postmaster
+      * can signal any child processes too.
+      */
+ #ifdef HAVE_SETSID
+     if (setsid() < 0)
+         elog(FATAL, "setsid() failed: %m");
+ #endif
+
+     /*
+      * Properly accept or ignore signals the postmaster might send us
+      */
+     pqsignal(SIGHUP, SIG_IGN);    /* ignore config file updates */
+     pqsignal(SIGINT, SIG_IGN);        /* ignore query cancel */
+     pqsignal(SIGTERM, StartupProcShutdownHandler); /* request shutdown */
+     pqsignal(SIGQUIT, startupproc_quickdie);        /* hard crash time */
+     pqsignal(SIGALRM, SIG_IGN);
+     pqsignal(SIGPIPE, SIG_IGN);
+     pqsignal(SIGUSR1, SIG_IGN);
+     pqsignal(SIGUSR2, SIG_IGN);
+
+     /*
+      * Reset some signals that are accepted by postmaster but not here
+      */
+     pqsignal(SIGCHLD, SIG_DFL);
+     pqsignal(SIGTTIN, SIG_DFL);
+     pqsignal(SIGTTOU, SIG_DFL);
+     pqsignal(SIGCONT, SIG_DFL);
+     pqsignal(SIGWINCH, SIG_DFL);
+
+     /*
+      * Unblock signals (they were blocked when the postmaster forked us)
+      */
+     PG_SETMASK(&UnBlockSig);
+
+     StartupXLOG();
+
+     BuildFlatFiles(false);
+
+     /* Let postmaster know that startup is finished */
+     SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED);
+
+     /* exit normally */
+     proc_exit(0);
+ }
*** src/backend/bootstrap/bootstrap.c
--- src/backend/bootstrap/bootstrap.c
***************
*** 37,43 ****
  #include "storage/proc.h"
  #include "tcop/tcopprot.h"
  #include "utils/builtins.h"
- #include "utils/flatfiles.h"
  #include "utils/fmgroids.h"
  #include "utils/memutils.h"
  #include "utils/ps_status.h"
--- 37,42 ----
***************
*** 416,429 **** AuxiliaryProcessMain(int argc, char *argv[])
              proc_exit(1);        /* should never return */

          case StartupProcess:
!             bootstrap_signals();
!             StartupXLOG();
!             BuildFlatFiles(false);
!             proc_exit(0);        /* startup done */

          case BgWriterProcess:
              /* don't set signals, bgwriter has its own agenda */
-             InitXLOGAccess();
              BackgroundWriterMain();
              proc_exit(1);        /* should never return */

--- 415,426 ----
              proc_exit(1);        /* should never return */

          case StartupProcess:
!             /* don't set signals, startup process has its own agenda */
!             StartupProcessMain();
!             proc_exit(1);        /* should never return */

          case BgWriterProcess:
              /* don't set signals, bgwriter has its own agenda */
              BackgroundWriterMain();
              proc_exit(1);        /* should never return */

*** src/backend/postmaster/bgwriter.c
--- src/backend/postmaster/bgwriter.c
***************
*** 49,54 ****
--- 49,55 ----
  #include <unistd.h>

  #include "access/xlog_internal.h"
+ #include "catalog/pg_control.h"
  #include "libpq/pqsignal.h"
  #include "miscadmin.h"
  #include "pgstat.h"
***************
*** 197,202 **** BackgroundWriterMain(void)
--- 198,204 ----
  {
      sigjmp_buf    local_sigjmp_buf;
      MemoryContext bgwriter_context;
+     bool        BgWriterRecoveryMode = true;

      BgWriterShmem->bgwriter_pid = MyProcPid;
      am_bg_writer = true;
***************
*** 418,423 **** BackgroundWriterMain(void)
--- 420,439 ----
          }

          /*
+          * Check if we've exited recovery. We do this after determining
+          * whether to perform a checkpoint or not, to be sure that we
+          * perform a real checkpoint and not a restartpoint, if someone
+          * requested a checkpoint immediately after exiting recovery. And
+          * we must have the right TimeLineID when we perform a checkpoint;
+          * IsRecoveryProcessingMode() initializes that as a side-effect.
+          */
+          if (BgWriterRecoveryMode && !IsRecoveryProcessingMode())
+           {
+             elog(DEBUG1, "bgwriter changing from recovery to normal mode");
+             BgWriterRecoveryMode = false;
+         }
+
+         /*
           * Do a checkpoint if requested, otherwise do one cycle of
           * dirty-buffer writing.
           */
***************
*** 444,450 **** BackgroundWriterMain(void)
               * implementation will not generate warnings caused by
               * CheckPointTimeout < CheckPointWarning.
               */
!             if ((flags & CHECKPOINT_CAUSE_XLOG) &&
                  elapsed_secs < CheckPointWarning)
                  ereport(LOG,
                          (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
--- 460,467 ----
               * implementation will not generate warnings caused by
               * CheckPointTimeout < CheckPointWarning.
               */
!             if (!BgWriterRecoveryMode &&
!                 (flags & CHECKPOINT_CAUSE_XLOG) &&
                  elapsed_secs < CheckPointWarning)
                  ereport(LOG,
                          (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
***************
*** 455,468 **** BackgroundWriterMain(void)
               * Initialize bgwriter-private variables used during checkpoint.
               */
              ckpt_active = true;
!             ckpt_start_recptr = GetInsertRecPtr();
              ckpt_start_time = now;
              ckpt_cached_elapsed = 0;

              /*
               * Do the checkpoint.
               */
!             CreateCheckPoint(flags);

              /*
               * After any checkpoint, close all smgr files.    This is so we
--- 472,489 ----
               * Initialize bgwriter-private variables used during checkpoint.
               */
              ckpt_active = true;
!             if (!BgWriterRecoveryMode)
!                 ckpt_start_recptr = GetInsertRecPtr();
              ckpt_start_time = now;
              ckpt_cached_elapsed = 0;

              /*
               * Do the checkpoint.
               */
!             if (!BgWriterRecoveryMode)
!                 CreateCheckPoint(flags);
!             else
!                 CreateRestartPoint(flags);

              /*
               * After any checkpoint, close all smgr files.    This is so we
***************
*** 507,513 **** CheckArchiveTimeout(void)
      pg_time_t    now;
      pg_time_t    last_time;

!     if (XLogArchiveTimeout <= 0)
          return;

      now = (pg_time_t) time(NULL);
--- 528,534 ----
      pg_time_t    now;
      pg_time_t    last_time;

!     if (XLogArchiveTimeout <= 0 || IsRecoveryProcessingMode())
          return;

      now = (pg_time_t) time(NULL);
***************
*** 586,592 **** BgWriterNap(void)
          (ckpt_active ? ImmediateCheckpointRequested() : checkpoint_requested))
              break;
          pg_usleep(1000000L);
!         AbsorbFsyncRequests();
          udelay -= 1000000L;
      }

--- 607,614 ----
          (ckpt_active ? ImmediateCheckpointRequested() : checkpoint_requested))
              break;
          pg_usleep(1000000L);
!         if (!IsRecoveryProcessingMode())
!             AbsorbFsyncRequests();
          udelay -= 1000000L;
      }

***************
*** 714,729 **** IsCheckpointOnSchedule(double progress)
       * However, it's good enough for our purposes, we're only calculating an
       * estimate anyway.
       */
!     recptr = GetInsertRecPtr();
!     elapsed_xlogs =
!         (((double) (int32) (recptr.xlogid - ckpt_start_recptr.xlogid)) * XLogSegsPerFile +
!          ((double) recptr.xrecoff - (double) ckpt_start_recptr.xrecoff) / XLogSegSize) /
!         CheckPointSegments;
!
!     if (progress < elapsed_xlogs)
      {
!         ckpt_cached_elapsed = elapsed_xlogs;
!         return false;
      }

      /*
--- 736,754 ----
       * However, it's good enough for our purposes, we're only calculating an
       * estimate anyway.
       */
!     if (!IsRecoveryProcessingMode())
      {
!         recptr = GetInsertRecPtr();
!         elapsed_xlogs =
!             (((double) (int32) (recptr.xlogid - ckpt_start_recptr.xlogid)) * XLogSegsPerFile +
!              ((double) recptr.xrecoff - (double) ckpt_start_recptr.xrecoff) / XLogSegSize) /
!             CheckPointSegments;
!
!         if (progress < elapsed_xlogs)
!         {
!             ckpt_cached_elapsed = elapsed_xlogs;
!             return false;
!         }
      }

      /*
*** src/backend/postmaster/postmaster.c
--- src/backend/postmaster/postmaster.c
***************
*** 225,235 **** static pid_t StartupPID = 0,
--- 225,262 ----
  static int    Shutdown = NoShutdown;

  static bool FatalError = false; /* T if recovering from backend crash */
+ static bool RecoveryError = false; /* T if recovery failed */
+
+ /* State of WAL redo */
+ #define            NoRecovery            0
+ #define            RecoveryStarted        1
+ #define            RecoveryConsistent    2
+ #define            RecoveryCompleted    3
+
+ static int    RecoveryStatus = NoRecovery;

  /*
   * We use a simple state machine to control startup, shutdown, and
   * crash recovery (which is rather like shutdown followed by startup).
   *
+  * After doing all the postmaster initialization work, we enter PM_STARTUP
+  * state and the startup process is launched. The startup process begins by
+  * reading the control file and other preliminary initialization steps. When
+  * it's ready to start WAL redo, it signals postmaster, and we switch to
+  * PM_RECOVERY phase. The background writer is launched, while the startup
+  * process continues applying WAL.
+  *
+  * After reaching a consistent point in WAL redo, startup process signals
+  * us again, and we switch to PM_RECOVERY_CONSISTENT phase. There's currently
+  * no difference between PM_RECOVERY and PM_RECOVERY_CONSISTENT, but we
+  * could start accepting connections to perform read-only queries at this
+  * point, if we had the infrastructure to do that.
+  *
+  * When the WAL redo is finished, the startup process signals us the third
+  * time, and we switch to PM_RUN state. The startup process can also skip the
+  * recovery and consistent recovery phases altogether, as it will during
+  * normal startup when there's no recovery to be done, for example.
+  *
   * Normal child backends can only be launched when we are in PM_RUN state.
   * (We also allow it in PM_WAIT_BACKUP state, but only for superusers.)
   * In other states we handle connection requests by launching "dead_end"
***************
*** 245,259 **** static bool FatalError = false; /* T if recovering from backend crash */
   *
   * Notice that this state variable does not distinguish *why* we entered
   * states later than PM_RUN --- Shutdown and FatalError must be consulted
!  * to find that out.  FatalError is never true in PM_RUN state, nor in
!  * PM_SHUTDOWN states (because we don't enter those states when trying to
!  * recover from a crash).  It can be true in PM_STARTUP state, because we
!  * don't clear it until we've successfully recovered.
   */
  typedef enum
  {
      PM_INIT,                    /* postmaster starting */
      PM_STARTUP,                    /* waiting for startup subprocess */
      PM_RUN,                        /* normal "database is alive" state */
      PM_WAIT_BACKUP,                /* waiting for online backup mode to end */
      PM_WAIT_BACKENDS,            /* waiting for live backends to exit */
--- 272,288 ----
   *
   * Notice that this state variable does not distinguish *why* we entered
   * states later than PM_RUN --- Shutdown and FatalError must be consulted
!  * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
!  * states, nor in PM_SHUTDOWN states (because we don't enter those states
!  * when trying to recover from a crash).  It can be true in PM_STARTUP state,
!  * because we don't clear it until we've successfully started WAL redo.
   */
  typedef enum
  {
      PM_INIT,                    /* postmaster starting */
      PM_STARTUP,                    /* waiting for startup subprocess */
+     PM_RECOVERY,                /* in recovery mode */
+     PM_RECOVERY_CONSISTENT,        /* consistent recovery mode */
      PM_RUN,                        /* normal "database is alive" state */
      PM_WAIT_BACKUP,                /* waiting for online backup mode to end */
      PM_WAIT_BACKENDS,            /* waiting for live backends to exit */
***************
*** 307,312 **** static void pmdie(SIGNAL_ARGS);
--- 336,342 ----
  static void reaper(SIGNAL_ARGS);
  static void sigusr1_handler(SIGNAL_ARGS);
  static void dummy_handler(SIGNAL_ARGS);
+ static void CheckRecoverySignals(void);
  static void CleanupBackend(int pid, int exitstatus);
  static void HandleChildCrash(int pid, int exitstatus, const char *procname);
  static void LogChildExit(int lev, const char *procname,
***************
*** 1302,1308 **** ServerLoop(void)
           * state that prevents it, start one.  It doesn't matter if this
           * fails, we'll just try again later.
           */
!         if (BgWriterPID == 0 && pmState == PM_RUN)
              BgWriterPID = StartBackgroundWriter();

          /*
--- 1332,1340 ----
           * state that prevents it, start one.  It doesn't matter if this
           * fails, we'll just try again later.
           */
!         if (BgWriterPID == 0 &&
!             (pmState == PM_RUN || pmState == PM_RECOVERY ||
!              pmState == PM_RECOVERY_CONSISTENT))
              BgWriterPID = StartBackgroundWriter();

          /*
***************
*** 1752,1758 **** canAcceptConnections(void)
              return CAC_WAITBACKUP;    /* allow superusers only */
          if (Shutdown > NoShutdown)
              return CAC_SHUTDOWN;    /* shutdown is pending */
!         if (pmState == PM_STARTUP && !FatalError)
              return CAC_STARTUP; /* normal startup */
          return CAC_RECOVERY;    /* else must be crash recovery */
      }
--- 1784,1793 ----
              return CAC_WAITBACKUP;    /* allow superusers only */
          if (Shutdown > NoShutdown)
              return CAC_SHUTDOWN;    /* shutdown is pending */
!         if (!FatalError &&
!             (pmState == PM_STARTUP ||
!              pmState == PM_RECOVERY ||
!              pmState == PM_RECOVERY_CONSISTENT))
              return CAC_STARTUP; /* normal startup */
          return CAC_RECOVERY;    /* else must be crash recovery */
      }
***************
*** 1982,1988 **** pmdie(SIGNAL_ARGS)
              ereport(LOG,
                      (errmsg("received smart shutdown request")));

!             if (pmState == PM_RUN)
              {
                  /* autovacuum workers are told to shut down immediately */
                  SignalAutovacWorkers(SIGTERM);
--- 2017,2023 ----
              ereport(LOG,
                      (errmsg("received smart shutdown request")));

!             if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT)
              {
                  /* autovacuum workers are told to shut down immediately */
                  SignalAutovacWorkers(SIGTERM);
***************
*** 2019,2025 **** pmdie(SIGNAL_ARGS)

              if (StartupPID != 0)
                  signal_child(StartupPID, SIGTERM);
!             if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP)
              {
                  ereport(LOG,
                          (errmsg("aborting any active transactions")));
--- 2054,2067 ----

              if (StartupPID != 0)
                  signal_child(StartupPID, SIGTERM);
!             if (pmState == PM_RECOVERY)
!             {
!                 /* only bgwriter is active in this state */
!                 pmState = PM_WAIT_BACKENDS;
!             }
!             if (pmState == PM_RUN ||
!                 pmState == PM_WAIT_BACKUP ||
!                 pmState == PM_RECOVERY_CONSISTENT)
              {
                  ereport(LOG,
                          (errmsg("aborting any active transactions")));
***************
*** 2116,2125 **** reaper(SIGNAL_ARGS)
          if (pid == StartupPID)
          {
              StartupPID = 0;
-             Assert(pmState == PM_STARTUP);

!             /* FATAL exit of startup is treated as catastrophic */
!             if (!EXIT_STATUS_0(exitstatus))
              {
                  LogChildExit(LOG, _("startup process"),
                               pid, exitstatus);
--- 2158,2179 ----
          if (pid == StartupPID)
          {
              StartupPID = 0;

!             /*
!              * Check if we've received a signal from the startup process
!              * first. This can change pmState. If the startup process sends
!              * a signal and exits immediately afterwards, we might not have
!              * processed the signal yet, and we need to know whether it
!              * completed recovery before it exited.
!              */
!             CheckRecoverySignals();
!
!             /*
!              * Unexpected exit of startup process (including FATAL exit)
!              * during PM_STARTUP is treated as catastrophic. There are no
!              * other processes running yet.
!              */
!             if (pmState == PM_STARTUP)
              {
                  LogChildExit(LOG, _("startup process"),
                               pid, exitstatus);
***************
*** 2127,2186 **** reaper(SIGNAL_ARGS)
                  (errmsg("aborting startup due to startup process failure")));
                  ExitPostmaster(1);
              }
-
              /*
!              * Startup succeeded - we are done with system startup or
!              * recovery.
               */
!             FatalError = false;
!
!             /*
!              * Go to shutdown mode if a shutdown request was pending.
!              */
!             if (Shutdown > NoShutdown)
              {
!                 pmState = PM_WAIT_BACKENDS;
!                 /* PostmasterStateMachine logic does the rest */
                  continue;
              }
-
              /*
!              * Otherwise, commence normal operations.
!              */
!             pmState = PM_RUN;
!
!             /*
!              * Load the flat authorization file into postmaster's cache. The
!              * startup process has recomputed this from the database contents,
!              * so we wait till it finishes before loading it.
!              */
!             load_role();
!
!             /*
!              * Crank up the background writer.    It doesn't matter if this
!              * fails, we'll just try again later.
               */
!             Assert(BgWriterPID == 0);
!             BgWriterPID = StartBackgroundWriter();
!
!             /*
!              * Likewise, start other special children as needed.  In a restart
!              * situation, some of them may be alive already.
!              */
!             if (WalWriterPID == 0)
!                 WalWriterPID = StartWalWriter();
!             if (AutoVacuumingActive() && AutoVacPID == 0)
!                 AutoVacPID = StartAutoVacLauncher();
!             if (XLogArchivingActive() && PgArchPID == 0)
!                 PgArchPID = pgarch_start();
!             if (PgStatPID == 0)
!                 PgStatPID = pgstat_start();
!
!             /* at this point we are really open for business */
!             ereport(LOG,
!                  (errmsg("database system is ready to accept connections")));
!
!             continue;
          }

          /*
--- 2181,2210 ----
                  (errmsg("aborting startup due to startup process failure")));
                  ExitPostmaster(1);
              }
              /*
!              * Any unexpected exit (including FATAL exit) of the startup
!              * process is treated as a crash, except that we don't want
!              * to reinitialize.
               */
!             if (!EXIT_STATUS_0(exitstatus))
              {
!                 RecoveryError = true;
!                 HandleChildCrash(pid, exitstatus,
!                                  _("startup process"));
                  continue;
              }
              /*
!              * Startup process exited normally, but didn't finish recovery.
!              * This can happen if someone other than the postmaster kills the
!              * startup process with SIGTERM. Treat it like a crash.
               */
!             if (pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT)
!             {
!                 RecoveryError = true;
!                 HandleChildCrash(pid, exitstatus,
!                                  _("startup process"));
!                 continue;
!             }
          }

          /*
***************
*** 2443,2448 **** HandleChildCrash(int pid, int exitstatus, const char *procname)
--- 2467,2484 ----
          }
      }

+     /* Take care of the startup process too */
+     if (pid == StartupPID)
+         StartupPID = 0;
+     else if (StartupPID != 0 && !FatalError)
+     {
+         ereport(DEBUG2,
+                 (errmsg_internal("sending %s to process %d",
+                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
+                                  (int) StartupPID)));
+         signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
+     }
+
      /* Take care of the bgwriter too */
      if (pid == BgWriterPID)
          BgWriterPID = 0;
***************
*** 2514,2520 **** HandleChildCrash(int pid, int exitstatus, const char *procname)

      FatalError = true;
      /* We now transit into a state of waiting for children to die */
!     if (pmState == PM_RUN ||
          pmState == PM_WAIT_BACKUP ||
          pmState == PM_SHUTDOWN)
          pmState = PM_WAIT_BACKENDS;
--- 2550,2558 ----

      FatalError = true;
      /* We now transit into a state of waiting for children to die */
!     if (pmState == PM_RECOVERY ||
!         pmState == PM_RECOVERY_CONSISTENT ||
!         pmState == PM_RUN ||
          pmState == PM_WAIT_BACKUP ||
          pmState == PM_SHUTDOWN)
          pmState = PM_WAIT_BACKENDS;
***************
*** 2582,2587 **** LogChildExit(int lev, const char *procname, int pid, int exitstatus)
--- 2620,2746 ----
  static void
  PostmasterStateMachine(void)
  {
+     /* Startup states */
+
+     if (pmState == PM_STARTUP && RecoveryStatus > NoRecovery)
+     {
+         /* WAL redo has started. We're out of reinitialization. */
+         FatalError = false;
+
+         /*
+          * Go to shutdown mode if a shutdown request was pending.
+          */
+         if (Shutdown > NoShutdown)
+         {
+             pmState = PM_WAIT_BACKENDS;
+             /* PostmasterStateMachine logic does the rest */
+         }
+         else
+         {
+             /*
+              * Crank up the background writer.    It doesn't matter if this
+              * fails, we'll just try again later.
+              */
+             Assert(BgWriterPID == 0);
+             BgWriterPID = StartBackgroundWriter();
+
+             pmState = PM_RECOVERY;
+         }
+     }
+     if (pmState == PM_RECOVERY && RecoveryStatus >= RecoveryConsistent)
+     {
+         /*
+          * Go to shutdown mode if a shutdown request was pending.
+          */
+         if (Shutdown > NoShutdown)
+         {
+             pmState = PM_WAIT_BACKENDS;
+             /* PostmasterStateMachine logic does the rest */
+         }
+         else
+         {
+             /*
+              * Startup process has reached a consistent recovery state;
+              * enter consistent recovery mode.
+              */
+             pmState = PM_RECOVERY_CONSISTENT;
+
+             /*
+              * Load the flat authorization file into postmaster's cache. The
+              * startup process won't have recomputed this from the database yet,
+              * so it may change following recovery.
+              */
+             load_role();
+
+             /*
+              * Likewise, start other special children as needed.
+              */
+             Assert(PgStatPID == 0);
+             PgStatPID = pgstat_start();
+
+             /* XXX at this point we could accept read-only connections */
+             ereport(DEBUG1,
+                  (errmsg("database system is in consistent recovery mode")));
+         }
+     }
+     if ((pmState == PM_RECOVERY ||
+          pmState == PM_RECOVERY_CONSISTENT ||
+          pmState == PM_STARTUP) &&
+         RecoveryStatus == RecoveryCompleted)
+     {
+         /*
+          * Startup succeeded.
+          *
+          * Go to shutdown mode if a shutdown request was pending.
+          */
+         if (Shutdown > NoShutdown)
+         {
+             pmState = PM_WAIT_BACKENDS;
+             /* PostmasterStateMachine logic does the rest */
+         }
+         else
+         {
+             /*
+              * Otherwise, commence normal operations.
+              */
+             pmState = PM_RUN;
+
+             /*
+              * Load the flat authorization file into postmaster's cache. The
+              * startup process has recomputed this from the database contents,
+              * so we wait till it finishes before loading it.
+              */
+             load_role();
+
+             /*
+              * Crank up the background writer, if we didn't do that already
+              * when we entered recovery.  It doesn't matter
+              * if this fails, we'll just try again later.
+              */
+             if (BgWriterPID == 0)
+                 BgWriterPID = StartBackgroundWriter();
+
+             /*
+              * Likewise, start other special children as needed.  In a restart
+              * situation, some of them may be alive already.
+              */
+             if (WalWriterPID == 0)
+                 WalWriterPID = StartWalWriter();
+             if (AutoVacuumingActive() && AutoVacPID == 0)
+                 AutoVacPID = StartAutoVacLauncher();
+             if (XLogArchivingActive() && PgArchPID == 0)
+                 PgArchPID = pgarch_start();
+             if (PgStatPID == 0)
+                 PgStatPID = pgstat_start();
+
+             /* at this point we are really open for business */
+             ereport(LOG,
+                 (errmsg("database system is ready to accept connections")));
+         }
+     }
+
+     /* Shutdown states */
+
      if (pmState == PM_WAIT_BACKUP)
      {
          /*
***************
*** 2723,2728 **** PostmasterStateMachine(void)
--- 2882,2896 ----
      }

      /*
+      * If recovery failed, wait for all non-syslogger children to exit,
+      * and then exit postmaster. We don't try to reinitialize when recovery
+      * fails, because more than likely it will just fail again and we will
+      * keep trying forever.
+      */
+     if (RecoveryError && pmState == PM_NO_CHILDREN)
+         ExitPostmaster(1);
+
+     /*
       * If we need to recover from a crash, wait for all non-syslogger
       * children to exit, then reset shmem and StartupDataBase.
       */
***************
*** 2734,2739 **** PostmasterStateMachine(void)
--- 2902,2909 ----
          shmem_exit(1);
          reset_shared(PostPortNumber);

+         RecoveryStatus = NoRecovery;
+
          StartupPID = StartupDataBase();
          Assert(StartupPID != 0);
          pmState = PM_STARTUP;
***************
*** 3838,3843 **** ExitPostmaster(int status)
--- 4008,4044 ----
  }

  /*
+  * common code used in sigusr1_handler() and reaper() to handle
+  * recovery-related signals from startup process
+  */
+ static void
+ CheckRecoverySignals(void)
+ {
+     bool changed = false;
+
+     if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED))
+     {
+         Assert(pmState == PM_STARTUP);
+
+         RecoveryStatus = RecoveryStarted;
+         changed = true;
+     }
+     if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT))
+     {
+         RecoveryStatus = RecoveryConsistent;
+         changed = true;
+     }
+     if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED))
+     {
+         RecoveryStatus = RecoveryCompleted;
+         changed = true;
+     }
+
+     if (changed)
+         PostmasterStateMachine();
+ }
+
+ /*
   * sigusr1_handler - handle signal conditions from child processes
   */
  static void
***************
*** 3847,3852 **** sigusr1_handler(SIGNAL_ARGS)
--- 4048,4055 ----

      PG_SETMASK(&BlockSig);

+     CheckRecoverySignals();
+
      if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
      {
          /*
*** src/backend/storage/buffer/README
--- src/backend/storage/buffer/README
***************
*** 268,270 **** out (and anyone else who flushes buffer contents to disk must do so too).
--- 268,279 ----
  This ensures that the page image transferred to disk is reasonably consistent.
  We might miss a hint-bit update or two but that isn't a problem, for the same
  reasons mentioned under buffer access rules.
+
+ As of 8.4, the background writer is also started during recovery, whenever
+ there is some form of potentially extended recovery to perform. It performs
+ the same service as in normal processing, except that the checkpoints it
+ writes are technically restartpoints. Flushing outstanding WAL for dirty
+ buffers is also skipped, though there shouldn't ever be new WAL entries
+ at that time in any case. We could choose to start the background writer
+ immediately, but we hold off until we can prove the database is in a
+ consistent state, so that the postmaster has a single, clean state change.
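
To make the README point concrete, the decision the background writer has to
make during recovery boils down to something like the sketch below. This is
only an illustration, not code from the patch: the function name and the way
the flags are passed through are assumptions, but CreateRestartPoint() and
IsRecoveryProcessingMode() are the entry points the patch adds.

#include "postgres.h"
#include "access/xlog.h"	/* IsRecoveryProcessingMode(), CreateRestartPoint() */

/*
 * Illustrative sketch only: while WAL replay is still in progress, the
 * periodic checkpoint request becomes a restartpoint, which writes no
 * new WAL; once recovery ends, ordinary checkpoints resume.
 */
static void
PerformPeriodicCheckpoint(int flags)
{
	if (IsRecoveryProcessingMode())
		CreateRestartPoint(flags);
	else
		CreateCheckPoint(flags);
}
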
*** src/backend/utils/init/postinit.c
--- src/backend/utils/init/postinit.c
***************
*** 324,330 **** InitCommunication(void)
   * If you're wondering why this is separate from InitPostgres at all:
   * the critical distinction is that this stuff has to happen before we can
   * run XLOG-related initialization, which is done before InitPostgres --- in
!  * fact, for cases such as checkpoint creation processes, InitPostgres may
   * never be done at all.
   */
  void
--- 324,330 ----
   * If you're wondering why this is separate from InitPostgres at all:
   * the critical distinction is that this stuff has to happen before we can
   * run XLOG-related initialization, which is done before InitPostgres --- in
!  * fact, for cases such as the background writer process, InitPostgres may
   * never be done at all.
   */
  void
*** src/include/access/xlog.h
--- src/include/access/xlog.h
***************
*** 133,139 **** typedef struct XLogRecData
  } XLogRecData;

  extern TimeLineID ThisTimeLineID;        /* current TLI */
! extern bool InRecovery;
  extern XLogRecPtr XactLastRecEnd;

  /* these variables are GUC parameters related to XLOG */
--- 133,148 ----
  } XLogRecData;

  extern TimeLineID ThisTimeLineID;        /* current TLI */
!
! /*
!  * Prior to 8.4, all activity during recovery was carried out by the Startup
!  * process. This local variable continues to be used in many parts of the
!  * code to indicate actions taken by RecoveryManagers. Other processes that
!  * potentially perform work during recovery should check
!  * IsRecoveryProcessingMode(); see the XLogCtl notes in xlog.c.
!  */
! extern bool InRecovery;
!
  extern XLogRecPtr XactLastRecEnd;

  /* these variables are GUC parameters related to XLOG */
***************
*** 199,204 **** extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);
--- 208,215 ----
  extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);

+ extern bool IsRecoveryProcessingMode(void);
+
  extern void UpdateControlFile(void);
  extern Size XLOGShmemSize(void);
  extern void XLOGShmemInit(void);
***************
*** 207,215 **** extern void StartupXLOG(void);
--- 218,229 ----
  extern void ShutdownXLOG(int code, Datum arg);
  extern void InitXLOGAccess(void);
  extern void CreateCheckPoint(int flags);
+ extern void CreateRestartPoint(int flags);
  extern void XLogPutNextOid(Oid nextOid);
  extern XLogRecPtr GetRedoRecPtr(void);
  extern XLogRecPtr GetInsertRecPtr(void);
  extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);

+ extern void StartupProcessMain(void);
+
  #endif   /* XLOG_H */
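
As a concrete example of the comment on InRecovery above (again just a
sketch; the helper name below is invented for illustration), a process
other than startup should consult the shared-memory state through
IsRecoveryProcessingMode() rather than the process-local flag:

#include "postgres.h"
#include "access/xlog.h"	/* IsRecoveryProcessingMode() */

/*
 * Hypothetical guard for a non-startup process that is about to write
 * WAL: InRecovery is only meaningful inside the startup process, so
 * everyone else asks the shared-memory flag instead.
 */
static void
CheckCanInsertXLog(void)
{
	if (IsRecoveryProcessingMode())
		elog(ERROR, "cannot insert WAL records while in recovery");
}
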
*** src/include/storage/pmsignal.h
--- src/include/storage/pmsignal.h
***************
*** 22,27 ****
--- 22,30 ----
   */
  typedef enum
  {
+     PMSIGNAL_RECOVERY_STARTED,    /* recovery has started */
+     PMSIGNAL_RECOVERY_CONSISTENT, /* recovery has reached consistent state */
+     PMSIGNAL_RECOVERY_COMPLETED, /* recovery completed */
      PMSIGNAL_PASSWORD_CHANGE,    /* pg_auth file has changed */
      PMSIGNAL_WAKEN_ARCHIVER,    /* send a NOTIFY signal to xlog archiver */
      PMSIGNAL_ROTATE_LOGFILE,    /* send SIGUSR1 to syslogger to rotate logfile */
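
To show how the new pmsignal reasons fit together with the postmaster's
CheckRecoverySignals() above, here is a sketch of the sending side. The
real calls presumably live in the xlog.c part of the patch, which isn't
visible in the hunks quoted here; the wrapper function below is invented
purely for illustration, while SendPostmasterSignal() is the existing
pmsignal API.

#include "postgres.h"
#include "storage/pmsignal.h"	/* SendPostmasterSignal(), PMSIGNAL_RECOVERY_* */

static void
ReportRecoveryMilestones(void)
{
	/* WAL redo has begun: postmaster can move to PM_RECOVERY and start bgwriter */
	SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);

	/* ... replay continues until a consistent state is reached ... */
	SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);

	/* ... replay finishes; postmaster can proceed towards PM_RUN ... */
	SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED);
}
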
