Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL

Поиск
Список
Период
Сортировка
От Heikki Linnakangas
Тема Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL
Дата
Msg-id 4BAB5D4E.4000009@enterprisedb.com
обсуждение исходный текст
Ответ на Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL  (Fujii Masao <masao.fujii@gmail.com>)
Ответы Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL  (Robert Haas <robertmhaas@gmail.com>)
Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL  (Fujii Masao <masao.fujii@gmail.com>)
Список pgsql-hackers
Fujii Masao wrote:
> On second thought, the following lines seem to be necessary just after
> calling XLogPageRead() since it reads new WAL file from another source.
>
>>     if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE)
>>         emode = PANIC;
>>     else
>>         emode = emode_arg;

Yep.

Here's an updated patch, with these changes since the last patch:

* Fix a bug that caused a spurious PANIC in archive recovery if the WAL ends
in the middle of a WAL record that continues over a WAL segment boundary.

* If a corrupt WAL record is found in the archive or streamed from the master
in standby mode, throw a WARNING instead of a PANIC, and keep trying. In
archive recovery (i.e. standby_mode=off) it's still a PANIC. We could make
it a WARNING too, which would give the pre-9.0 behavior of starting up the
server on corruption. I prefer PANIC, but the discussion is still going on.

* Small code changes to handling of failedSources, inspired by your
comment. No change in functionality.

This is also available in my git repository at
git://git.postgresql.org/git/users/heikki/postgres.git, branch "xlogchanges"

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e57f22e..4aa1870 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -450,20 +450,33 @@ static uint32 openLogSeg = 0;
 static uint32 openLogOff = 0;

 /*
+ * Codes indicating where we got a WAL file from during recovery, or where
+ * to attempt to get one.
+ */
+#define XLOG_FROM_ARCHIVE        (1<<0)    /* Restored using restore_command */
+#define XLOG_FROM_PG_XLOG        (1<<1)    /* Existing file in pg_xlog */
+#define XLOG_FROM_STREAM        (1<<2)    /* Streamed from master */
+
+/*
  * These variables are used similarly to the ones above, but for reading
  * the XLOG.  Note, however, that readOff generally represents the offset
  * of the page just read, not the seek position of the FD itself, which
  * will be just past that page. readLen indicates how much of the current
- * page has been read into readBuf.
+ * page has been read into readBuf, and readSource indicates where we got
+ * the currently open file from.
  */
 static int    readFile = -1;
 static uint32 readId = 0;
 static uint32 readSeg = 0;
 static uint32 readOff = 0;
 static uint32 readLen = 0;
+static int readSource = 0;        /* XLOG_FROM_* code */

-/* Is the currently open segment being streamed from primary? */
-static bool readStreamed = false;
+/*
+ * Keeps track of which sources we've tried to read the current WAL
+ * record from and failed.
+ */
+static int failedSources = 0;

 /* Buffer for currently read page (XLOG_BLCKSZ bytes) */
 static char *readBuf = NULL;
@@ -517,11 +530,12 @@ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
                        bool find_free, int *max_advance,
                        bool use_lock);
 static int XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
-             bool fromArchive, bool notexistOk);
+             int source, bool notexistOk);
 static int XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode,
-                   bool fromArchive);
+                   int sources);
 static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
              bool randAccess);
+static int emode_for_corrupt_record(int endofwalmode);
 static void XLogFileClose(void);
 static bool RestoreArchivedFile(char *path, const char *xlogfname,
                     const char *recovername, off_t expectedSize);
@@ -2573,7 +2587,7 @@ XLogFileOpen(uint32 log, uint32 seg)
  */
 static int
 XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
-             bool fromArchive, bool notfoundOk)
+             int source, bool notfoundOk)
 {
     char        xlogfname[MAXFNAMELEN];
     char        activitymsg[MAXFNAMELEN + 16];
@@ -2582,23 +2596,28 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,

     XLogFileName(xlogfname, tli, log, seg);

-    if (fromArchive)
+    switch (source)
     {
-        /* Report recovery progress in PS display */
-        snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
-                 xlogfname);
-        set_ps_display(activitymsg, false);
+        case XLOG_FROM_ARCHIVE:
+            /* Report recovery progress in PS display */
+            snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
+                     xlogfname);
+            set_ps_display(activitymsg, false);

-        restoredFromArchive = RestoreArchivedFile(path, xlogfname,
-                                                  "RECOVERYXLOG",
-                                                  XLogSegSize);
-        if (!restoredFromArchive)
-            return -1;
-    }
-    else
-    {
-        XLogFilePath(path, tli, log, seg);
-        restoredFromArchive = false;
+            restoredFromArchive = RestoreArchivedFile(path, xlogfname,
+                                                      "RECOVERYXLOG",
+                                                      XLogSegSize);
+            if (!restoredFromArchive)
+                return -1;
+            break;
+
+        case XLOG_FROM_PG_XLOG:
+            XLogFilePath(path, tli, log, seg);
+            restoredFromArchive = false;
+            break;
+
+        default:
+            elog(ERROR, "invalid XLogFileRead source %d", source);
     }

     fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
@@ -2612,6 +2631,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
                  xlogfname);
         set_ps_display(activitymsg, false);

+        readSource = source;
+
         return fd;
     }
     if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
@@ -2630,7 +2651,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
  * searched in pg_xlog if not found in archive.
  */
 static int
-XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive)
+XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, int sources)
 {
     char        path[MAXPGPATH];
     ListCell   *cell;
@@ -2653,20 +2674,19 @@ XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive)
         if (tli < curFileTLI)
             break;                /* don't bother looking at too-old TLIs */

-        fd = XLogFileRead(log, seg, emode, tli, fromArchive, true);
-        if (fd != -1)
-            return fd;
+        if (sources & XLOG_FROM_ARCHIVE)
+        {
+            fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_ARCHIVE, true);
+            if (fd != -1)
+            {
+                elog(DEBUG1, "got WAL segment from archive");
+                return fd;
+            }
+        }

-        /*
-         * If not in StandbyMode, fall back to searching pg_xlog. In
-         * StandbyMode we're streaming segments from the primary to pg_xlog,
-         * and we mustn't confuse the (possibly partial) segments in pg_xlog
-         * with complete segments ready to be applied. We rather wait for the
-         * records to arrive through streaming.
-         */
-        if (!StandbyMode && fromArchive)
+        if (sources & XLOG_FROM_PG_XLOG)
         {
-            fd = XLogFileRead(log, seg, emode, tli, false, true);
+            fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_PG_XLOG, true);
             if (fd != -1)
                 return fd;
         }
@@ -3520,7 +3540,7 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
  * the returned record pointer always points there.
  */
 static XLogRecord *
-ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
+ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
 {
     XLogRecord *record;
     char       *buffer;
@@ -3530,17 +3550,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
                 total_len;
     uint32        targetRecOff;
     uint32        pageHeaderSize;
-    int            emode;
-
-    /*
-     * We don't expect any invalid records during streaming recovery: we
-     * should never hit the end of WAL because we wait for it to be streamed.
-     * Therefore treat any broken WAL as PANIC, instead of failing over.
-     */
-    if (StandbyMode)
-        emode = PANIC;
-    else
-        emode = emode_arg;

     if (readBuf == NULL)
     {
@@ -3593,6 +3602,9 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
         randAccess = true;        /* allow curFileTLI to go backwards too */
     }

+    /* This is the first try to read this page. */
+    failedSources = 0;
+retry:
     /* Read the page containing the record */
     if (!XLogPageRead(RecPtr, emode, fetching_ckpt, randAccess))
         return NULL;
@@ -3611,7 +3623,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     }
     else if (targetRecOff < pageHeaderSize)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errmsg("invalid record offset at %X/%X",
                         RecPtr->xlogid, RecPtr->xrecoff)));
         goto next_record_is_invalid;
@@ -3619,7 +3631,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
         targetRecOff == pageHeaderSize)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errmsg("contrecord is requested by %X/%X",
                         RecPtr->xlogid, RecPtr->xrecoff)));
         goto next_record_is_invalid;
@@ -3634,7 +3646,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     {
         if (record->xl_len != 0)
         {
-            ereport(emode,
+            ereport(emode_for_corrupt_record(emode),
                     (errmsg("invalid xlog switch record at %X/%X",
                             RecPtr->xlogid, RecPtr->xrecoff)));
             goto next_record_is_invalid;
@@ -3642,7 +3654,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     }
     else if (record->xl_len == 0)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errmsg("record with zero length at %X/%X",
                         RecPtr->xlogid, RecPtr->xrecoff)));
         goto next_record_is_invalid;
@@ -3651,14 +3663,14 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
         record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
         XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errmsg("invalid record length at %X/%X",
                         RecPtr->xlogid, RecPtr->xrecoff)));
         goto next_record_is_invalid;
     }
     if (record->xl_rmid > RM_MAX_ID)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errmsg("invalid resource manager ID %u at %X/%X",
                         record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
         goto next_record_is_invalid;
@@ -3671,7 +3683,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
          */
         if (!XLByteLT(record->xl_prev, *RecPtr))
         {
-            ereport(emode,
+            ereport(emode_for_corrupt_record(emode),
                     (errmsg("record with incorrect prev-link %X/%X at %X/%X",
                             record->xl_prev.xlogid, record->xl_prev.xrecoff,
                             RecPtr->xlogid, RecPtr->xrecoff)));
@@ -3687,7 +3699,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
          */
         if (!XLByteEQ(record->xl_prev, ReadRecPtr))
         {
-            ereport(emode,
+            ereport(emode_for_corrupt_record(emode),
                     (errmsg("record with incorrect prev-link %X/%X at %X/%X",
                             record->xl_prev.xlogid, record->xl_prev.xrecoff,
                             RecPtr->xlogid, RecPtr->xrecoff)));
@@ -3716,7 +3728,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
         {
             readRecordBufSize = 0;
             /* We treat this as a "bogus data" condition */
-            ereport(emode,
+            ereport(emode_for_corrupt_record(emode),
                     (errmsg("record length %u at %X/%X too long",
                             total_len, RecPtr->xlogid, RecPtr->xrecoff)));
             goto next_record_is_invalid;
@@ -3756,7 +3768,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
             /* Check that the continuation record looks valid */
             if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
             {
-                ereport(emode,
+                ereport(emode_for_corrupt_record(emode),
                         (errmsg("there is no contrecord flag in log file %u, segment %u, offset %u",
                                 readId, readSeg, readOff)));
                 goto next_record_is_invalid;
@@ -3766,7 +3778,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
             if (contrecord->xl_rem_len == 0 ||
                 total_len != (contrecord->xl_rem_len + gotlen))
             {
-                ereport(emode,
+                ereport(emode_for_corrupt_record(emode),
                         (errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u",
                                 contrecord->xl_rem_len,
                                 readId, readSeg, readOff)));
@@ -3784,7 +3796,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
                    contrecord->xl_rem_len);
             break;
         }
-        if (!RecordIsValid(record, *RecPtr, emode))
+        if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode)))
             goto next_record_is_invalid;
         pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
         EndRecPtr.xlogid = readId;
@@ -3798,7 +3810,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     }

     /* Record does not cross a page boundary */
-    if (!RecordIsValid(record, *RecPtr, emode))
+    if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode)))
         goto next_record_is_invalid;
     EndRecPtr.xlogid = RecPtr->xlogid;
     EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
@@ -3824,13 +3836,20 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
     }
     return (XLogRecord *) buffer;

-next_record_is_invalid:;
+next_record_is_invalid:
+    failedSources |= readSource;
+
     if (readFile >= 0)
     {
         close(readFile);
         readFile = -1;
     }
-    return NULL;
+
+    /* In standby-mode, keep trying */
+    if (StandbyMode)
+        goto retry;
+    else
+        return NULL;
 }

 /*
@@ -8731,10 +8750,15 @@ StartupProcessMain(void)

 /*
  * Read the XLOG page containing RecPtr into readBuf (if not read already).
- * Returns true if successful, false otherwise or fails if emode is PANIC.
+ * Returns true if the page is read successfully.
  *
  * This is responsible for restoring files from archive as needed, as well
  * as for waiting for the requested WAL record to arrive in standby mode.
+ *
+ * 'emode' specifies the log level used for reporting "file not found" or
+ * "end of WAL" situations in archive recovery, or in standby mode if a trigger
+ * file is found. If set to WARNING or below, XLogPageRead() returns false
+ * in those situations, otherwise the ereport() will cause an error exit.
  */
 static bool
 XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
@@ -8746,13 +8770,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     uint32        targetRecOff;
     uint32        targetId;
     uint32        targetSeg;
+    static pg_time_t last_fail_time = 0;

     XLByteToSeg(*RecPtr, targetId, targetSeg);
     targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
     targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ;

     /* Fast exit if we have read the record in the current buffer already */
-    if (targetId == readId && targetSeg == readSeg &&
+    if (failedSources == 0 && targetId == readId && targetSeg == readSeg &&
         targetPageOff == readOff && targetRecOff < readLen)
         return true;

@@ -8764,18 +8789,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     {
         close(readFile);
         readFile = -1;
+        readSource = 0;
     }

     XLByteToSeg(*RecPtr, readId, readSeg);

+retry:
     /* See if we need to retrieve more data */
     if (readFile < 0 ||
-        (readStreamed && !XLByteLT(*RecPtr, receivedUpto)))
+        (readSource == XLOG_FROM_STREAM && !XLByteLT(*RecPtr, receivedUpto)))
     {
         if (StandbyMode)
         {
-            bool        last_restore_failed = false;
-
             /*
              * In standby mode, wait for the requested record to become
              * available, either via restore_command succeeding to restore the
@@ -8800,15 +8825,16 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
                         {
                             readFile =
                                 XLogFileRead(readId, readSeg, PANIC,
-                                             recoveryTargetTLI, false, false);
+                                             recoveryTargetTLI,
+                                             XLOG_FROM_PG_XLOG, false);
                             switched_segment = true;
-                            readStreamed = true;
+                            readSource = XLOG_FROM_STREAM;
                         }
                         break;
                     }

                     if (CheckForStandbyTrigger())
-                        goto next_record_is_invalid;
+                        goto triggered;

                     /*
                      * When streaming is active, we want to react quickly when
@@ -8818,6 +8844,9 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
                 }
                 else
                 {
+                    int sources;
+                    pg_time_t now;
+
                     /*
                      * Until walreceiver manages to reconnect, poll the
                      * archive.
@@ -8830,48 +8859,73 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
                     /* Reset curFileTLI if random fetch. */
                     if (randAccess)
                         curFileTLI = 0;
-                    readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2, true);
-                    switched_segment = true;
-                    readStreamed = false;
-                    if (readFile != -1)
-                    {
-                        elog(DEBUG1, "got WAL segment from archive");
-                        break;
-                    }

                     /*
-                     * If we succeeded restoring some segments from archive
-                     * since the last connection attempt (or we haven't tried
-                     * streaming yet, retry immediately. But if we haven't,
-                     * assume the problem is persistent, so be less
-                     * aggressive.
+                     * Try to restore the file from archive, or read an
+                     * existing file from pg_xlog.
                      */
-                    if (last_restore_failed)
+                    sources = XLOG_FROM_ARCHIVE | XLOG_FROM_PG_XLOG;
+                    if (!(sources & ~failedSources))
                     {
                         /*
-                         * Check to see if the trigger file exists. Note that
-                         * we do this only after failure, so when you create
-                         * the trigger file, we still finish replaying as much
-                         * as we can before failover.
+                         * We've exhausted all options for retrieving the
+                         * file. Retry ...
                          */
-                        if (CheckForStandbyTrigger())
-                            goto next_record_is_invalid;
-                        pg_usleep(5000000L);    /* 5 seconds */
+                        failedSources = 0;
+
+                        /*
+                         * ... but sleep first if it hasn't been long since
+                         * last attempt.
+                         */
+                        now = (pg_time_t) time(NULL);
+                        if ((now - last_fail_time) < 5)
+                        {
+                            pg_usleep(1000000L * (5 - (now - last_fail_time)));
+                            now = (pg_time_t) time(NULL);
+                        }
+                        last_fail_time = now;
+
+                        /*
+                         * If primary_conninfo is set, launch walreceiver to
+                         * try to stream the missing WAL, before retrying
+                         * to restore from archive/pg_xlog.
+                         *
+                         * If fetching_ckpt is TRUE, RecPtr points to the
+                         * initial checkpoint location. In that case, we use
+                         * RedoStartLSN as the streaming start position instead
+                         * of RecPtr, so that when we later jump backwards to
+                         * start redo at RedoStartLSN, we will have the logs
+                         * streamed already.
+                         */
+                        if (PrimaryConnInfo)
+                        {
+                            RequestXLogStreaming(
+                                fetching_ckpt ? RedoStartLSN : *RecPtr,
+                                PrimaryConnInfo);
+                            continue;
+                        }
                     }
-                    last_restore_failed = true;
+                    /* Don't try to read from a source that just failed */
+                    sources &= ~failedSources;
+                    readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2,
+                                                  sources);
+                    switched_segment = true;
+                    if (readFile != -1)
+                        break;

                     /*
-                     * Nope, not found in archive. Try to stream it.
-                     *
-                     * If fetching_ckpt is TRUE, RecPtr points to the initial
-                     * checkpoint location. In that case, we use RedoStartLSN
-                     * as the streaming start position instead of RecPtr, so
-                     * that when we later jump backwards to start redo at
-                     * RedoStartLSN, we will have the logs streamed already.
+                     * Nope, not found in archive and/or pg_xlog.
                      */
-                    if (PrimaryConnInfo)
-                        RequestXLogStreaming(fetching_ckpt ? RedoStartLSN : *RecPtr,
-                                             PrimaryConnInfo);
+                    failedSources |= sources;
+
+                    /*
+                     * Check to see if the trigger file exists. Note that
+                     * we do this only after failure, so when you create
+                     * the trigger file, we still finish replaying as much
+                     * as we can from archive and pg_xlog before failover.
+                     */
+                    if (CheckForStandbyTrigger())
+                        goto triggered;
                 }

                 /*
@@ -8886,13 +8940,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
             /* In archive or crash recovery. */
             if (readFile < 0)
             {
+                int sources;
                 /* Reset curFileTLI if random fetch. */
                 if (randAccess)
                     curFileTLI = 0;
+
+                sources = XLOG_FROM_PG_XLOG;
+                if (InArchiveRecovery)
+                    sources |= XLOG_FROM_ARCHIVE;
+
                 readFile = XLogFileReadAnyTLI(readId, readSeg, emode,
-                                              InArchiveRecovery);
+                                              sources);
                 switched_segment = true;
-                readStreamed = false;
                 if (readFile < 0)
                     return false;
             }
@@ -8900,8 +8959,8 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     }

     /*
-     * At this point, we have the right segment open and we know the requested
-     * record is in it.
+     * At this point, we have the right segment open and if we're streaming
+     * we know the requested record is in it.
      */
     Assert(readFile != -1);

@@ -8911,7 +8970,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
      * requested record has been received, but this is for the benefit of
      * future calls, to allow quick exit at the top of this function.
      */
-    if (readStreamed)
+    if (readSource == XLOG_FROM_STREAM)
     {
         if (RecPtr->xlogid != receivedUpto.xlogid ||
             (RecPtr->xrecoff / XLOG_BLCKSZ) != (receivedUpto.xrecoff / XLOG_BLCKSZ))
@@ -8936,13 +8995,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
         readOff = 0;
         if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
         {
-            ereport(emode,
+            ereport(emode_for_corrupt_record(emode),
                     (errcode_for_file_access(),
                      errmsg("could not read from log file %u, segment %u, offset %u: %m",
                             readId, readSeg, readOff)));
             goto next_record_is_invalid;
         }
-        if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+        if (!ValidXLOGHeader((XLogPageHeader) readBuf,
+                             emode_for_corrupt_record(emode)))
             goto next_record_is_invalid;
     }

@@ -8950,7 +9010,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     readOff = targetPageOff;
     if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errcode_for_file_access(),
          errmsg("could not seek in log file %u, segment %u to offset %u: %m",
                 readId, readSeg, readOff)));
@@ -8958,13 +9018,13 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     }
     if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
     {
-        ereport(emode,
+        ereport(emode_for_corrupt_record(emode),
                 (errcode_for_file_access(),
          errmsg("could not read from log file %u, segment %u, offset %u: %m",
                 readId, readSeg, readOff)));
         goto next_record_is_invalid;
     }
-    if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+    if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode_for_corrupt_record(emode)))
         goto next_record_is_invalid;

     Assert(targetId == readId);
@@ -8975,16 +9035,67 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
     return true;

 next_record_is_invalid:
+    failedSources |= readSource;
+
     if (readFile >= 0)
         close(readFile);
     readFile = -1;
-    readStreamed = false;
     readLen = 0;
+    readSource = 0;
+
+    /* In standby-mode, keep trying */
+    if (StandbyMode)
+        goto retry;
+    else
+        return false;
+
+triggered:
+    if (readFile >= 0)
+        close(readFile);
+    readFile = -1;
+    readLen = 0;
+    readSource = 0;

     return false;
 }

 /*
+ * Determine what log level should be used to report a corrupt WAL record
+ * in the current WAL page, previously read by XLogPageRead().
+ *
+ * 'emode' is the error mode that would be used to report a file-not-found
+ * or legitimate end-of-WAL situation. It is upgraded to WARNING or PANIC
+ * if a corrupt record is not expected at this point.
+ */
+static int
+emode_for_corrupt_record(int emode)
+{
+    /*
+     * We don't expect any invalid records in archive or in records streamed
+     * from master. Files in the archive should be complete, and we should
+     * never hit the end of WAL because we stop and wait for more WAL to
+     * arrive before replaying it.
+     *
+     * In standby mode, throw a WARNING and keep retrying. If we're lucky
+     * it's a transient error and will go away by itself, and in any case
+     * it's better to keep the standby open for any possible read-only
+     * queries. In PITR, however, stop recovery immediately, rather than
+     * fail over.
+     */
+    if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE)
+    {
+        if (StandbyMode)
+        {
+            if (emode < WARNING)
+                emode = WARNING;
+        }
+        else
+            emode = PANIC;
+    }
+    return emode;
+}
+
+/*
  * Check to see if the trigger file exists. If it does, request postmaster
  * to shut down walreceiver, wait for it to exit, remove the trigger
  * file, and return true.

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Heikki Linnakangas
Дата:
Сообщение: Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL
Следующее
От: chaoyong wang
Дата:
Сообщение: Access Violation when using Berkely DB in C-Language Function in VS2005