Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL
От | Heikki Linnakangas |
---|---|
Тема | Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL |
Дата | |
Msg-id | 4BAB5D4E.4000009@enterprisedb.com обсуждение исходный текст |
Ответ на | Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL (Fujii Masao <masao.fujii@gmail.com>) |
Ответы |
Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL (Robert Haas <robertmhaas@gmail.com>)
Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL (Fujii Masao <masao.fujii@gmail.com>) |
Список | pgsql-hackers |
Fujii Masao wrote: > On second thought, the following lines seem to be necessary just after > calling XLogPageRead() since it reads new WAL file from another source. > >> if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE) >> emode = PANIC; >> else >> emode = emode_arg; Yep. Here's an updated patch, with these changes since the last patch: * Fix the bug of a spurious PANIC in archive recovery, if the WAL ends in the middle of a WAL record that continues over a WAL segment boundary. * If a corrupt WAL record is found in archive or streamed from master in standby mode, throw WARNING instead of PANIC, and keep trying. In archive recovery (ie. standby_mode=off) it's still a PANIC. We can make it a WARNING too, which gives the pre-9.0 behavior of starting up the server on corruption. I prefer PANIC but the discussion is still going on. * Small code changes to handling of failedSources, inspired by your comment. No change in functionality. This is also available in my git repository at git://git.postgresql.org/git/users/heikki/postgres.git, branch "xlogchanges" -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e57f22e..4aa1870 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -450,20 +450,33 @@ static uint32 openLogSeg = 0; static uint32 openLogOff = 0; /* + * Codes indicating where we got a WAL file from during recovery, or where + * to attempt to get one. + */ +#define XLOG_FROM_ARCHIVE (1<<0) /* Restored using restore_command */ +#define XLOG_FROM_PG_XLOG (1<<1) /* Existing file in pg_xlog */ +#define XLOG_FROM_STREAM (1<<2) /* Streamed from master */ + +/* * These variables are used similarly to the ones above, but for reading * the XLOG. Note, however, that readOff generally represents the offset * of the page just read, not the seek position of the FD itself, which * will be just past that page. 
readLen indicates how much of the current - * page has been read into readBuf. + * page has been read into readBuf, and readSource indicates where we got + * the currently open file from. */ static int readFile = -1; static uint32 readId = 0; static uint32 readSeg = 0; static uint32 readOff = 0; static uint32 readLen = 0; +static int readSource = 0; /* XLOG_FROM_* code */ -/* Is the currently open segment being streamed from primary? */ -static bool readStreamed = false; +/* + * Keeps track of which sources we've tried to read the current WAL + * record from and failed. + */ +static int failedSources = 0; /* Buffer for currently read page (XLOG_BLCKSZ bytes) */ static char *readBuf = NULL; @@ -517,11 +530,12 @@ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath, bool find_free, int *max_advance, bool use_lock); static int XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, - bool fromArchive, bool notexistOk); + int source, bool notexistOk); static int XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, - bool fromArchive); + int sources); static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); +static int emode_for_corrupt_record(int endofwalmode); static void XLogFileClose(void); static bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize); @@ -2573,7 +2587,7 @@ XLogFileOpen(uint32 log, uint32 seg) */ static int XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, - bool fromArchive, bool notfoundOk) + int source, bool notfoundOk) { char xlogfname[MAXFNAMELEN]; char activitymsg[MAXFNAMELEN + 16]; @@ -2582,23 +2596,28 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, XLogFileName(xlogfname, tli, log, seg); - if (fromArchive) + switch (source) { - /* Report recovery progress in PS display */ - snprintf(activitymsg, sizeof(activitymsg), "waiting for %s", - xlogfname); - set_ps_display(activitymsg, false); + case 
XLOG_FROM_ARCHIVE: + /* Report recovery progress in PS display */ + snprintf(activitymsg, sizeof(activitymsg), "waiting for %s", + xlogfname); + set_ps_display(activitymsg, false); - restoredFromArchive = RestoreArchivedFile(path, xlogfname, - "RECOVERYXLOG", - XLogSegSize); - if (!restoredFromArchive) - return -1; - } - else - { - XLogFilePath(path, tli, log, seg); - restoredFromArchive = false; + restoredFromArchive = RestoreArchivedFile(path, xlogfname, + "RECOVERYXLOG", + XLogSegSize); + if (!restoredFromArchive) + return -1; + break; + + case XLOG_FROM_PG_XLOG: + XLogFilePath(path, tli, log, seg); + restoredFromArchive = false; + break; + + default: + elog(ERROR, "invalid XLogFileRead source %d", source); } fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); @@ -2612,6 +2631,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, xlogfname); set_ps_display(activitymsg, false); + readSource = source; + return fd; } if (errno != ENOENT || !notfoundOk) /* unexpected failure? */ @@ -2630,7 +2651,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, * searched in pg_xlog if not found in archive. */ static int -XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive) +XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, int sources) { char path[MAXPGPATH]; ListCell *cell; @@ -2653,20 +2674,19 @@ XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive) if (tli < curFileTLI) break; /* don't bother looking at too-old TLIs */ - fd = XLogFileRead(log, seg, emode, tli, fromArchive, true); - if (fd != -1) - return fd; + if (sources & XLOG_FROM_ARCHIVE) + { + fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_ARCHIVE, true); + if (fd != -1) + { + elog(DEBUG1, "got WAL segment from archive"); + return fd; + } + } - /* - * If not in StandbyMode, fall back to searching pg_xlog. 
In - * StandbyMode we're streaming segments from the primary to pg_xlog, - * and we mustn't confuse the (possibly partial) segments in pg_xlog - * with complete segments ready to be applied. We rather wait for the - * records to arrive through streaming. - */ - if (!StandbyMode && fromArchive) + if (sources & XLOG_FROM_PG_XLOG) { - fd = XLogFileRead(log, seg, emode, tli, false, true); + fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_PG_XLOG, true); if (fd != -1) return fd; } @@ -3520,7 +3540,7 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode) * the returned record pointer always points there. */ static XLogRecord * -ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) +ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt) { XLogRecord *record; char *buffer; @@ -3530,17 +3550,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) total_len; uint32 targetRecOff; uint32 pageHeaderSize; - int emode; - - /* - * We don't expect any invalid records during streaming recovery: we - * should never hit the end of WAL because we wait for it to be streamed. - * Therefore treat any broken WAL as PANIC, instead of failing over. - */ - if (StandbyMode) - emode = PANIC; - else - emode = emode_arg; if (readBuf == NULL) { @@ -3593,6 +3602,9 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) randAccess = true; /* allow curFileTLI to go backwards too */ } + /* This is the first try to read this page. 
*/ + failedSources = 0; +retry: /* Read the page containing the record */ if (!XLogPageRead(RecPtr, emode, fetching_ckpt, randAccess)) return NULL; @@ -3611,7 +3623,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) } else if (targetRecOff < pageHeaderSize) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("invalid record offset at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3619,7 +3631,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("contrecord is requested by %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3634,7 +3646,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) { if (record->xl_len != 0) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("invalid xlog switch record at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3642,7 +3654,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) } else if (record->xl_len == 0) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("record with zero length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3651,14 +3663,14 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) record->xl_tot_len > SizeOfXLogRecord + record->xl_len + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("invalid record length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } if (record->xl_rmid > RM_MAX_ID) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("invalid resource manager ID %u at %X/%X", record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3671,7 
+3683,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) */ if (!XLByteLT(record->xl_prev, *RecPtr)) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); @@ -3687,7 +3699,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) */ if (!XLByteEQ(record->xl_prev, ReadRecPtr)) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); @@ -3716,7 +3728,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) { readRecordBufSize = 0; /* We treat this as a "bogus data" condition */ - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("record length %u at %X/%X too long", total_len, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; @@ -3756,7 +3768,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) /* Check that the continuation record looks valid */ if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("there is no contrecord flag in log file %u, segment %u, offset %u", readId, readSeg, readOff))); goto next_record_is_invalid; @@ -3766,7 +3778,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) if (contrecord->xl_rem_len == 0 || total_len != (contrecord->xl_rem_len + gotlen)) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u", contrecord->xl_rem_len, readId, readSeg, readOff))); @@ -3784,7 +3796,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) contrecord->xl_rem_len); break; } - if (!RecordIsValid(record, *RecPtr, emode)) + if (!RecordIsValid(record, *RecPtr, 
emode_for_corrupt_record(emode))) goto next_record_is_invalid; pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); EndRecPtr.xlogid = readId; @@ -3798,7 +3810,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) } /* Record does not cross a page boundary */ - if (!RecordIsValid(record, *RecPtr, emode)) + if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode))) goto next_record_is_invalid; EndRecPtr.xlogid = RecPtr->xlogid; EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len); @@ -3824,13 +3836,20 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt) } return (XLogRecord *) buffer; -next_record_is_invalid:; +next_record_is_invalid: + failedSources |= readSource; + if (readFile >= 0) { close(readFile); readFile = -1; } - return NULL; + + /* In standby-mode, keep trying */ + if (StandbyMode) + goto retry; + else + return NULL; } /* @@ -8731,10 +8750,15 @@ StartupProcessMain(void) /* * Read the XLOG page containing RecPtr into readBuf (if not read already). - * Returns true if successful, false otherwise or fails if emode is PANIC. + * Returns true if the page is read successfully. * * This is responsible for restoring files from archive as needed, as well * as for waiting for the requested WAL record to arrive in standby mode. + * + * 'emode' specifies the log level used for reporting "file not found" or + * "end of WAL" situations in archive recovery, or in standby mode if a trigger + * file is found. If set to WARNING or below, XLogPageRead() returns false + * in those situations, otherwise the ereport() will cause an error exit. 
*/ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, @@ -8746,13 +8770,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, uint32 targetRecOff; uint32 targetId; uint32 targetSeg; + static pg_time_t last_fail_time = 0; XLByteToSeg(*RecPtr, targetId, targetSeg); targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ; targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ; /* Fast exit if we have read the record in the current buffer already */ - if (targetId == readId && targetSeg == readSeg && + if (failedSources == 0 && targetId == readId && targetSeg == readSeg && targetPageOff == readOff && targetRecOff < readLen) return true; @@ -8764,18 +8789,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, { close(readFile); readFile = -1; + readSource = 0; } XLByteToSeg(*RecPtr, readId, readSeg); +retry: /* See if we need to retrieve more data */ if (readFile < 0 || - (readStreamed && !XLByteLT(*RecPtr, receivedUpto))) + (readSource == XLOG_FROM_STREAM && !XLByteLT(*RecPtr, receivedUpto))) { if (StandbyMode) { - bool last_restore_failed = false; - /* * In standby mode, wait for the requested record to become * available, either via restore_command succeeding to restore the @@ -8800,15 +8825,16 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, { readFile = XLogFileRead(readId, readSeg, PANIC, - recoveryTargetTLI, false, false); + recoveryTargetTLI, + XLOG_FROM_PG_XLOG, false); switched_segment = true; - readStreamed = true; + readSource = XLOG_FROM_STREAM; } break; } if (CheckForStandbyTrigger()) - goto next_record_is_invalid; + goto triggered; /* * When streaming is active, we want to react quickly when @@ -8818,6 +8844,9 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, } else { + int sources; + pg_time_t now; + /* * Until walreceiver manages to reconnect, poll the * archive. 
@@ -8830,48 +8859,73 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, /* Reset curFileTLI if random fetch. */ if (randAccess) curFileTLI = 0; - readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2, true); - switched_segment = true; - readStreamed = false; - if (readFile != -1) - { - elog(DEBUG1, "got WAL segment from archive"); - break; - } /* - * If we succeeded restoring some segments from archive - * since the last connection attempt (or we haven't tried - * streaming yet, retry immediately. But if we haven't, - * assume the problem is persistent, so be less - * aggressive. + * Try to restore the file from archive, or read an + * existing file from pg_xlog. */ - if (last_restore_failed) + sources = XLOG_FROM_ARCHIVE | XLOG_FROM_PG_XLOG; + if (!(sources & ~failedSources)) { /* - * Check to see if the trigger file exists. Note that - * we do this only after failure, so when you create - * the trigger file, we still finish replaying as much - * as we can before failover. + * We've exhausted all options for retrieving the + * file. Retry ... */ - if (CheckForStandbyTrigger()) - goto next_record_is_invalid; - pg_usleep(5000000L); /* 5 seconds */ + failedSources = 0; + + /* + * ... but sleep first if it hasn't been long since + * last attempt. + */ + now = (pg_time_t) time(NULL); + if ((now - last_fail_time) < 5) + { + pg_usleep(1000000L * (5 - (now - last_fail_time))); + now = (pg_time_t) time(NULL); + } + last_fail_time = now; + + /* + * If primary_conninfo is set, launch walreceiver to + * try to stream the missing WAL, before retrying + * to restore from archive/pg_xlog. + * + * If fetching_ckpt is TRUE, RecPtr points to the + * initial checkpoint location. In that case, we use + * RedoStartLSN as the streaming start position instead + * of RecPtr, so that when we later jump backwards to + * start redo at RedoStartLSN, we will have the logs + * streamed already. + */ + if (PrimaryConnInfo) + { + RequestXLogStreaming( + fetching_ckpt ? 
RedoStartLSN : *RecPtr, + PrimaryConnInfo); + continue; + } } - last_restore_failed = true; + /* Don't try to read from a source that just failed */ + sources &= ~failedSources; + readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2, + sources); + switched_segment = true; + if (readFile != -1) + break; /* - * Nope, not found in archive. Try to stream it. - * - * If fetching_ckpt is TRUE, RecPtr points to the initial - * checkpoint location. In that case, we use RedoStartLSN - * as the streaming start position instead of RecPtr, so - * that when we later jump backwards to start redo at - * RedoStartLSN, we will have the logs streamed already. + * Nope, not found in archive and/or pg_xlog. */ - if (PrimaryConnInfo) - RequestXLogStreaming(fetching_ckpt ? RedoStartLSN : *RecPtr, - PrimaryConnInfo); + failedSources |= sources; + + /* + * Check to see if the trigger file exists. Note that + * we do this only after failure, so when you create + * the trigger file, we still finish replaying as much + * as we can from archive and pg_xlog before failover. + */ + if (CheckForStandbyTrigger()) + goto triggered; } /* @@ -8886,13 +8940,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, /* In archive or crash recovery. */ if (readFile < 0) { + int sources; /* Reset curFileTLI if random fetch. */ if (randAccess) curFileTLI = 0; + + sources = XLOG_FROM_PG_XLOG; + if (InArchiveRecovery) + sources |= XLOG_FROM_ARCHIVE; + readFile = XLogFileReadAnyTLI(readId, readSeg, emode, - InArchiveRecovery); + sources); switched_segment = true; - readStreamed = false; if (readFile < 0) return false; } @@ -8900,8 +8959,8 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, } /* - * At this point, we have the right segment open and we know the requested - * record is in it. + * At this point, we have the right segment open and if we're streaming + * we know the requested record is in it. 
*/ Assert(readFile != -1); @@ -8911,7 +8970,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, * requested record has been received, but this is for the benefit of * future calls, to allow quick exit at the top of this function. */ - if (readStreamed) + if (readSource == XLOG_FROM_STREAM) { if (RecPtr->xlogid != receivedUpto.xlogid || (RecPtr->xrecoff / XLOG_BLCKSZ) != (receivedUpto.xrecoff / XLOG_BLCKSZ)) @@ -8936,13 +8995,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, readOff = 0; if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } - if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) + if (!ValidXLOGHeader((XLogPageHeader) readBuf, + emode_for_corrupt_record(emode))) goto next_record_is_invalid; } @@ -8950,7 +9010,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, readOff = targetPageOff; if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", readId, readSeg, readOff))); @@ -8958,13 +9018,13 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, } if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { - ereport(emode, + ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } - if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) + if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode_for_corrupt_record(emode))) goto next_record_is_invalid; Assert(targetId == readId); @@ -8975,16 +9035,67 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, return true; 
next_record_is_invalid: + failedSources |= readSource; + if (readFile >= 0) close(readFile); readFile = -1; - readStreamed = false; readLen = 0; + readSource = 0; + + /* In standby-mode, keep trying */ + if (StandbyMode) + goto retry; + else + return false; + +triggered: + if (readFile >= 0) + close(readFile); + readFile = -1; + readLen = 0; + readSource = 0; return false; } /* + * Determine what log level should be used to report a corrupt WAL record + * in the current WAL page, previously read by XLogPageRead(). + * + * 'emode' is the error mode that would be used to report a file-not-found + * or legitimate end-of-WAL situation. It is upgraded to WARNING or PANIC + * if the an corrupt record is not expected at this point. + */ +static int +emode_for_corrupt_record(int emode) +{ + /* + * We don't expect any invalid records in archive or in records streamed + * from master. Files in the archive should be complete, and we should + * never hit the end of WAL because we stop and wait for more WAL to + * arrive before replaying it. + * + * In standby mode, throw a WARNING and keep retrying. If we're lucky + * it's a transient error and will go away by itself, and in any case + * it's better to keep the standby open for any possible read-only + * queries. In PITR, however, stop recovery immediately, rather than + * fail over. + */ + if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE) + { + if (StandbyMode) + { + if (emode < WARNING) + emode = WARNING; + } + else + emode = PANIC; + } + return emode; +} + +/* * Check to see if the trigger file exists. If it does, request postmaster * to shut down walreceiver, wait for it to exit, remove the trigger * file, and return true.
В списке pgsql-hackers по дате отправления:
Предыдущее
От: Heikki Linnakangas Дата:
Сообщение: Re: Re: [COMMITTERS] pgsql: Make standby server continuously retry restoring the next WAL
Следующее
От: chaoyong wang Дата:
Сообщение: Access Violation when using Berkely DB in C-Language Function in VS2005