diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 40b780c..5244ce1 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -89,18 +89,11 @@ bool XLOG_DEBUG = false; #endif /* - * XLOGfileslop is the maximum number of preallocated future XLOG segments. - * When we are done with an old XLOG segment file, we will recycle it as a - * future XLOG segment as long as there aren't already XLOGfileslop future - * segments; else we'll delete it. This could be made a separate GUC - * variable, but at present I think it's sufficient to hardwire it as - * 2*CheckPointSegments+1. Under normal conditions, a checkpoint will free - * no more than 2*CheckPointSegments log segments, and we want to recycle all - * of them; the +1 allows boundary cases to happen without wasting a - * delete/create-segment cycle. + * Estimated distance between checkpoints, in bytes, and measured distance of + * previous checkpoint cycle. */ -#define XLOGfileslop (2*CheckPointSegments + 1) - +static double CheckPointDistanceEstimate = 0; +static double PrevCheckPointDistance = 0; /* * GUC support @@ -668,7 +661,7 @@ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); static void PreallocXlogFiles(XLogRecPtr endptr); -static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr); +static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr); static void UpdateLastRemovedPtr(char *filename); static void ValidateXLOGDirectoryStructure(void); static void CleanupBackupHistory(void); @@ -1458,11 +1451,85 @@ AdvanceXLInsertBuffer(bool new_segment) } /* + * XLOGfileslop is the maximum number of preallocated future XLOG segments. 
+ * When we are done with an old XLOG segment file, we will recycle it as a + * future XLOG segment as long as there aren't already XLOGfileslop future + * segments; else we'll delete it. + */ +static int +XLOGfileslop(XLogRecPtr PriorRedoPtr, XLogRecPtr CurrPtr) +{ + double nsegments; + double targetPtr; + double distance; + + /* + * The number of segments to preallocate/recycle is based on two things: + * an estimate of how much WAL is consumed between checkpoints, and the + * current distance from the prior checkpoint (i.e. the point at which + * we're about to truncate the WAL) to the current WAL insert location. + * + * First, calculate how much WAL space the system would need, if it ran + * steady, using the estimated amount of WAL generated between every + * checkpoint cycle. Then see how much WAL is actually in use at the moment + * (= the distance between the prior redo pointer and current WAL insert + * location). The difference between the two is how much WAL we should keep + * preallocated, so that backends won't have to create new WAL segments. + * + * The reason we do these calculations from the prior checkpoint, not the + * one that just finished, is that this behaves better if some checkpoint + * cycles are abnormally short, like if you perform a manual checkpoint + * right after a timed one. The manual checkpoint will make almost + * a full cycle's worth of WAL segments available for recycling, because + * the segments from the prior's prior, fully-sized checkpoint cycle are + * no longer needed. However, the next checkpoint will make only a few + * segments available for recycling, the ones generated between the timed + * checkpoint and the manual one right after that. If at the manual + * checkpoint we only retained enough segments to get us to the next timed + * one, and removed the rest, then at the next checkpoint we would not have + * enough segments around for recycling, to get us to the checkpoint after + * that. 
Basing the calculations on the distance from the prior redo + * pointer largely fixes that problem. + */ + + /* + * First calculate the expected distance from the redo pointer of a prior + * checkpoint to the point where the next one finishes, assuming that + * the system runs steady all the time. + */ + distance = (2 + CheckPointCompletionTarget) * CheckPointDistanceEstimate; + + /* add 10% for good measure */ + distance *= 1.10; + + /* + * Based on that, calculate the expected point where the next checkpoint + * finishes. + */ + targetPtr = (double) PriorRedoPtr + distance; + + /* + * How many segments do we need to get from the current insert location + * to the end of next checkpoint? That's how many segments we should keep + * preallocated. + */ + if (targetPtr > CurrPtr) + nsegments = (targetPtr - CurrPtr) / XLOG_SEG_SIZE; + else + nsegments = 0; + + /* add one segment to round up. */ + nsegments += 1.0; + + return (int) nsegments; +} + +/* * Check whether we've consumed enough xlog space that a checkpoint is needed. * * new_segno indicates a log file that has just been filled up (or read - * during recovery). We measure the distance from RedoRecPtr to new_segno - * and see if that exceeds CheckPointSegments. + * during recovery). We measure the distance from RedoRecPtr to new_segno, + * and estimate based on that if we're about to exceed checkpoint_segments. * * Note: it is caller's responsibility that RedoRecPtr is up-to-date. */ @@ -2357,9 +2424,14 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * pre-existing file. Otherwise, cope with possibility that someone else * has created the file while we were filling ours: if so, use ours to * pre-create a future log segment. + * + * XXX: We don't have a good estimate of how many WAL files we should keep + * preallocated here. Quite arbitrarily, use CheckPointSegments. 
That's good + * enough for current use of this function; this only gets called when + * there are no more preallocated WAL segments available. */ installed_segno = logsegno; - max_advance = XLOGfileslop; + max_advance = CheckPointSegments; if (!InstallXLogFileSegment(&installed_segno, tmppath, *use_existent, &max_advance, use_lock)) @@ -2888,7 +2960,7 @@ UpdateLastRemovedPtr(char *filename) * whether we want to recycle rather than delete no-longer-wanted log files. */ static void -RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr) +RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) { XLogSegNo endlogSegNo; int max_advance; @@ -2907,7 +2979,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr) * segments up to XLOGfileslop segments beyond the current XLOG location. */ XLByteToPrevSeg(endptr, endlogSegNo); - max_advance = XLOGfileslop; + max_advance = XLOGfileslop(PriorRedoPtr, endptr); xldir = AllocateDir(XLOGDIR); if (xldir == NULL) @@ -6708,7 +6780,8 @@ LogCheckpointEnd(bool restartpoint) elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); " "%d transaction log file(s) added, %d removed, %d recycled; " "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; " - "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s", + "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; " + "distance=%d KB, estimate=%d KB", CheckpointStats.ckpt_bufs_written, (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers, CheckpointStats.ckpt_segs_added, @@ -6719,12 +6792,14 @@ LogCheckpointEnd(bool restartpoint) total_secs, total_usecs / 1000, CheckpointStats.ckpt_sync_rels, longest_secs, longest_usecs / 1000, - average_secs, average_usecs / 1000); + average_secs, average_usecs / 1000, + (int) (PrevCheckPointDistance / 1024.0), (int) (CheckPointDistanceEstimate / 1024.0)); else elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); " "%d transaction log file(s) added, %d removed, %d recycled; " "write=%ld.%03d s, sync=%ld.%03d s, 
total=%ld.%03d s; " - "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s", + "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; " + "distance=%d KB, estimate=%d KB", CheckpointStats.ckpt_bufs_written, (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers, CheckpointStats.ckpt_segs_added, @@ -6735,7 +6810,45 @@ LogCheckpointEnd(bool restartpoint) total_secs, total_usecs / 1000, CheckpointStats.ckpt_sync_rels, longest_secs, longest_usecs / 1000, - average_secs, average_usecs / 1000); + average_secs, average_usecs / 1000, + (int) (PrevCheckPointDistance / 1024.0), (int) (CheckPointDistanceEstimate / 1024.0)); +} + +/* + * Update the estimate of distance between checkpoints. + * + * The estimate is maintained for calculating the number of WAL segments to + * keep preallocated, see XLOGfileslop(). + */ +static void +UpdateCheckPointDistanceEstimate(uint64 nbytes) +{ + /* + * To estimate the number of segments consumed between checkpoints, keep + * a moving average of the actual number of segments consumed in previous + * checkpoint cycles. However, if the load is bursty, with quiet periods and + * busy periods, we want to cater for the peak load. So instead of a plain + * moving average, we let the average decline slowly if the previous cycle + * used fewer segments than estimated, but increase it immediately if it + * used more. + * + * When checkpoints are triggered by checkpoint_segments, this should + * converge to (1.0 + checkpoint_completion_target) * CheckPointSegments. + * + * XXX should we differentiate between explicitly triggered checkpoints, + * and others? The slow-decline will largely mask them out, if they only + * happen every now and then. 
If they are frequent, maybe the estimate + * really should count them in as any others; if you issue a manual + * checkpoint every 5 minutes and never let a timed checkpoint happen, it + * makes sense to base the preallocation on that 5 minute interval rather + * than whatever checkpoint_timeout is set to. + */ + PrevCheckPointDistance = nbytes; + if (CheckPointDistanceEstimate < nbytes) + CheckPointDistanceEstimate = nbytes; + else + CheckPointDistanceEstimate = + (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes); } /* @@ -6775,7 +6888,7 @@ CreateCheckPoint(int flags) XLogCtlInsert *Insert = &XLogCtl->Insert; XLogRecData rdata; uint32 freespace; - XLogSegNo _logSegNo; + XLogRecPtr PriorRedoPtr; VirtualTransactionId *vxids; int nvxids; @@ -7084,10 +7197,10 @@ CreateCheckPoint(int flags) (errmsg("concurrent transaction log activity while database system is shutting down"))); /* - * Select point at which we can truncate the log, which we base on the - * prior checkpoint's earliest info. + * Remember the prior checkpoint's redo pointer, used later to determine + * the point at which we can truncate the log. */ - XLByteToSeg(ControlFile->checkPointCopy.redo, _logSegNo); + PriorRedoPtr = ControlFile->checkPointCopy.redo; /* * Update the control file. @@ -7141,11 +7254,17 @@ CreateCheckPoint(int flags) * Delete old log files (those no longer needed even for previous * checkpoint or the standbys in XLOG streaming). */ - if (_logSegNo) + if (PriorRedoPtr != InvalidXLogRecPtr) { + XLogSegNo _logSegNo; + + /* Update the average distance between checkpoints. 
*/ + UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); + + XLByteToSeg(PriorRedoPtr, _logSegNo); KeepLogSeg(recptr, &_logSegNo); _logSegNo--; - RemoveOldXlogFiles(_logSegNo, recptr); + RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr); } /* @@ -7333,7 +7452,7 @@ CreateRestartPoint(int flags) { XLogRecPtr lastCheckPointRecPtr; CheckPoint lastCheckPoint; - XLogSegNo _logSegNo; + XLogRecPtr PriorRedoPtr; TimestampTz xtime; /* use volatile pointer to prevent code rearrangement */ @@ -7429,10 +7548,10 @@ CreateRestartPoint(int flags) CheckPointGuts(lastCheckPoint.redo, flags); /* - * Select point at which we can truncate the xlog, which we base on the - * prior checkpoint's earliest info. + * Remember the prior checkpoint's redo pointer, used later to determine + * the point at which we can truncate the log. */ - XLByteToSeg(ControlFile->checkPointCopy.redo, _logSegNo); + PriorRedoPtr = ControlFile->checkPointCopy.redo; /* * Update pg_control, using current time. Check that it still shows @@ -7459,12 +7578,15 @@ CreateRestartPoint(int flags) * checkpoint/restartpoint) to prevent the disk holding the xlog from * growing full. */ - if (_logSegNo) + if (PriorRedoPtr != InvalidXLogRecPtr) { XLogRecPtr receivePtr; XLogRecPtr replayPtr; TimeLineID replayTLI; XLogRecPtr endptr; + XLogSegNo _logSegNo; + + XLByteToSeg(PriorRedoPtr, _logSegNo); /* * Get the current end of xlog replayed or received, whichever is @@ -7493,7 +7615,7 @@ CreateRestartPoint(int flags) if (RecoveryInProgress()) ThisTimeLineID = replayTLI; - RemoveOldXlogFiles(_logSegNo, endptr); + RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, endptr); /* * Make more log segments if needed. (Do this after recycling old log