diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index f4083c3fe570ebac9a6d172854c86fb0809603f3..e3941c9391b7fe4f277e673e7a2f31ba7a168714 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -590,7 +590,11 @@ A restartpoint is triggered when a checkpoint record is reached if at least <varname>checkpoint_timeout</> seconds have passed since the last restartpoint, or if WAL size is about to exceed - <varname>max_wal_size</>. + <varname>max_wal_size</>. However, because of limitations on when a + restartpoint can be performed, <varname>max_wal_size</> is often exceeded + during recovery, by up to one checkpoint cycle's worth of WAL. + (<varname>max_wal_size</> is never a hard limit anyway, so you should + always leave plenty of headroom to avoid running out of disk space.) </para> <para> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7830b47c8d1cb27ff6c55a73e0e4d4f112a48144..0def47d6ed5bb71e75c5d3b341d96f986082e7f0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10943,7 +10943,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, * Request a restartpoint if we've replayed too much xlog since the * last one. */ - if (StandbyModeRequested && bgwriterLaunched) + if (bgwriterLaunched) { if (XLogCheckpointNeeded(readSegNo)) { diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 0dce6a8ffaa3f23a88bc55b22c0cbc98fff13268..3b3a09ef8860b0497a415cec0ca1d823aaa78841 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -475,10 +475,12 @@ CheckpointerMain(void) /* * Initialize checkpointer-private variables used during - * checkpoint + * checkpoint. 
*/ ckpt_active = true; - if (!do_restartpoint) + if (do_restartpoint) + ckpt_start_recptr = GetXLogReplayRecPtr(NULL); + else ckpt_start_recptr = GetInsertRecPtr(); ckpt_start_time = now; ckpt_cached_elapsed = 0; @@ -720,7 +722,7 @@ CheckpointWriteDelay(int flags, double progress) /* * IsCheckpointOnSchedule -- are we on schedule to finish this checkpoint - * in time? + * (or restartpoint) in time? * * Compares the current progress against the time/segments elapsed since last * checkpoint, and returns true if the progress we've made this far is greater @@ -757,17 +759,27 @@ IsCheckpointOnSchedule(double progress) * compares against RedoRecptr, so this is not completely accurate. * However, it's good enough for our purposes, we're only calculating an * estimate anyway. + * + * During recovery, we compare the last replayed WAL record's location with + * the location computed before calling CreateRestartPoint. That maintains + * the same pacing as we have during checkpoints in normal operation, but + * we might exceed max_wal_size by a fair amount. That's because there can + * be a large gap between a checkpoint's redo-pointer and the checkpoint + * record itself, and we only start the restartpoint after we've seen the + * checkpoint record. (The gap is typically up to CheckPointSegments * + * checkpoint_completion_target where checkpoint_completion_target is the + * value that was in effect when the WAL was generated.) */ - if (!RecoveryInProgress()) - { + if (RecoveryInProgress()) + recptr = GetXLogReplayRecPtr(NULL); + else recptr = GetInsertRecPtr(); - elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments; + elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments; - if (progress < elapsed_xlogs) - { - ckpt_cached_elapsed = elapsed_xlogs; - return false; - } + if (progress < elapsed_xlogs) + { + ckpt_cached_elapsed = elapsed_xlogs; + return false; } /*