diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index ad4f3162c53852140fa1cb80902fcc4851eb206c..51b37ca8f8c2febf5ba4914a3d76c57e0d1ad4db 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -40,6 +40,28 @@ #include "access/xlogdefs.h" #include "storage/fd.h" +/* + * Copies the timeline history files with IDs from 'begin' up to, but not + * including, 'end' from archive to pg_xlog. Timeline 1 is always skipped. + */ +void +restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end) +{ + char path[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + TimeLineID tli; + + for (tli = begin; tli < end; tli++) + { + if (tli == 1) + continue; + + TLHistoryFileName(histfname, tli); + if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false)) + KeepFileRestoredFromArchive(path, histfname); + } +} + /* * Try to read a timeline's history file. * diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9ad92271795b5904887cf58d5843c5d6c0dcbf17..d316c97926553588bf05716d3ef59d170786b211 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3276,8 +3276,8 @@ rescanLatestTimeLine(void) bool found; ListCell *cell; TimeLineID newtarget; + TimeLineID oldtarget = recoveryTargetTLI; TimeLineHistoryEntry *currentTle = NULL; - /* use volatile pointer to prevent code rearrangement */ newtarget = findNewestTimeLine(recoveryTargetTLI); if (newtarget == recoveryTargetTLI) @@ -3336,6 +3336,12 @@ rescanLatestTimeLine(void) list_free_deep(expectedTLEs); expectedTLEs = newExpectedTLEs; + /* + * As in StartupXLOG(), try to ensure we have all the history files + * between the old target and new target in pg_xlog. 
+ */ + restoreTimeLineHistoryFiles(oldtarget + 1, newtarget); + ereport(LOG, (errmsg("new target timeline is %u", recoveryTargetTLI))); @@ -4993,6 +4999,20 @@ StartupXLOG(void) */ ThisTimeLineID = checkPoint.ThisTimeLineID; + /* + * Copy any missing timeline history files between 'now' and the + * recovery target timeline from archive to pg_xlog. While we don't need + * those files ourselves - the history file of the recovery target + * timeline covers all the previous timelines in the history too - a + * cascading standby server might be interested in them. Or, if you + * archive the WAL from this server to a different archive than the + * master, it'd be good for all the history files to get archived there + * after failover, so that you can use one of the old timelines as a + * PITR target. Timeline history files are small, so it's better to copy + * them unnecessarily than not copy them and regret later. + */ + restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI); + lastFullPageWrites = checkPoint.fullPageWrites; RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ba138e73da387a6489ed8cbf26fe3950cce9aa60..10e40506965fef3a9b2208b00fbbda26c2fd2e64 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -110,6 +110,9 @@ static int sendFile = -1; static XLogSegNo sendSegNo = 0; static uint32 sendOff = 0; +/* Timeline ID of the currently open file */ +static TimeLineID curFileTimeLine = 0; + /* * These variables keep track of the state of the timeline we're currently * sending. sendTimeLine identifies the timeline. If sendTimeLineIsHistoric, @@ -1201,8 +1204,8 @@ WalSndKill(int code, Datum arg) * always be one descriptor left open until the process ends, but never * more than one. 
*/ -void -XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) +static void +XLogRead(char *buf, XLogRecPtr startptr, Size count) { char *p; XLogRecPtr recptr; @@ -1222,7 +1225,7 @@ retry: startoff = recptr % XLogSegSize; - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo) || sendTimeLine != tli) + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) { char path[MAXPGPATH]; @@ -1230,9 +1233,45 @@ retry: if (sendFile >= 0) close(sendFile); - sendTimeLine = tli; XLByteToSeg(recptr, sendSegNo); - XLogFilePath(path, sendTimeLine, sendSegNo); + + /*------- + * When reading from a historic timeline, and there is a timeline + * switch within this segment, read from the WAL segment belonging + * to the new timeline. + * + * For example, imagine that this server is currently on timeline + * 5, and we're streaming timeline 4. The switch from timeline 4 + * to 5 happened at 0/13002088. In pg_xlog, we have these files: + * + * ... + * 000000040000000000000012 + * 000000040000000000000013 + * 000000050000000000000013 + * 000000050000000000000014 + * ... + * + * In this situation, when requested to send the WAL from + * segment 0x13, on timeline 4, we read the WAL from file + * 000000050000000000000013. Archive recovery prefers files from + * newer timelines, so if the segment was restored from the + * archive on this server, the file belonging to the old timeline, + * 000000040000000000000013, might not exist. Their contents are + * equal up to the switchpoint, because at a timeline switch, the + * used portion of the old segment is copied to the new file. 
+ *------- + */ + curFileTimeLine = sendTimeLine; + if (sendTimeLineIsHistoric) + { + XLogSegNo endSegNo; + + XLByteToSeg(sendTimeLineValidUpto, endSegNo); + if (sendSegNo == endSegNo) + curFileTimeLine = sendTimeLineNextTLI; + } + + XLogFilePath(path, curFileTimeLine, sendSegNo); sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); if (sendFile < 0) @@ -1246,7 +1285,7 @@ retry: ereport(ERROR, (errcode_for_file_access(), errmsg("requested WAL segment %s has already been removed", - XLogFileNameP(sendTimeLine, sendSegNo)))); + XLogFileNameP(curFileTimeLine, sendSegNo)))); else ereport(ERROR, (errcode_for_file_access(), @@ -1263,7 +1302,7 @@ retry: ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek in log segment %s to offset %u: %m", - XLogFileNameP(sendTimeLine, sendSegNo), + XLogFileNameP(curFileTimeLine, sendSegNo), startoff))); sendOff = startoff; } @@ -1280,7 +1319,7 @@ retry: ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from log segment %s, offset %u, length %lu: %m", - XLogFileNameP(sendTimeLine, sendSegNo), + XLogFileNameP(curFileTimeLine, sendSegNo), sendOff, (unsigned long) segbytes))); } @@ -1524,7 +1563,7 @@ XLogSend(bool *caughtup) * calls. 
*/ enlargeStringInfo(&output_message, nbytes); - XLogRead(&output_message.data[output_message.len], sendTimeLine, startptr, nbytes); + XLogRead(&output_message.data[output_message.len], startptr, nbytes); output_message.len += nbytes; output_message.data[output_message.len] = '\0'; diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h index 7d45fcad8a4379f101d98cc69f8348c85936dbb2..2e5e9a42a386d5aebe4e8cc262333280b9ee1e81 100644 --- a/src/include/access/timeline.h +++ b/src/include/access/timeline.h @@ -35,6 +35,7 @@ extern TimeLineID findNewestTimeLine(TimeLineID startTLI); extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason); extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size); +extern void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end); extern bool tliInHistory(TimeLineID tli, List *expectedTLIs); extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history); extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history, diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index 8f479fda7e5b91158dd097007cdd43f8b876975c..7eaa21b9f7e6eb8ce02a96d1d35b0e916356dc9f 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -95,7 +95,6 @@ extern WalSndCtlData *WalSndCtl; extern void WalSndSetState(WalSndState state); -extern void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count); /* * Internal functions for parsing the replication grammar, in repl_gram.y and