From d5497b95f3ca2fc50c6eef46d3394ab6e6855956 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas <heikki.linnakangas@iki.fi> Date: Tue, 2 Oct 2012 13:37:19 +0300 Subject: [PATCH] Split off functions related to timeline history files and XLOG archiving. This is just refactoring, to make the functions accessible outside xlog.c. A followup patch will make use of that, to allow fetching timeline history files over streaming replication. --- src/backend/access/transam/Makefile | 3 +- src/backend/access/transam/timeline.c | 378 +++++++++ src/backend/access/transam/xlog.c | 988 ++--------------------- src/backend/access/transam/xlogarchive.c | 572 +++++++++++++ src/include/access/timeline.h | 23 + src/include/access/xlog_internal.h | 23 + 6 files changed, 1058 insertions(+), 929 deletions(-) create mode 100644 src/backend/access/transam/timeline.c create mode 100644 src/backend/access/transam/xlogarchive.c create mode 100644 src/include/access/timeline.h diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index f82f10e989e..700cfd85c08 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -13,7 +13,8 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \ - twophase.o twophase_rmgr.o xlog.o xlogfuncs.o xlogutils.o + timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \ + xlogutils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c new file mode 100644 index 00000000000..89d31c5a1bf --- /dev/null +++ b/src/backend/access/transam/timeline.c @@ -0,0 +1,378 @@ +/*------------------------------------------------------------------------- + * + * timeline.c + * Functions for reading and writing timeline history files. 
+ * + * A timeline history file lists the timeline changes of the timeline, in + * a simple text format. They are archived along with the WAL segments. + * + * The files are named like "<tli>.history", where <tli> is the timeline ID + * as eight hex digits; the WAL segment is not part of the name. For example, + * if the database starts up and switches to timeline 5 (the old timeline + * was 3), the timeline history file would be called "00000005.history". + * + * Each line in the file represents a timeline switch: + * + * <parentTLI> <xlogfname> <reason> + * + * parentTLI ID of the parent timeline + * xlogfname filename of the WAL segment where the switch happened + * reason human-readable explanation of why the timeline was changed + * + * The fields are separated by tabs. Lines beginning with # are comments, and + * are ignored. Empty lines are also ignored. + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/timeline.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <stdio.h> +#include <unistd.h> + +#include "access/timeline.h" +#include "access/xlog_internal.h" +#include "access/xlogdefs.h" +#include "storage/fd.h" + +/* + * Try to read a timeline's history file. + * + * If successful, return the list of component TLIs (the given TLI followed by + * its ancestor TLIs). If we can't find the history file, assume that the + * timeline has no parents, and return a list of just the specified timeline + * ID. 
+ */ +List * +readTimeLineHistory(TimeLineID targetTLI) +{ + List *result; + char path[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + char fline[MAXPGPATH]; + FILE *fd; + + /* Timeline 1 does not have a history file, so no need to check */ + if (targetTLI == 1) + return list_make1_int((int) targetTLI); + + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, targetTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + } + else + TLHistoryFilePath(path, targetTLI); + + fd = AllocateFile(path, "r"); + if (fd == NULL) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + /* Not there, so assume no parents */ + return list_make1_int((int) targetTLI); + } + + result = NIL; + + /* + * Parse the file... + */ + while (fgets(fline, sizeof(fline), fd) != NULL) + { + /* skip leading whitespace and check for # comment */ + char *ptr; + char *endptr; + TimeLineID tli; + + for (ptr = fline; *ptr; ptr++) + { + if (!isspace((unsigned char) *ptr)) + break; + } + if (*ptr == '\0' || *ptr == '#') + continue; + + /* expect a numeric timeline ID as first field of line */ + tli = (TimeLineID) strtoul(ptr, &endptr, 0); + if (endptr == ptr) + ereport(FATAL, + (errmsg("syntax error in history file: %s", fline), + errhint("Expected a numeric timeline ID."))); + + if (result && + tli <= (TimeLineID) linitial_int(result)) + ereport(FATAL, + (errmsg("invalid data in history file: %s", fline), + errhint("Timeline IDs must be in increasing sequence."))); + + /* Build list with newest item first */ + result = lcons_int((int) tli, result); + + /* we ignore the remainder of each line */ + } + + FreeFile(fd); + + if (result && + targetTLI <= (TimeLineID) linitial_int(result)) + ereport(FATAL, + (errmsg("invalid data in history file \"%s\"", path), + errhint("Timeline IDs must be less than child timeline's ID."))); + + result = lcons_int((int) targetTLI, result); + + ereport(DEBUG3, + (errmsg_internal("history 
of timeline %u is %s", + targetTLI, nodeToString(result)))); + + return result; +} + +/* + * Probe whether a timeline history file exists for the given timeline ID + */ +bool +existsTimeLineHistory(TimeLineID probeTLI) +{ + char path[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + FILE *fd; + + /* Timeline 1 does not have a history file, so no need to check */ + if (probeTLI == 1) + return false; + + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, probeTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + } + else + TLHistoryFilePath(path, probeTLI); + + fd = AllocateFile(path, "r"); + if (fd != NULL) + { + FreeFile(fd); + return true; + } + else + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + return false; + } +} + +/* + * Find the newest existing timeline, assuming that startTLI exists. + * + * Note: while this is somewhat heuristic, it does positively guarantee + * that (result + 1) is not a known timeline, and therefore it should + * be safe to assign that ID to a new timeline. + */ +TimeLineID +findNewestTimeLine(TimeLineID startTLI) +{ + TimeLineID newestTLI; + TimeLineID probeTLI; + + /* + * The algorithm is just to probe for the existence of timeline history + * files. XXX is it useful to allow gaps in the sequence? + */ + newestTLI = startTLI; + + for (probeTLI = startTLI + 1;; probeTLI++) + { + if (existsTimeLineHistory(probeTLI)) + { + newestTLI = probeTLI; /* probeTLI exists */ + } + else + { + /* doesn't exist, assume we're done */ + break; + } + } + + return newestTLI; +} + +/* + * Create a new timeline history file. + * + * newTLI: ID of the new timeline + * parentTLI: ID of its immediate parent + * endTLI et al: ID of the last used WAL file, for annotation purposes + * reason: human-readable explanation of why the timeline was switched + * + * Currently this is only used at the end of recovery, and so there are no locking + * considerations. 
But we should be just as tense as XLogFileInit to avoid + * emplacing a bogus file. + */ +void +writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason) +{ + char path[MAXPGPATH]; + char tmppath[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + char xlogfname[MAXFNAMELEN]; + char buffer[BLCKSZ]; + int srcfd; + int fd; + int nbytes; + + Assert(newTLI > parentTLI); /* else bad selection of newTLI */ + + /* + * Write into a temp file name. + */ + snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); + + unlink(tmppath); + + /* do not use get_sync_bit() here --- want to fsync only at end of fill */ + fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tmppath))); + + /* + * If a history file exists for the parent, copy it verbatim + */ + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, parentTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + } + else + TLHistoryFilePath(path, parentTLI); + + srcfd = BasicOpenFile(path, O_RDONLY, 0); + if (srcfd < 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + /* Not there, so assume parent has no parents */ + } + else + { + for (;;) + { + errno = 0; + nbytes = (int) read(srcfd, buffer, sizeof(buffer)); + if (nbytes < 0 || errno != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", path))); + if (nbytes == 0) + break; + errno = 0; + if ((int) write(fd, buffer, nbytes) != nbytes) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk + * space + */ + unlink(tmppath); + + /* + * if write didn't set errno, assume problem is no disk space + */ + errno = save_errno ? 
save_errno : ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + } + close(srcfd); + } + + /* + * Append one line with the details of this timeline split. + * + * If we did have a parent file, insert an extra newline just in case the + * parent file failed to end with one. + */ + XLogFileName(xlogfname, endTLI, endLogSegNo); + + snprintf(buffer, sizeof(buffer), + "%s%u\t%s\t%s\n", + (srcfd < 0) ? "" : "\n", + parentTLI, + xlogfname, + reason); + + nbytes = strlen(buffer); + errno = 0; + if ((int) write(fd, buffer, nbytes) != nbytes) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + + if (pg_fsync(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tmppath))); + + if (close(fd)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tmppath))); + + + /* + * Now move the completed history file into place with its final name. + */ + TLHistoryFilePath(path, newTLI); + + /* + * Prefer link() to rename() here just to be really sure that we don't + * overwrite an existing logfile. However, there shouldn't be one, so + * rename() is an acceptable substitute except for the truly paranoid. 
+ */ +#if HAVE_WORKING_LINK + if (link(tmppath, path) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not link file \"%s\" to \"%s\": %m", + tmppath, path))); + unlink(tmppath); +#else + if (rename(tmppath, path) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tmppath, path))); +#endif +} diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ff56c26ab4c..b3519b04b33 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -26,6 +26,7 @@ #include "access/clog.h" #include "access/multixact.h" #include "access/subtrans.h" +#include "access/timeline.h" #include "access/transam.h" #include "access/tuptoaster.h" #include "access/twophase.h" @@ -190,14 +191,14 @@ static bool LocalHotStandbyActive = false; */ static int LocalXLogInsertAllowed = -1; -/* Are we recovering using offline XLOG archives? */ -static bool InArchiveRecovery = false; +/* Are we recovering using offline XLOG archives? (only valid in the startup process) */ +bool InArchiveRecovery = false; /* Was the last xlog file restored from archive, or local? 
*/ static bool restoredFromArchive = false; /* options taken from recovery.conf for archive recovery */ -static char *recoveryRestoreCommand = NULL; +char *recoveryRestoreCommand = NULL; static char *recoveryEndCommand = NULL; static char *archiveCleanupCommand = NULL; static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; @@ -208,7 +209,7 @@ static TimestampTz recoveryTargetTime; static char *recoveryTargetName; /* options taken from recovery.conf for XLOG streaming */ -static bool StandbyMode = false; +bool StandbyMode = false; static char *PrimaryConnInfo = NULL; static char *TriggerFile = NULL; @@ -604,11 +605,6 @@ typedef struct xl_restore_point } xl_restore_point; -static void XLogArchiveNotify(const char *xlog); -static void XLogArchiveNotifySeg(XLogSegNo segno); -static bool XLogArchiveCheckDone(const char *xlog); -static bool XLogArchiveIsBusy(const char *xlog); -static void XLogArchiveCleanup(const char *xlog); static void readRecoveryCommandFile(void); static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); @@ -636,10 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); -static bool RestoreArchivedFile(char *path, const char *xlogfname, - const char *recovername, off_t expectedSize); -static void ExecuteRecoveryCommand(char *command, char *commandName, - bool failOnerror); static void PreallocXlogFiles(XLogRecPtr endptr); static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr); static void UpdateLastRemovedPtr(char *filename); @@ -652,12 +644,7 @@ static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode); static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode, bool randAccess); static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt); -static 
List *readTimeLineHistory(TimeLineID targetTLI); -static bool existsTimeLineHistory(TimeLineID probeTLI); static bool rescanLatestTimeLine(void); -static TimeLineID findNewestTimeLine(TimeLineID startTLI); -static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, - TimeLineID endTLI, XLogSegNo endLogSegNo); static void WriteControlFile(void); static void ReadControlFile(void); static char *str_time(pg_time_t tnow); @@ -1278,168 +1265,6 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites, return false; /* buffer does not need to be backed up */ } -/* - * XLogArchiveNotify - * - * Create an archive notification file - * - * The name of the notification file is the message that will be picked up - * by the archiver, e.g. we write 0000000100000001000000C6.ready - * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6, - * then when complete, rename it to 0000000100000001000000C6.done - */ -static void -XLogArchiveNotify(const char *xlog) -{ - char archiveStatusPath[MAXPGPATH]; - FILE *fd; - - /* insert an otherwise empty file called <XLOG>.ready */ - StatusFilePath(archiveStatusPath, xlog, ".ready"); - fd = AllocateFile(archiveStatusPath, "w"); - if (fd == NULL) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not create archive status file \"%s\": %m", - archiveStatusPath))); - return; - } - if (FreeFile(fd)) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not write archive status file \"%s\": %m", - archiveStatusPath))); - return; - } - - /* Notify archiver that it's got something to do */ - if (IsUnderPostmaster) - SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); -} - -/* - * Convenience routine to notify using segment number representation of filename - */ -static void -XLogArchiveNotifySeg(XLogSegNo segno) -{ - char xlog[MAXFNAMELEN]; - - XLogFileName(xlog, ThisTimeLineID, segno); - XLogArchiveNotify(xlog); -} - -/* - * XLogArchiveCheckDone - * - * This is called when we are ready to delete or 
recycle an old XLOG segment - * file or backup history file. If it is okay to delete it then return true. - * If it is not time to delete it, make sure a .ready file exists, and return - * false. - * - * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists, - * then return false; else create <XLOG>.ready and return false. - * - * The reason we do things this way is so that if the original attempt to - * create <XLOG>.ready fails, we'll retry during subsequent checkpoints. - */ -static bool -XLogArchiveCheckDone(const char *xlog) -{ - char archiveStatusPath[MAXPGPATH]; - struct stat stat_buf; - - /* Always deletable if archiving is off */ - if (!XLogArchivingActive()) - return true; - - /* First check for .done --- this means archiver is done with it */ - StatusFilePath(archiveStatusPath, xlog, ".done"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return true; - - /* check for .ready --- this means archiver is still busy with it */ - StatusFilePath(archiveStatusPath, xlog, ".ready"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return false; - - /* Race condition --- maybe archiver just finished, so recheck */ - StatusFilePath(archiveStatusPath, xlog, ".done"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return true; - - /* Retry creation of the .ready file */ - XLogArchiveNotify(xlog); - return false; -} - -/* - * XLogArchiveIsBusy - * - * Check to see if an XLOG segment file is still unarchived. - * This is almost but not quite the inverse of XLogArchiveCheckDone: in - * the first place we aren't chartered to recreate the .ready file, and - * in the second place we should consider that if the file is already gone - * then it's not busy. (This check is needed to handle the race condition - * that a checkpoint already deleted the no-longer-needed file.) 
- */ -static bool -XLogArchiveIsBusy(const char *xlog) -{ - char archiveStatusPath[MAXPGPATH]; - struct stat stat_buf; - - /* First check for .done --- this means archiver is done with it */ - StatusFilePath(archiveStatusPath, xlog, ".done"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return false; - - /* check for .ready --- this means archiver is still busy with it */ - StatusFilePath(archiveStatusPath, xlog, ".ready"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return true; - - /* Race condition --- maybe archiver just finished, so recheck */ - StatusFilePath(archiveStatusPath, xlog, ".done"); - if (stat(archiveStatusPath, &stat_buf) == 0) - return false; - - /* - * Check to see if the WAL file has been removed by checkpoint, which - * implies it has already been archived, and explains why we can't see a - * status file for it. - */ - snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog); - if (stat(archiveStatusPath, &stat_buf) != 0 && - errno == ENOENT) - return false; - - return true; -} - -/* - * XLogArchiveCleanup - * - * Cleanup archive notification file(s) for a particular xlog segment - */ -static void -XLogArchiveCleanup(const char *xlog) -{ - char archiveStatusPath[MAXPGPATH]; - - /* Remove the .done file */ - StatusFilePath(archiveStatusPath, xlog, ".done"); - unlink(archiveStatusPath); - /* should we complain about failure? */ - - /* Remove the .ready file if present --- normally it shouldn't be */ - StatusFilePath(archiveStatusPath, xlog, ".ready"); - unlink(archiveStatusPath); - /* should we complain about failure? */ -} - /* * Advance the Insert state to the next buffer page, writing out the next * buffer if it still contains unwritten data. @@ -2954,386 +2779,6 @@ XLogFileClose(void) openLogFile = -1; } -/* - * Attempt to retrieve the specified file from off-line archival storage. 
- * If successful, fill "path" with its complete path (note that this will be - * a temp file name that doesn't follow the normal naming convention), and - * return TRUE. - * - * If not successful, fill "path" with the name of the normal on-line file - * (which may or may not actually exist, but we'll try to use it), and return - * FALSE. - * - * For fixed-size files, the caller may pass the expected size as an - * additional crosscheck on successful recovery. If the file size is not - * known, set expectedSize = 0. - */ -static bool -RestoreArchivedFile(char *path, const char *xlogfname, - const char *recovername, off_t expectedSize) -{ - char xlogpath[MAXPGPATH]; - char xlogRestoreCmd[MAXPGPATH]; - char lastRestartPointFname[MAXPGPATH]; - char *dp; - char *endp; - const char *sp; - int rc; - bool signaled; - struct stat stat_buf; - XLogSegNo restartSegNo; - - /* In standby mode, restore_command might not be supplied */ - if (recoveryRestoreCommand == NULL) - goto not_available; - - /* - * When doing archive recovery, we always prefer an archived log file even - * if a file of the same name exists in XLOGDIR. The reason is that the - * file in XLOGDIR could be an old, un-filled or partly-filled version - * that was copied and restored as part of backing up $PGDATA. - * - * We could try to optimize this slightly by checking the local copy - * lastchange timestamp against the archived copy, but we have no API to - * do this, nor can we guarantee that the lastchange timestamp was - * preserved correctly when we copied to archive. Our aim is robustness, - * so we elect not to do this. - * - * If we cannot obtain the log file from the archive, however, we will try - * to use the XLOGDIR file if it exists. This is so that we can make use - * of log segments that weren't yet transferred to the archive. 
- * - * Notice that we don't actually overwrite any files when we copy back - * from archive because the recoveryRestoreCommand may inadvertently - * restore inappropriate xlogs, or they may be corrupt, so we may wish to - * fallback to the segments remaining in current XLOGDIR later. The - * copy-from-archive filename is always the same, ensuring that we don't - * run out of disk space on long recoveries. - */ - snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); - - /* - * Make sure there is no existing file named recovername. - */ - if (stat(xlogpath, &stat_buf) != 0) - { - if (errno != ENOENT) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - xlogpath))); - } - else - { - if (unlink(xlogpath) != 0) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", - xlogpath))); - } - - /* - * Calculate the archive file cutoff point for use during log shipping - * replication. All files earlier than this point can be deleted from the - * archive, though there is no requirement to do so. - * - * We initialise this with the filename of an InvalidXLogRecPtr, which - * will prevent the deletion of any WAL files from the archive because of - * the alphabetic sorting property of WAL filenames. - * - * Once we have successfully located the redo pointer of the checkpoint - * from which we start recovery we never request a file prior to the redo - * pointer of the last restartpoint. When redo begins we know that we have - * successfully located it, so there is no need for additional status - * flags to signify the point when we can begin deleting WAL files from - * the archive. 
- */ - if (InRedo) - { - XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo); - XLogFileName(lastRestartPointFname, - ControlFile->checkPointCopy.ThisTimeLineID, - restartSegNo); - /* we shouldn't need anything earlier than last restart point */ - Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); - } - else - XLogFileName(lastRestartPointFname, 0, 0L); - - /* - * construct the command to be executed - */ - dp = xlogRestoreCmd; - endp = xlogRestoreCmd + MAXPGPATH - 1; - *endp = '\0'; - - for (sp = recoveryRestoreCommand; *sp; sp++) - { - if (*sp == '%') - { - switch (sp[1]) - { - case 'p': - /* %p: relative path of target file */ - sp++; - StrNCpy(dp, xlogpath, endp - dp); - make_native_path(dp); - dp += strlen(dp); - break; - case 'f': - /* %f: filename of desired file */ - sp++; - StrNCpy(dp, xlogfname, endp - dp); - dp += strlen(dp); - break; - case 'r': - /* %r: filename of last restartpoint */ - sp++; - StrNCpy(dp, lastRestartPointFname, endp - dp); - dp += strlen(dp); - break; - case '%': - /* convert %% to a single % */ - sp++; - if (dp < endp) - *dp++ = *sp; - break; - default: - /* otherwise treat the % as not special */ - if (dp < endp) - *dp++ = *sp; - break; - } - } - else - { - if (dp < endp) - *dp++ = *sp; - } - } - *dp = '\0'; - - ereport(DEBUG3, - (errmsg_internal("executing restore command \"%s\"", - xlogRestoreCmd))); - - /* - * Check signals before restore command and reset afterwards. - */ - PreRestoreCommand(); - - /* - * Copy xlog from archival storage to XLOGDIR - */ - rc = system(xlogRestoreCmd); - - PostRestoreCommand(); - - if (rc == 0) - { - /* - * command apparently succeeded, but let's make sure the file is - * really there now and has the correct size. - */ - if (stat(xlogpath, &stat_buf) == 0) - { - if (expectedSize > 0 && stat_buf.st_size != expectedSize) - { - int elevel; - - /* - * If we find a partial file in standby mode, we assume it's - * because it's just being copied to the archive, and keep - * trying. 
- * - * Otherwise treat a wrong-sized file as FATAL to ensure the - * DBA would notice it, but is that too strong? We could try - * to plow ahead with a local copy of the file ... but the - * problem is that there probably isn't one, and we'd - * incorrectly conclude we've reached the end of WAL and we're - * done recovering ... - */ - if (StandbyMode && stat_buf.st_size < expectedSize) - elevel = DEBUG1; - else - elevel = FATAL; - ereport(elevel, - (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", - xlogfname, - (unsigned long) stat_buf.st_size, - (unsigned long) expectedSize))); - return false; - } - else - { - ereport(LOG, - (errmsg("restored log file \"%s\" from archive", - xlogfname))); - strcpy(path, xlogpath); - return true; - } - } - else - { - /* stat failed */ - if (errno != ENOENT) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - xlogpath))); - } - } - - /* - * Remember, we rollforward UNTIL the restore fails so failure here is - * just part of the process... that makes it difficult to determine - * whether the restore failed because there isn't an archive to restore, - * or because the administrator has specified the restore program - * incorrectly. We have to assume the former. - * - * However, if the failure was due to any sort of signal, it's best to - * punt and abort recovery. (If we "return false" here, upper levels will - * assume that recovery is complete and start up the database!) It's - * essential to abort on child SIGINT and SIGQUIT, because per spec - * system() ignores SIGINT and SIGQUIT while waiting; if we see one of - * those it's a good bet we should have gotten it too. - * - * On SIGTERM, assume we have received a fast shutdown request, and exit - * cleanly. It's pure chance whether we receive the SIGTERM first, or the - * child process. If we receive it first, the signal handler will call - * proc_exit, otherwise we do it here. 
If we or the child process received - * SIGTERM for any other reason than a fast shutdown request, postmaster - * will perform an immediate shutdown when it sees us exiting - * unexpectedly. - * - * Per the Single Unix Spec, shells report exit status > 128 when a called - * command died on a signal. Also, 126 and 127 are used to report - * problems such as an unfindable command; treat those as fatal errors - * too. - */ - if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) - proc_exit(1); - - signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; - - ereport(signaled ? FATAL : DEBUG2, - (errmsg("could not restore file \"%s\" from archive: return code %d", - xlogfname, rc))); - -not_available: - - /* - * if an archived file is not available, there might still be a version of - * this file in XLOGDIR, so return that as the filename to open. - * - * In many recovery scenarios we expect this to fail also, but if so that - * just means we've reached the end of WAL. - */ - snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); - return false; -} - -/* - * Attempt to execute an external shell command during recovery. - * - * 'command' is the shell command to be executed, 'commandName' is a - * human-readable name describing the command emitted in the logs. If - * 'failOnSignal' is true and the command is killed by a signal, a FATAL - * error is thrown. Otherwise a WARNING is emitted. - * - * This is currently used for recovery_end_command and archive_cleanup_command. - */ -static void -ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) -{ - char xlogRecoveryCmd[MAXPGPATH]; - char lastRestartPointFname[MAXPGPATH]; - char *dp; - char *endp; - const char *sp; - int rc; - bool signaled; - XLogSegNo restartSegNo; - - Assert(command && commandName); - - /* - * Calculate the archive file cutoff point for use during log shipping - * replication. All files earlier than this point can be deleted from the - * archive, though there is no requirement to do so. 
- */ - LWLockAcquire(ControlFileLock, LW_SHARED); - XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo); - XLogFileName(lastRestartPointFname, - ControlFile->checkPointCopy.ThisTimeLineID, - restartSegNo); - LWLockRelease(ControlFileLock); - - /* - * construct the command to be executed - */ - dp = xlogRecoveryCmd; - endp = xlogRecoveryCmd + MAXPGPATH - 1; - *endp = '\0'; - - for (sp = command; *sp; sp++) - { - if (*sp == '%') - { - switch (sp[1]) - { - case 'r': - /* %r: filename of last restartpoint */ - sp++; - StrNCpy(dp, lastRestartPointFname, endp - dp); - dp += strlen(dp); - break; - case '%': - /* convert %% to a single % */ - sp++; - if (dp < endp) - *dp++ = *sp; - break; - default: - /* otherwise treat the % as not special */ - if (dp < endp) - *dp++ = *sp; - break; - } - } - else - { - if (dp < endp) - *dp++ = *sp; - } - } - *dp = '\0'; - - ereport(DEBUG3, - (errmsg_internal("executing %s \"%s\"", commandName, command))); - - /* - * execute the constructed command - */ - rc = system(xlogRecoveryCmd); - if (rc != 0) - { - /* - * If the failure was due to any sort of signal, it's best to punt and - * abort recovery. See also detailed comments on signals in - * RestoreArchivedFile(). - */ - signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; - - ereport((signaled && failOnSignal) ? FATAL : WARNING, - /*------ - translator: First %s represents a recovery.conf parameter name like - "recovery_end_command", and the 2nd is the value of that parameter. */ - (errmsg("%s \"%s\": return code %d", commandName, - command, rc))); - } -} - /* * Preallocate log files beyond the specified log endpoint. * @@ -4304,140 +3749,6 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode, return true; } -/* - * Try to read a timeline's history file. - * - * If successful, return the list of component TLIs (the given TLI followed by - * its ancestor TLIs). 
If we can't find the history file, assume that the - * timeline has no parents, and return a list of just the specified timeline - * ID. - */ -static List * -readTimeLineHistory(TimeLineID targetTLI) -{ - List *result; - char path[MAXPGPATH]; - char histfname[MAXFNAMELEN]; - char fline[MAXPGPATH]; - FILE *fd; - - /* Timeline 1 does not have a history file, so no need to check */ - if (targetTLI == 1) - return list_make1_int((int) targetTLI); - - if (InArchiveRecovery) - { - TLHistoryFileName(histfname, targetTLI); - RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); - } - else - TLHistoryFilePath(path, targetTLI); - - fd = AllocateFile(path, "r"); - if (fd == NULL) - { - if (errno != ENOENT) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", path))); - /* Not there, so assume no parents */ - return list_make1_int((int) targetTLI); - } - - result = NIL; - - /* - * Parse the file... - */ - while (fgets(fline, sizeof(fline), fd) != NULL) - { - /* skip leading whitespace and check for # comment */ - char *ptr; - char *endptr; - TimeLineID tli; - - for (ptr = fline; *ptr; ptr++) - { - if (!isspace((unsigned char) *ptr)) - break; - } - if (*ptr == '\0' || *ptr == '#') - continue; - - /* expect a numeric timeline ID as first field of line */ - tli = (TimeLineID) strtoul(ptr, &endptr, 0); - if (endptr == ptr) - ereport(FATAL, - (errmsg("syntax error in history file: %s", fline), - errhint("Expected a numeric timeline ID."))); - - if (result && - tli <= (TimeLineID) linitial_int(result)) - ereport(FATAL, - (errmsg("invalid data in history file: %s", fline), - errhint("Timeline IDs must be in increasing sequence."))); - - /* Build list with newest item first */ - result = lcons_int((int) tli, result); - - /* we ignore the remainder of each line */ - } - - FreeFile(fd); - - if (result && - targetTLI <= (TimeLineID) linitial_int(result)) - ereport(FATAL, - (errmsg("invalid data in history file \"%s\"", path), - errhint("Timeline 
IDs must be less than child timeline's ID."))); - - result = lcons_int((int) targetTLI, result); - - ereport(DEBUG3, - (errmsg_internal("history of timeline %u is %s", - targetTLI, nodeToString(result)))); - - return result; -} - -/* - * Probe whether a timeline history file exists for the given timeline ID - */ -static bool -existsTimeLineHistory(TimeLineID probeTLI) -{ - char path[MAXPGPATH]; - char histfname[MAXFNAMELEN]; - FILE *fd; - - /* Timeline 1 does not have a history file, so no need to check */ - if (probeTLI == 1) - return false; - - if (InArchiveRecovery) - { - TLHistoryFileName(histfname, probeTLI); - RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); - } - else - TLHistoryFilePath(path, probeTLI); - - fd = AllocateFile(path, "r"); - if (fd != NULL) - { - FreeFile(fd); - return true; - } - else - { - if (errno != ENOENT) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", path))); - return false; - } -} - /* * Scan for new timelines that might have appeared in the archive since we * started recovery. @@ -4501,239 +3812,6 @@ rescanLatestTimeLine(void) return false; } -/* - * Find the newest existing timeline, assuming that startTLI exists. - * - * Note: while this is somewhat heuristic, it does positively guarantee - * that (result + 1) is not a known timeline, and therefore it should - * be safe to assign that ID to a new timeline. - */ -static TimeLineID -findNewestTimeLine(TimeLineID startTLI) -{ - TimeLineID newestTLI; - TimeLineID probeTLI; - - /* - * The algorithm is just to probe for the existence of timeline history - * files. XXX is it useful to allow gaps in the sequence? - */ - newestTLI = startTLI; - - for (probeTLI = startTLI + 1;; probeTLI++) - { - if (existsTimeLineHistory(probeTLI)) - { - newestTLI = probeTLI; /* probeTLI exists */ - } - else - { - /* doesn't exist, assume we're done */ - break; - } - } - - return newestTLI; -} - -/* - * Create a new timeline history file. 
- * - * newTLI: ID of the new timeline - * parentTLI: ID of its immediate parent - * endTLI et al: ID of the last used WAL file, for annotation purposes - * - * Currently this is only used during recovery, and so there are no locking - * considerations. But we should be just as tense as XLogFileInit to avoid - * emplacing a bogus file. - */ -static void -writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, - TimeLineID endTLI, XLogSegNo endLogSegNo) -{ - char path[MAXPGPATH]; - char tmppath[MAXPGPATH]; - char histfname[MAXFNAMELEN]; - char xlogfname[MAXFNAMELEN]; - char buffer[BLCKSZ]; - int srcfd; - int fd; - int nbytes; - - Assert(newTLI > parentTLI); /* else bad selection of newTLI */ - - /* - * Write into a temp file name. - */ - snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); - - unlink(tmppath); - - /* do not use get_sync_bit() here --- want to fsync only at end of fill */ - fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL, - S_IRUSR | S_IWUSR); - if (fd < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", tmppath))); - - /* - * If a history file exists for the parent, copy it verbatim - */ - if (InArchiveRecovery) - { - TLHistoryFileName(histfname, parentTLI); - RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); - } - else - TLHistoryFilePath(path, parentTLI); - - srcfd = BasicOpenFile(path, O_RDONLY, 0); - if (srcfd < 0) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", path))); - /* Not there, so assume parent has no parents */ - } - else - { - for (;;) - { - errno = 0; - nbytes = (int) read(srcfd, buffer, sizeof(buffer)); - if (nbytes < 0 || errno != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", path))); - if (nbytes == 0) - break; - errno = 0; - if ((int) write(fd, buffer, nbytes) != nbytes) - { - int save_errno = errno; - - /* - * If we fail to make the 
file, delete it to release disk - * space - */ - unlink(tmppath); - - /* - * if write didn't set errno, assume problem is no disk space - */ - errno = save_errno ? save_errno : ENOSPC; - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); - } - } - close(srcfd); - } - - /* - * Append one line with the details of this timeline split. - * - * If we did have a parent file, insert an extra newline just in case the - * parent file failed to end with one. - */ - XLogFileName(xlogfname, endTLI, endLogSegNo); - - /* - * Write comment to history file to explain why and where timeline - * changed. Comment varies according to the recovery target used. - */ - if (recoveryTarget == RECOVERY_TARGET_XID) - snprintf(buffer, sizeof(buffer), - "%s%u\t%s\t%s transaction %u\n", - (srcfd < 0) ? "" : "\n", - parentTLI, - xlogfname, - recoveryStopAfter ? "after" : "before", - recoveryStopXid); - else if (recoveryTarget == RECOVERY_TARGET_TIME) - snprintf(buffer, sizeof(buffer), - "%s%u\t%s\t%s %s\n", - (srcfd < 0) ? "" : "\n", - parentTLI, - xlogfname, - recoveryStopAfter ? "after" : "before", - timestamptz_to_str(recoveryStopTime)); - else if (recoveryTarget == RECOVERY_TARGET_NAME) - snprintf(buffer, sizeof(buffer), - "%s%u\t%s\tat restore point \"%s\"\n", - (srcfd < 0) ? "" : "\n", - parentTLI, - xlogfname, - recoveryStopName); - else - snprintf(buffer, sizeof(buffer), - "%s%u\t%s\tno recovery target specified\n", - (srcfd < 0) ? "" : "\n", - parentTLI, - xlogfname); - - nbytes = strlen(buffer); - errno = 0; - if ((int) write(fd, buffer, nbytes) != nbytes) - { - int save_errno = errno; - - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); - /* if write didn't set errno, assume problem is no disk space */ - errno = save_errno ? 
save_errno : ENOSPC; - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); - } - - if (pg_fsync(fd) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", tmppath))); - - if (close(fd)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", tmppath))); - - - /* - * Now move the completed history file into place with its final name. - */ - TLHistoryFilePath(path, newTLI); - - /* - * Prefer link() to rename() here just to be really sure that we don't - * overwrite an existing logfile. However, there shouldn't be one, so - * rename() is an acceptable substitute except for the truly paranoid. - */ -#if HAVE_WORKING_LINK - if (link(tmppath, path) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not link file \"%s\" to \"%s\": %m", - tmppath, path))); - unlink(tmppath); -#else - if (rename(tmppath, path) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not rename file \"%s\" to \"%s\": %m", - tmppath, path))); -#endif - - /* The history file can be archived immediately. */ - TLHistoryFileName(histfname, newTLI); - XLogArchiveNotify(histfname); -} - /* * I/O routines for pg_control * @@ -6869,11 +5947,35 @@ StartupXLOG(void) */ if (InArchiveRecovery) { + char reason[200]; + ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1; ereport(LOG, (errmsg("selected new timeline ID: %u", ThisTimeLineID))); + + /* + * Write comment to history file to explain why and where timeline + * changed. Comment varies according to the recovery target used. + */ + if (recoveryTarget == RECOVERY_TARGET_XID) + snprintf(reason, sizeof(reason), + "%s transaction %u", + recoveryStopAfter ? "after" : "before", + recoveryStopXid); + else if (recoveryTarget == RECOVERY_TARGET_TIME) + snprintf(reason, sizeof(reason), + "%s %s\n", + recoveryStopAfter ? 
"after" : "before", + timestamptz_to_str(recoveryStopTime)); + else if (recoveryTarget == RECOVERY_TARGET_NAME) + snprintf(reason, sizeof(reason), + "at restore point \"%s\"", + recoveryStopName); + else + snprintf(reason, sizeof(reason), "no recovery target specified"); + writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI, - curFileTLI, endLogSegNo); + curFileTLI, endLogSegNo, reason); } /* Save the selected TimeLineID in shared memory, too */ @@ -9918,6 +9020,36 @@ GetXLogWriteRecPtr(void) return LogwrtResult.Write; } +/* + * Returns the redo pointer of the last restartpoint. This is the oldest + * point in WAL that we still need, if we have to restart recovery. Returns + * InvalidXLogRecPtr if we don't reliably know that point yet, that is, + * before we have started WAL redo. + * + * This function only works in the startup process, and only while we are + * in WAL redo. It's important to not return a value before redo has started, + * to avoid deleting WAL files that we might still need, but there's no + * fundamental reason why this couldn't return a valid value after redo has + * finished, or in other processes. This is enough for the current usage, + * however. 
+ */ +void +GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli) +{ + if (InRedo) + { + LWLockAcquire(ControlFileLock, LW_SHARED); + *oldrecptr = ControlFile->checkPointCopy.redo; + *oldtli = ControlFile->checkPointCopy.ThisTimeLineID; + LWLockRelease(ControlFileLock); + } + else + { + *oldrecptr = InvalidXLogRecPtr; + *oldtli = 0; + } +} + /* * read_backup_label: check to see if a backup_label file is present * diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c new file mode 100644 index 00000000000..e72795783fc --- /dev/null +++ b/src/backend/access/transam/xlogarchive.c @@ -0,0 +1,572 @@ +/*------------------------------------------------------------------------- + * + * xlogarchive.c + * Functions for archiving WAL files and restoring from the archive. + * + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/xlogarchive.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <signal.h> +#include <unistd.h> + +#include "access/xlog_internal.h" +#include "miscadmin.h" +#include "postmaster/startup.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pmsignal.h" + +/* + * Attempt to retrieve the specified file from off-line archival storage. + * If successful, fill "path" with its complete path (note that this will be + * a temp file name that doesn't follow the normal naming convention), and + * return TRUE. + * + * If not successful, fill "path" with the name of the normal on-line file + * (which may or may not actually exist, but we'll try to use it), and return + * FALSE. + * + * For fixed-size files, the caller may pass the expected size as an + * additional crosscheck on successful recovery. 
If the file size is not + * known, set expectedSize = 0. + */ +bool +RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize) +{ + char xlogpath[MAXPGPATH]; + char xlogRestoreCmd[MAXPGPATH]; + char lastRestartPointFname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + bool signaled; + struct stat stat_buf; + XLogSegNo restartSegNo; + XLogRecPtr restartRedoPtr; + TimeLineID restartTli; + + /* In standby mode, restore_command might not be supplied */ + if (recoveryRestoreCommand == NULL) + goto not_available; + + /* + * When doing archive recovery, we always prefer an archived log file even + * if a file of the same name exists in XLOGDIR. The reason is that the + * file in XLOGDIR could be an old, un-filled or partly-filled version + * that was copied and restored as part of backing up $PGDATA. + * + * We could try to optimize this slightly by checking the local copy + * lastchange timestamp against the archived copy, but we have no API to + * do this, nor can we guarantee that the lastchange timestamp was + * preserved correctly when we copied to archive. Our aim is robustness, + * so we elect not to do this. + * + * If we cannot obtain the log file from the archive, however, we will try + * to use the XLOGDIR file if it exists. This is so that we can make use + * of log segments that weren't yet transferred to the archive. + * + * Notice that we don't actually overwrite any files when we copy back + * from archive because the restore_command may inadvertently + * restore inappropriate xlogs, or they may be corrupt, so we may wish to + * fallback to the segments remaining in current XLOGDIR later. The + * copy-from-archive filename is always the same, ensuring that we don't + * run out of disk space on long recoveries. + */ + snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); + + /* + * Make sure there is no existing file named recovername. 
+ */ + if (stat(xlogpath, &stat_buf) != 0) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + xlogpath))); + } + else + { + if (unlink(xlogpath) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", + xlogpath))); + } + + /* + * Calculate the archive file cutoff point for use during log shipping + * replication. All files earlier than this point can be deleted from the + * archive, though there is no requirement to do so. + * + * We initialise this with the filename of an InvalidXLogRecPtr, which + * will prevent the deletion of any WAL files from the archive because of + * the alphabetic sorting property of WAL filenames. + * + * Once we have successfully located the redo pointer of the checkpoint + * from which we start recovery we never request a file prior to the redo + * pointer of the last restartpoint. When redo begins we know that we have + * successfully located it, so there is no need for additional status + * flags to signify the point when we can begin deleting WAL files from + * the archive. 
+ */ + GetOldestRestartPoint(&restartRedoPtr, &restartTli); + if (!XLogRecPtrIsInvalid(restartRedoPtr)) + { + XLByteToSeg(restartRedoPtr, restartSegNo); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + /* we shouldn't need anything earlier than last restart point */ + Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); + } + else + XLogFileName(lastRestartPointFname, 0, 0L); + + /* + * construct the command to be executed + */ + dp = xlogRestoreCmd; + endp = xlogRestoreCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = recoveryRestoreCommand; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'p': + /* %p: relative path of target file */ + sp++; + StrNCpy(dp, xlogpath, endp - dp); + make_native_path(dp); + dp += strlen(dp); + break; + case 'f': + /* %f: filename of desired file */ + sp++; + StrNCpy(dp, xlogfname, endp - dp); + dp += strlen(dp); + break; + case 'r': + /* %r: filename of last restartpoint */ + sp++; + StrNCpy(dp, lastRestartPointFname, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing restore command \"%s\"", + xlogRestoreCmd))); + + /* + * Check signals before restore command and reset afterwards. + */ + PreRestoreCommand(); + + /* + * Copy xlog from archival storage to XLOGDIR + */ + rc = system(xlogRestoreCmd); + + PostRestoreCommand(); + + if (rc == 0) + { + /* + * command apparently succeeded, but let's make sure the file is + * really there now and has the correct size. 
+ */ + if (stat(xlogpath, &stat_buf) == 0) + { + if (expectedSize > 0 && stat_buf.st_size != expectedSize) + { + int elevel; + + /* + * If we find a partial file in standby mode, we assume it's + * because it's just being copied to the archive, and keep + * trying. + * + * Otherwise treat a wrong-sized file as FATAL to ensure the + * DBA would notice it, but is that too strong? We could try + * to plow ahead with a local copy of the file ... but the + * problem is that there probably isn't one, and we'd + * incorrectly conclude we've reached the end of WAL and we're + * done recovering ... + */ + if (StandbyMode && stat_buf.st_size < expectedSize) + elevel = DEBUG1; + else + elevel = FATAL; + ereport(elevel, + (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", + xlogfname, + (unsigned long) stat_buf.st_size, + (unsigned long) expectedSize))); + return false; + } + else + { + ereport(LOG, + (errmsg("restored log file \"%s\" from archive", + xlogfname))); + strcpy(path, xlogpath); + return true; + } + } + else + { + /* stat failed */ + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + xlogpath))); + } + } + + /* + * Remember, we rollforward UNTIL the restore fails so failure here is + * just part of the process... that makes it difficult to determine + * whether the restore failed because there isn't an archive to restore, + * or because the administrator has specified the restore program + * incorrectly. We have to assume the former. + * + * However, if the failure was due to any sort of signal, it's best to + * punt and abort recovery. (If we "return false" here, upper levels will + * assume that recovery is complete and start up the database!) It's + * essential to abort on child SIGINT and SIGQUIT, because per spec + * system() ignores SIGINT and SIGQUIT while waiting; if we see one of + * those it's a good bet we should have gotten it too. 
+ * + * On SIGTERM, assume we have received a fast shutdown request, and exit + * cleanly. It's pure chance whether we receive the SIGTERM first, or the + * child process. If we receive it first, the signal handler will call + * proc_exit, otherwise we do it here. If we or the child process received + * SIGTERM for any other reason than a fast shutdown request, postmaster + * will perform an immediate shutdown when it sees us exiting + * unexpectedly. + * + * Per the Single Unix Spec, shells report exit status > 128 when a called + * command died on a signal. Also, 126 and 127 are used to report + * problems such as an unfindable command; treat those as fatal errors + * too. + */ + if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) + proc_exit(1); + + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + + ereport(signaled ? FATAL : DEBUG2, + (errmsg("could not restore file \"%s\" from archive: return code %d", + xlogfname, rc))); + +not_available: + + /* + * if an archived file is not available, there might still be a version of + * this file in XLOGDIR, so return that as the filename to open. + * + * In many recovery scenarios we expect this to fail also, but if so that + * just means we've reached the end of WAL. + */ + snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); + return false; +} + +/* + * Attempt to execute an external shell command during recovery. + * + * 'command' is the shell command to be executed, 'commandName' is a + * human-readable name describing the command emitted in the logs. If + * 'failOnSignal' is true and the command is killed by a signal, a FATAL + * error is thrown. Otherwise a WARNING is emitted. + * + * This is currently used for recovery_end_command and archive_cleanup_command. 
+ */ +void +ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) +{ + char xlogRecoveryCmd[MAXPGPATH]; + char lastRestartPointFname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + bool signaled; + XLogSegNo restartSegNo; + XLogRecPtr restartRedoPtr; + TimeLineID restartTli; + + Assert(command && commandName); + + /* + * Calculate the archive file cutoff point for use during log shipping + * replication. All files earlier than this point can be deleted from the + * archive, though there is no requirement to do so. + */ + GetOldestRestartPoint(&restartRedoPtr, &restartTli); + XLByteToSeg(restartRedoPtr, restartSegNo); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + /* ControlFileLock is taken and released inside GetOldestRestartPoint(). NOTE(review): it returns InvalidXLogRecPtr when not in redo; check what %r expands to then. */ + + /* + * construct the command to be executed + */ + dp = xlogRecoveryCmd; + endp = xlogRecoveryCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = command; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'r': + /* %r: filename of last restartpoint */ + sp++; + StrNCpy(dp, lastRestartPointFname, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing %s \"%s\"", commandName, command))); + + /* + * execute the constructed command + */ + rc = system(xlogRecoveryCmd); + if (rc != 0) + { + /* + * If the failure was due to any sort of signal, it's best to punt and + * abort recovery. See also detailed comments on signals in + * RestoreArchivedFile(). + */ + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + + ereport((signaled && failOnSignal) ?
FATAL : WARNING, + /*------ + translator: First %s represents a recovery.conf parameter name like + "recovery_end_command", and the 2nd is the value of that parameter. */ + (errmsg("%s \"%s\": return code %d", commandName, + command, rc))); + } +} + + +/* + * XLogArchiveNotify + * + * Create an archive notification file + * + * The name of the notification file is the message that will be picked up + * by the archiver, e.g. we write 0000000100000001000000C6.ready + * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6, + * then when complete, rename it to 0000000100000001000000C6.done + */ +void +XLogArchiveNotify(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + FILE *fd; + + /* insert an otherwise empty file called <XLOG>.ready */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + fd = AllocateFile(archiveStatusPath, "w"); + if (fd == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not create archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + if (FreeFile(fd)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + + /* Notify archiver that it's got something to do */ + if (IsUnderPostmaster) + SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); +} + +/* + * Convenience routine to notify using segment number representation of filename + */ +void +XLogArchiveNotifySeg(XLogSegNo segno) +{ + char xlog[MAXFNAMELEN]; + + XLogFileName(xlog, ThisTimeLineID, segno); + XLogArchiveNotify(xlog); +} + +/* + * XLogArchiveCheckDone + * + * This is called when we are ready to delete or recycle an old XLOG segment + * file or backup history file. If it is okay to delete it then return true. + * If it is not time to delete it, make sure a .ready file exists, and return + * false. 
+ * + * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists, + * then return false; else create <XLOG>.ready and return false. + * + * The reason we do things this way is so that if the original attempt to + * create <XLOG>.ready fails, we'll retry during subsequent checkpoints. + */ +bool +XLogArchiveCheckDone(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* Always deletable if archiving is off */ + if (!XLogArchivingActive()) + return true; + + /* First check for .done --- this means archiver is done with it */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* check for .ready --- this means archiver is still busy with it */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* Race condition --- maybe archiver just finished, so recheck */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* Retry creation of the .ready file */ + XLogArchiveNotify(xlog); + return false; +} + +/* + * XLogArchiveIsBusy + * + * Check to see if an XLOG segment file is still unarchived. + * This is almost but not quite the inverse of XLogArchiveCheckDone: in + * the first place we aren't chartered to recreate the .ready file, and + * in the second place we should consider that if the file is already gone + * then it's not busy. (This check is needed to handle the race condition + * that a checkpoint already deleted the no-longer-needed file.) 
+ */ +bool +XLogArchiveIsBusy(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* First check for .done --- this means archiver is done with it */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* check for .ready --- this means archiver is still busy with it */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* Race condition --- maybe archiver just finished, so recheck */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* + * Check to see if the WAL file has been removed by checkpoint, which + * implies it has already been archived, and explains why we can't see a + * status file for it. + */ + snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog); + if (stat(archiveStatusPath, &stat_buf) != 0 && + errno == ENOENT) + return false; + + return true; +} + +/* + * XLogArchiveCleanup + * + * Cleanup archive notification file(s) for a particular xlog segment + */ +void +XLogArchiveCleanup(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + + /* Remove the .done file */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + unlink(archiveStatusPath); + /* should we complain about failure? */ + + /* Remove the .ready file if present --- normally it shouldn't be */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + unlink(archiveStatusPath); + /* should we complain about failure? */ +} diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h new file mode 100644 index 00000000000..f2a7658bc45 --- /dev/null +++ b/src/include/access/timeline.h @@ -0,0 +1,23 @@ +/* + * timeline.h + * + * Functions for reading and writing timeline history files. 
+ * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/timeline.h + */ +#ifndef TIMELINE_H +#define TIMELINE_H + +#include "access/xlogdefs.h" +#include "nodes/pg_list.h" + +extern List *readTimeLineHistory(TimeLineID targetTLI); +extern bool existsTimeLineHistory(TimeLineID probeTLI); +extern TimeLineID findNewestTimeLine(TimeLineID startTLI); +extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason); + +#endif /* TIMELINE_H */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index b5bfb7b4072..2c66b2feb8a 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -229,6 +229,29 @@ extern const RmgrData RmgrTable[]; extern pg_time_t GetLastSegSwitchTime(void); extern XLogRecPtr RequestXLogSwitch(void); +extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); + +/* + * Exported for the functions in timeline.c and xlogarchive.c. Only valid + * in the startup process. + */ +extern bool InArchiveRecovery; +extern bool StandbyMode; +extern char *recoveryRestoreCommand; + +/* + * Prototypes for functions in xlogarchive.c + */ +extern bool RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize); +extern void ExecuteRecoveryCommand(char *command, char *commandName, + bool failOnSignal); +extern void XLogArchiveNotify(const char *xlog); +extern void XLogArchiveNotifySeg(XLogSegNo segno); +extern bool XLogArchiveCheckDone(const char *xlog); +extern bool XLogArchiveIsBusy(const char *xlog); +extern void XLogArchiveCleanup(const char *xlog); + /* * These aren't in xlog.h because I'd rather not include fmgr.h there. */ -- GitLab