diff --git a/doc/src/sgml/page.sgml b/doc/src/sgml/page.sgml
index ee619093a3745155b146fc55c372a30be24a8c86..ebafa46598fbb0812d45ac54640732af2e401c71 100644
--- a/doc/src/sgml/page.sgml
+++ b/doc/src/sgml/page.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/page.sgml,v 1.17 2003/12/14 00:10:32 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/page.sgml,v 1.18 2004/07/21 22:31:18 tgl Exp $
 -->
 
 <chapter id="page">
@@ -114,37 +114,38 @@ data. Empty in ordinary tables.</entry>
    <entry>pd_lsn</entry>
    <entry>XLogRecPtr</entry>
    <entry>8 bytes</entry>
-   <entry>LSN: next byte after last byte of xlog</entry>
+   <entry>LSN: next byte after last byte of xlog record for last change
+   to this page</entry>
   </row>
   <row>
-   <entry>pd_sui</entry>
-   <entry>StartUpID</entry>
+   <entry>pd_tli</entry>
+   <entry>TimeLineID</entry>
    <entry>4 bytes</entry>
-   <entry>SUI of last changes (currently it's used by heap AM only)</entry>
+   <entry>TLI of last change</entry>
   </row>
   <row>
    <entry>pd_lower</entry>
    <entry>LocationIndex</entry>
    <entry>2 bytes</entry>
-   <entry>Offset to start of free space.</entry>
+   <entry>Offset to start of free space</entry>
   </row>
   <row>
    <entry>pd_upper</entry>
    <entry>LocationIndex</entry>
    <entry>2 bytes</entry>
-   <entry>Offset to end of free space.</entry>
+   <entry>Offset to end of free space</entry>
   </row>
   <row>
    <entry>pd_special</entry>
    <entry>LocationIndex</entry>
    <entry>2 bytes</entry>
-   <entry>Offset to start of special space.</entry>
+   <entry>Offset to start of special space</entry>
   </row>
   <row>
    <entry>pd_pagesize_version</entry>
    <entry>uint16</entry>
    <entry>2 bytes</entry>
-   <entry>Page size and layout version number information.</entry>
+   <entry>Page size and layout version number information</entry>
   </row>
  </tbody>
  </tgroup>
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f0be8123f5c476793e7704f0612f9799f8996804..6e65966c936a8ebd952462e0256981c6645c889f 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.170 2004/07/11 18:01:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.171 2004/07/21 22:31:19 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1214,7 +1214,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
 		recptr = XLogInsert(RM_HEAP_ID, info, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 	else
 	{
@@ -1390,7 +1390,7 @@ l1:
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
 
 		PageSetLSN(dp, recptr);
-		PageSetSUI(dp, ThisStartUpID);
+		PageSetTLI(dp, ThisTimeLineID);
 	}
 	else
 	{
@@ -1748,10 +1748,10 @@ l2:
 		if (newbuf != buffer)
 		{
 			PageSetLSN(BufferGetPage(newbuf), recptr);
-			PageSetSUI(BufferGetPage(newbuf), ThisStartUpID);
+			PageSetTLI(BufferGetPage(newbuf), ThisTimeLineID);
 		}
 		PageSetLSN(BufferGetPage(buffer), recptr);
-		PageSetSUI(BufferGetPage(buffer), ThisStartUpID);
+		PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
 	}
 	else
 	{
@@ -1902,7 +1902,7 @@ l3:
 	 * XLOG stuff: no logging is required as long as we have no
 	 * savepoints. For savepoints private log could be used...
 	 */
-	PageSetSUI(BufferGetPage(*buffer), ThisStartUpID);
+	PageSetTLI(BufferGetPage(*buffer), ThisTimeLineID);
 
 	/* store transaction information of xact marking the tuple */
 	tuple->t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
@@ -2184,7 +2184,7 @@ heap_xlog_clean(bool redo, XLogRecPtr lsn, XLogRecord *record)
 	PageRepairFragmentation(page, NULL);
 
 	PageSetLSN(page, lsn);
-	PageSetSUI(page, ThisStartUpID);	/* prev sui */
+	PageSetTLI(page, ThisTimeLineID);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
 }
@@ -2217,7 +2217,7 @@ heap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record)
 	memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
 
 	PageSetLSN(page, lsn);
-	PageSetSUI(page, ThisStartUpID);
+	PageSetTLI(page, ThisTimeLineID);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
 }
@@ -2283,7 +2283,7 @@ heap_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
 		/* Make sure there is no forward chain link in t_ctid */
 		htup->t_ctid = xlrec->target.tid;
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 		return;
@@ -2368,7 +2368,7 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record)
 		if (offnum == InvalidOffsetNumber)
 			elog(PANIC, "heap_insert_redo: failed to add tuple");
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);		/* prev sui */
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 		return;
@@ -2466,7 +2466,7 @@ heap_xlog_update(bool redo, XLogRecPtr lsn, XLogRecord *record, bool move)
 		if (samepage)
 			goto newsame;
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 		goto newt;
@@ -2564,7 +2564,7 @@ newsame:;
 		if (offnum == InvalidOffsetNumber)
 			elog(PANIC, "heap_update_redo: failed to add tuple");
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);		/* prev sui */
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 		return;
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 6a4ecaeb0c87da9f9418923e80c12479d0a630e1..ed08d65d99d8dbf66bd76f169cfa83862fd893d2 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.112 2004/04/21 18:24:25 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.113 2004/07/21 22:31:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -621,11 +621,11 @@ _bt_insertonpg(Relation rel,
 			if (BufferIsValid(metabuf))
 			{
 				PageSetLSN(metapg, recptr);
-				PageSetSUI(metapg, ThisStartUpID);
+				PageSetTLI(metapg, ThisTimeLineID);
 			}
 
 			PageSetLSN(page, recptr);
-			PageSetSUI(page, ThisStartUpID);
+			PageSetTLI(page, ThisTimeLineID);
 		}
 
 		END_CRIT_SECTION();
@@ -903,13 +903,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
 		recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
 
 		PageSetLSN(leftpage, recptr);
-		PageSetSUI(leftpage, ThisStartUpID);
+		PageSetTLI(leftpage, ThisTimeLineID);
 		PageSetLSN(rightpage, recptr);
-		PageSetSUI(rightpage, ThisStartUpID);
+		PageSetTLI(rightpage, ThisTimeLineID);
 		if (!P_RIGHTMOST(ropaque))
 		{
 			PageSetLSN(spage, recptr);
-			PageSetSUI(spage, ThisStartUpID);
+			PageSetTLI(spage, ThisTimeLineID);
 		}
 	}
 
@@ -1494,13 +1494,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
 
 		PageSetLSN(rootpage, recptr);
-		PageSetSUI(rootpage, ThisStartUpID);
+		PageSetTLI(rootpage, ThisTimeLineID);
 		PageSetLSN(metapg, recptr);
-		PageSetSUI(metapg, ThisStartUpID);
+		PageSetTLI(metapg, ThisTimeLineID);
 		PageSetLSN(lpage, recptr);
-		PageSetSUI(lpage, ThisStartUpID);
+		PageSetTLI(lpage, ThisTimeLineID);
 		PageSetLSN(rpage, recptr);
-		PageSetSUI(rpage, ThisStartUpID);
+		PageSetTLI(rpage, ThisTimeLineID);
 	}
 
 	END_CRIT_SECTION();
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 002fb018dba14c6b82e9447d953b7590d7a3e778..c8f482545e06f79e96cdeb525375864e50f91f41 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.76 2004/06/02 17:28:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.77 2004/07/21 22:31:20 tgl Exp $
  *
  *	NOTES
  *	   Postgres btree pages look like ordinary relation pages.	The opaque
@@ -84,7 +84,7 @@ _bt_metapinit(Relation rel)
 							rdata);
 
 		PageSetLSN(pg, recptr);
-		PageSetSUI(pg, ThisStartUpID);
+		PageSetTLI(pg, ThisTimeLineID);
 	}
 
 	END_CRIT_SECTION();
@@ -249,9 +249,9 @@ _bt_getroot(Relation rel, int access)
 			recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
 
 			PageSetLSN(rootpage, recptr);
-			PageSetSUI(rootpage, ThisStartUpID);
+			PageSetTLI(rootpage, ThisTimeLineID);
 			PageSetLSN(metapg, recptr);
-			PageSetSUI(metapg, ThisStartUpID);
+			PageSetTLI(metapg, ThisTimeLineID);
 		}
 
 		END_CRIT_SECTION();
@@ -686,7 +686,7 @@ _bt_delitems(Relation rel, Buffer buf,
 		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	END_CRIT_SECTION();
@@ -1080,22 +1080,22 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 		if (BufferIsValid(metabuf))
 		{
 			PageSetLSN(metapg, recptr);
-			PageSetSUI(metapg, ThisStartUpID);
+			PageSetTLI(metapg, ThisTimeLineID);
 		}
 		page = BufferGetPage(pbuf);
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		page = BufferGetPage(rbuf);
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		page = BufferGetPage(buf);
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		if (BufferIsValid(lbuf))
 		{
 			page = BufferGetPage(lbuf);
 			PageSetLSN(page, recptr);
-			PageSetSUI(page, ThisStartUpID);
+			PageSetTLI(page, ThisTimeLineID);
 		}
 	}
 
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index d2bafb3957783ca82c41342e97beb6bc4f4de034..28f147fcafebc869036766089c1f9436ae207af8 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -56,7 +56,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.84 2004/07/19 02:47:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.85 2004/07/21 22:31:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -299,14 +299,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 
 		END_CRIT_SECTION();
 	}
 	else
 	{
-		/* Leave the page LSN zero if not WAL-logged, but set SUI anyway */
-		PageSetSUI(page, ThisStartUpID);
+		/* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	/*
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 73e2ddf287c7bd1069ab58f0016a93435120688d..2befeb1aa32bae9d590efd2d0d39cc5888f13645 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.15 2004/07/11 18:01:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.16 2004/07/21 22:31:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -136,7 +136,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
 	pageop->btpo_flags = BTP_META;
 
 	PageSetLSN(metapg, lsn);
-	PageSetSUI(metapg, ThisStartUpID);
+	PageSetTLI(metapg, ThisTimeLineID);
 	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(metabuf);
 }
@@ -197,7 +197,7 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
 					elog(PANIC, "btree_insert_redo: failed to add item");
 
 				PageSetLSN(page, lsn);
-				PageSetSUI(page, ThisStartUpID);
+				PageSetTLI(page, ThisTimeLineID);
 				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				WriteBuffer(buffer);
 			}
@@ -281,7 +281,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
 						 xlrec->leftlen);
 
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 	}
@@ -317,7 +317,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
 					 record->xl_len - SizeOfBtreeSplit - xlrec->leftlen);
 
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 	}
@@ -353,7 +353,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
 				pageop->btpo_prev = rightsib;
 
 				PageSetLSN(page, lsn);
-				PageSetSUI(page, ThisStartUpID);
+				PageSetTLI(page, ThisTimeLineID);
 				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				WriteBuffer(buffer);
 			}
@@ -420,7 +420,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
 	}
 
 	PageSetLSN(page, lsn);
-	PageSetSUI(page, ThisStartUpID);
+	PageSetTLI(page, ThisTimeLineID);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
 }
@@ -489,7 +489,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
 			}
 
 			PageSetLSN(page, lsn);
-			PageSetSUI(page, ThisStartUpID);
+			PageSetTLI(page, ThisTimeLineID);
 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 			WriteBuffer(buffer);
 		}
@@ -515,7 +515,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
 			pageop->btpo_prev = leftsib;
 
 			PageSetLSN(page, lsn);
-			PageSetSUI(page, ThisStartUpID);
+			PageSetTLI(page, ThisTimeLineID);
 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 			WriteBuffer(buffer);
 		}
@@ -543,7 +543,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
 				pageop->btpo_next = rightsib;
 
 				PageSetLSN(page, lsn);
-				PageSetSUI(page, ThisStartUpID);
+				PageSetTLI(page, ThisTimeLineID);
 				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				WriteBuffer(buffer);
 			}
@@ -569,7 +569,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
 		pageop->btpo_flags = BTP_DELETED;
 
 		PageSetLSN(page, lsn);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		WriteBuffer(buffer);
 	}
@@ -632,7 +632,7 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
 						 record->xl_len - SizeOfBtreeNewroot);
 
 	PageSetLSN(page, lsn);
-	PageSetSUI(page, ThisStartUpID);
+	PageSetTLI(page, ThisTimeLineID);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
 
diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample
index 8a0801764c06802fd969d344e20d522fb39dec9b..e3068d535d631bdac1f09d4feddfc33cbf01dcff 100644
--- a/src/backend/access/transam/recovery.conf.sample
+++ b/src/backend/access/transam/recovery.conf.sample
@@ -63,4 +63,12 @@
 #
 #recovery_target_inclusive = 'true'		# 'true' or 'false'
 #
+#
+# If you want to recover into a timeline other than the "main line" shown in
+# pg_control, specify the timeline number here, or write 'latest' to get
+# the latest branch for which there's a history file.
+#
+#recovery_target_timeline = '33'		# number or 'latest'
+#
+#
 #---------------------------------------------------------------------------
diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c
index d6c8c93ca6ebfe3389571e363f410f50d4c2e888..ad68e4c99baef77a5b4023085babaae737c7f119 100644
--- a/src/backend/access/transam/rmgr.c
+++ b/src/backend/access/transam/rmgr.c
@@ -3,7 +3,7 @@
  *
  * Resource managers definition
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.13 2004/07/01 00:49:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.14 2004/07/21 22:31:20 tgl Exp $
  */
 #include "postgres.h"
 
@@ -14,12 +14,12 @@
 #include "access/rtree.h"
 #include "access/slru.h"
 #include "access/xact.h"
-#include "access/xlog.h"
+#include "access/xlog_internal.h"
 #include "storage/smgr.h"
 #include "commands/sequence.h"
 
 
-RmgrData	RmgrTable[RM_MAX_ID + 1] = {
+const RmgrData RmgrTable[RM_MAX_ID + 1] = {
 	{"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL},
 	{"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL},
 	{"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL},
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 0181e2d626048a476aaf0f60f183cc65814c0225..d45a7d9f6141e60d3e13eb8b982702c38c34e431 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.17 2004/07/01 00:49:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,7 @@
 #include "postmaster/bgwriter.h"
 #include "storage/fd.h"
 #include "storage/lwlock.h"
+#include "storage/shmem.h"
 #include "miscadmin.h"
 
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 358966d4fc402c174eb6dcd720af769a95705480..40c11fb6bd071decf6594a86b054762d59640811 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.149 2004/07/19 14:34:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.150 2004/07/21 22:31:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,12 +24,13 @@
 
 #include "access/clog.h"
 #include "access/subtrans.h"
-#include "access/transam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
+#include "access/xlog_internal.h"
 #include "access/xlogutils.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
+#include "miscadmin.h"
 #include "postmaster/bgwriter.h"
 #include "storage/bufpage.h"
 #include "storage/fd.h"
@@ -41,7 +42,6 @@
 #include "utils/builtins.h"
 #include "utils/guc.h"
 #include "utils/relcache.h"
-#include "miscadmin.h"
 
 
 /*
@@ -121,25 +121,57 @@ static int	open_sync_bit = DEFAULT_SYNC_FLAGBIT;
 
 
 /*
- * ThisStartUpID will be same in all backends --- it identifies current
- * instance of the database system.
+ * ThisTimeLineID will be the same in all backends --- it identifies the
+ * current WAL timeline for the database system.
  */
-StartUpID	ThisStartUpID = 0;
+TimeLineID	ThisTimeLineID = 0;
 
 /* Are we doing recovery from XLOG? */
 bool		InRecovery = false;
 /* Are we recovering using offline XLOG archives? */
 static bool	InArchiveRecovery = false;
-/* Was the last file restored from archive, or local? */
+/* Was the last xlog file restored from archive, or local? */
 static bool	restoredFromArchive = false;
 
-static char recoveryRestoreCommand[MAXPGPATH];
+/* options taken from recovery.conf */
+static char *recoveryRestoreCommand = NULL;
 static bool recoveryTarget = false;
 static bool recoveryTargetExact = false;
 static bool recoveryTargetInclusive = true;
 static TransactionId   recoveryTargetXid;
 static time_t          recoveryTargetTime;
 
+/* if recoveryStopsHere returns true, it saves actual stop xid/time here */
+static TransactionId   recoveryStopXid;
+static time_t          recoveryStopTime;
+static bool			   recoveryStopAfter;
+
+/*
+ * During normal operation, the only timeline we care about is ThisTimeLineID.
+ * During recovery, however, things are more complicated.  To simplify life
+ * for rmgr code, we keep ThisTimeLineID set to the "current" timeline as we
+ * scan through the WAL history (that is, it is the line that was active when
+ * the currently-scanned WAL record was generated).  We also need these
+ * timeline values:
+ *
+ * recoveryTargetTLI: the desired timeline that we want to end in.
+ *
+ * expectedTLIs: an integer list of recoveryTargetTLI and the TLIs of
+ * its known parents, newest first (so recoveryTargetTLI is always the
+ * first list member).  Only these TLIs are expected to be seen in the WAL
+ * segments we read, and indeed only these TLIs will be considered as
+ * candidate WAL files to open at all.
+ *
+ * curFileTLI: the TLI appearing in the name of the current input WAL file.
+ * (This is not necessarily the same as ThisTimeLineID, because we could
+ * be scanning data that was copied from an ancestor timeline when the current
+ * file was created.)  During a sequential scan we do not allow this value
+ * to decrease.
+ */
+static TimeLineID	recoveryTargetTLI;
+static List		   *expectedTLIs;
+static TimeLineID	curFileTLI;
+
 /*
  * MyLastRecPtr points to the start of the last XLOG record inserted by the
  * current transaction.  If MyLastRecPtr.xrecoff == 0, then the current
@@ -242,12 +274,19 @@ static XLogRecPtr RedoRecPtr;
  *
  *----------
  */
+
 typedef struct XLogwrtRqst
 {
 	XLogRecPtr	Write;			/* last byte + 1 to write out */
 	XLogRecPtr	Flush;			/* last byte + 1 to flush */
 } XLogwrtRqst;
 
+typedef struct XLogwrtResult
+{
+	XLogRecPtr	Write;			/* last byte + 1 written out */
+	XLogRecPtr	Flush;			/* last byte + 1 flushed */
+} XLogwrtResult;
+
 /*
  * Shared state data for XLogInsert.
  */
@@ -293,7 +332,7 @@ typedef struct XLogCtlData
 	XLogRecPtr *xlblocks;		/* 1st byte ptr-s + BLCKSZ */
 	uint32		XLogCacheByte;	/* # bytes in xlog buffers */
 	uint32		XLogCacheBlck;	/* highest allocated xlog buffer index */
-	StartUpID	ThisStartUpID;
+	TimeLineID	ThisTimeLineID;
 
 	slock_t		info_lck;		/* locks shared LogwrtRqst/LogwrtResult */
 } XLogCtlData;
@@ -323,99 +362,15 @@ static ControlFileData *ControlFile = NULL;
 		XLogCtl->xlblocks[curridx].xrecoff - INSERT_FREESPACE(Insert) \
 	)
 
-
-/* Increment an xlogid/segment pair */
-#define NextLogSeg(logId, logSeg)	\
-	do { \
-		if ((logSeg) >= XLogSegsPerFile-1) \
-		{ \
-			(logId)++; \
-			(logSeg) = 0; \
-		} \
-		else \
-			(logSeg)++; \
-	} while (0)
-
-/* Decrement an xlogid/segment pair (assume it's not 0,0) */
-#define PrevLogSeg(logId, logSeg)	\
-	do { \
-		if (logSeg) \
-			(logSeg)--; \
-		else \
-		{ \
-			(logId)--; \
-			(logSeg) = XLogSegsPerFile-1; \
-		} \
-	} while (0)
-
-/*
- * Compute ID and segment from an XLogRecPtr.
- *
- * For XLByteToSeg, do the computation at face value.  For XLByteToPrevSeg,
- * a boundary byte is taken to be in the previous segment.	This is suitable
- * for deciding which segment to write given a pointer to a record end,
- * for example.  (We can assume xrecoff is not zero, since no valid recptr
- * can have that.)
- */
-#define XLByteToSeg(xlrp, logId, logSeg)	\
-	( logId = (xlrp).xlogid, \
-	  logSeg = (xlrp).xrecoff / XLogSegSize \
-	)
-#define XLByteToPrevSeg(xlrp, logId, logSeg)	\
-	( logId = (xlrp).xlogid, \
-	  logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
-	)
-
-/*
- * Is an XLogRecPtr within a particular XLOG segment?
- *
- * For XLByteInSeg, do the computation at face value.  For XLByteInPrevSeg,
- * a boundary byte is taken to be in the previous segment.
- */
-#define XLByteInSeg(xlrp, logId, logSeg)	\
-	((xlrp).xlogid == (logId) && \
-	 (xlrp).xrecoff / XLogSegSize == (logSeg))
-
-#define XLByteInPrevSeg(xlrp, logId, logSeg)	\
-	((xlrp).xlogid == (logId) && \
-	 ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
-
-
 #define PrevBufIdx(idx)		\
 		(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
 
 #define NextBufIdx(idx)		\
 		(((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
-#define XRecOffIsValid(xrecoff) \
-		((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
-		(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
-
-/*
- * These macros encapsulate knowledge about the exact layout of XLog file
- * names as well as archive-status file names.
- */
-#define MAXFNAMELEN		32
-
-#define XLogFileName(fname, log, seg)	\
-	snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg)
-
-#define XLogFilePath(path, log, seg)	\
-	snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg)
-
-#define StatusFilePath(path, xlog, suffix)	\
-	snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix)
-
-/*
- * _INTL_MAXLOGRECSZ: max space needed for a record including header and
- * any backup-block data.
- */
-#define _INTL_MAXLOGRECSZ	(SizeOfXLogRecord + MAXLOGRECSZ + \
-							 XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
-
 
 /* File path names */
-static char XLogDir[MAXPGPATH];
+char XLogDir[MAXPGPATH];
 static char ControlFilePath[MAXPGPATH];
 
 /*
@@ -453,36 +408,44 @@ static char *readBuf = NULL;
 static XLogRecPtr ReadRecPtr;
 static XLogRecPtr EndRecPtr;
 static XLogRecord *nextRecord = NULL;
-static StartUpID lastReadSUI;
+static TimeLineID lastPageTLI = 0;
 
 static bool InRedo = false;
 
+
 static void XLogArchiveNotify(const char *xlog);
 static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
 static bool XLogArchiveIsDone(const char *xlog);
 static void XLogArchiveCleanup(const char *xlog);
 static void readRecoveryCommandFile(void);
-static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg,
-								uint32 xrecoff);
+static void exitArchiveRecovery(TimeLineID endTLI,
+								uint32 endLogId, uint32 endLogSeg);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
 
 static bool AdvanceXLInsertBuffer(void);
-static bool WasteXLInsertBuffer(void);
 static void XLogWrite(XLogwrtRqst WriteRqst);
 static int XLogFileInit(uint32 log, uint32 seg,
 			 bool *use_existent, bool use_lock);
 static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
 					   bool find_free, int max_advance,
 					   bool use_lock);
-static int	XLogFileOpen(uint32 log, uint32 seg, bool econt);
-static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg);
+static int	XLogFileOpen(uint32 log, uint32 seg);
+static int	XLogFileRead(uint32 log, uint32 seg, int emode);
+static bool RestoreArchivedFile(char *path, const char *xlogfname,
+								const char *recovername);
 static void PreallocXlogFiles(XLogRecPtr endptr);
 static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
 static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
-static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI);
+static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr,
 					 int whichChkpt,
 					 char *buffer);
+static List *readTimeLineHistory(TimeLineID targetTLI);
+static bool existsTimeLineHistory(TimeLineID probeTLI);
+static TimeLineID findNewestTimeLine(TimeLineID startTLI);
+static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+								 TimeLineID endTLI,
+								 uint32 endLogId, uint32 endLogSeg);
 static void WriteControlFile(void);
 static void ReadControlFile(void);
 static char *str_time(time_t tnow);
@@ -546,7 +509,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 	if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
 	{
 		RecPtr.xlogid = 0;
-		RecPtr.xrecoff = SizeOfXLogPHD; /* start of 1st checkpoint record */
+		RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt record */
 		return (RecPtr);
 	}
 
@@ -755,16 +718,9 @@ begin:;
 	}
 
 	/*
-	 * Determine exactly where we will place the new XLOG record.  If there
-	 * isn't enough space on the current XLOG page for a record header,
-	 * advance to the next page (leaving the unused space as zeroes).
-	 * If there isn't enough space in the current XLOG segment for the whole
-	 * record, advance to the next segment (inserting wasted-space records).
-	 * This avoids needing a continuation record at the start of a segment
-	 * file, which would conflict with placing a FILE_HEADER record there.
-	 * We assume that no XLOG record can be larger than a segment file...
+	 * If there isn't enough space on the current XLOG page for a record
+	 * header, advance to the next page (leaving the unused space as zeroes).
 	 */
-
 	updrqst = false;
 	freespace = INSERT_FREESPACE(Insert);
 	if (freespace < SizeOfXLogRecord)
@@ -773,27 +729,6 @@ begin:;
 		freespace = INSERT_FREESPACE(Insert);
 	}
 
-	if (freespace < (uint32) (SizeOfXLogRecord + write_len))
-	{
-		/* Doesn't fit on this page, so check for overrunning the file */
-		uint32		avail;
-
-		/* First figure the space available in remaining pages of file */
-		avail = XLogSegSize - BLCKSZ -
-			(Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize);
-		avail /= BLCKSZ;		/* convert to pages, then usable bytes */
-		avail *= (BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord);
-		avail += freespace;		/* add in the current page too */
-		if (avail < (uint32) (SizeOfXLogRecord + write_len))
-		{
-			/* It overruns the file, so waste the rest of the file... */
-			do {
-				updrqst = WasteXLInsertBuffer();
-			} while ((Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize) != 0);
-			freespace = INSERT_FREESPACE(Insert);
-		}
-	}
-
 	curridx = Insert->curridx;
 	record = (XLogRecord *) Insert->currpos;
 
@@ -891,14 +826,12 @@ begin:;
 		/* Use next buffer */
 		updrqst = AdvanceXLInsertBuffer();
 		curridx = Insert->curridx;
-		/* This assert checks we did not insert a file header record */
-		Assert(INSERT_FREESPACE(Insert) == BLCKSZ - SizeOfXLogPHD);
 		/* Insert cont-record header */
 		Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
 		contrecord = (XLogContRecord *) Insert->currpos;
 		contrecord->xl_rem_len = write_len;
 		Insert->currpos += SizeOfXLogContRecord;
-		freespace = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord;
+		freespace = INSERT_FREESPACE(Insert);
 	}
 
 	/* Ensure next record will be properly aligned */
@@ -949,9 +882,9 @@ begin:;
  * Create an archive notification file
  *
  * The name of the notification file is the message that will be picked up
- * by the archiver, e.g. we write 00000001000000C6.ready
- * and the archiver then knows to archive XLogDir/00000001000000C6,
- * then when complete, rename it to 00000001000000C6.done
+ * by the archiver, e.g. we write 0000000100000001000000C6.ready
+ * and the archiver then knows to archive XLogDir/0000000100000001000000C6,
+ * then when complete, rename it to 0000000100000001000000C6.done
  */
 static void
 XLogArchiveNotify(const char *xlog)
@@ -990,7 +923,7 @@ XLogArchiveNotifySeg(uint32 log, uint32 seg)
 {
 	char		xlog[MAXFNAMELEN];
 
-	XLogFileName(xlog, log, seg);
+	XLogFileName(xlog, ThisTimeLineID, log, seg);
 	XLogArchiveNotify(xlog);
 }
 
@@ -1035,16 +968,22 @@ XLogArchiveIsDone(const char *xlog)
 /*
  * XLogArchiveCleanup
  *
- * Cleanup an archive notification file for a particular xlog segment
+ * Cleanup archive notification file(s) for a particular xlog segment
  */
 static void
 XLogArchiveCleanup(const char *xlog)
 {
 	char	archiveStatusPath[MAXPGPATH];
 
+	/* Remove the .done file */
 	StatusFilePath(archiveStatusPath, xlog, ".done");
 	unlink(archiveStatusPath);
 	/* should we complain about failure? */
+
+	/* Remove the .ready file if present --- normally it shouldn't be */
+	StatusFilePath(archiveStatusPath, xlog, ".ready");
+	unlink(archiveStatusPath);
+	/* should we complain about failure? */
 }
 
 /*
@@ -1151,7 +1090,7 @@ AdvanceXLInsertBuffer(void)
 	NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * BLCKSZ);
 	Insert->curridx = nextidx;
 	Insert->currpage = NewPage;
-	Insert->currpos = ((char *) NewPage) + SizeOfXLogPHD;
+	Insert->currpos = ((char *) NewPage) + SizeOfXLogShortPHD;
 
 	/*
 	 * Be sure to re-zero the buffer so that bytes beyond what we've
@@ -1164,103 +1103,26 @@ AdvanceXLInsertBuffer(void)
 	 */
 	NewPage->xlp_magic = XLOG_PAGE_MAGIC;
 	/* NewPage->xlp_info = 0; */	/* done by memset */
-	NewPage->xlp_sui = ThisStartUpID;
+	NewPage->xlp_tli = ThisTimeLineID;
 	NewPage->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
 	NewPage->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ;
 
 	/*
-	 * If first page of an XLOG segment file, add a FILE_HEADER record.
+	 * If first page of an XLOG segment file, make it a long header.
 	 */
 	if ((NewPage->xlp_pageaddr.xrecoff % XLogSegSize) == 0)
 	{
-		XLogRecPtr	RecPtr;
-		XLogRecord *record;
-		XLogFileHeaderData *fhdr;
-		crc64		crc;
+		XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
 
-		record = (XLogRecord *) Insert->currpos;
-		record->xl_prev = Insert->PrevRecord;
-		record->xl_xact_prev.xlogid = 0;
-		record->xl_xact_prev.xrecoff = 0;
-		record->xl_xid = InvalidTransactionId;
-		record->xl_len = SizeOfXLogFHD;
-		record->xl_info = XLOG_FILE_HEADER;
-		record->xl_rmid = RM_XLOG_ID;
-		fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
-		fhdr->xlfhd_sysid = ControlFile->system_identifier;
-		fhdr->xlfhd_xlogid = NewPage->xlp_pageaddr.xlogid;
-		fhdr->xlfhd_segno = NewPage->xlp_pageaddr.xrecoff / XLogSegSize;
-		fhdr->xlfhd_seg_size = XLogSegSize;
-
-		INIT_CRC64(crc);
-		COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
-		COMP_CRC64(crc, (char *) record + sizeof(crc64),
-				   SizeOfXLogRecord - sizeof(crc64));
-		FIN_CRC64(crc);
-		record->xl_crc = crc;
-
-		/* Compute record's XLOG location */
-		INSERT_RECPTR(RecPtr, Insert, nextidx);
-
-		/* Record begin of record in appropriate places */
-		Insert->PrevRecord = RecPtr;
-
-		Insert->currpos += SizeOfXLogRecord + SizeOfXLogFHD;
+		NewLongPage->xlp_sysid = ControlFile->system_identifier;
+		NewLongPage->xlp_seg_size = XLogSegSize;
+		NewPage->xlp_info |= XLP_LONG_HEADER;
+		Insert->currpos = ((char *) NewPage) + SizeOfXLogLongPHD;
 	}
 
 	return update_needed;
 }
 
-/*
- * Fill the remainder of the current XLOG page with an XLOG_WASTED_SPACE
- * record, and advance to the next page.  This has the same calling and
- * result conditions as AdvanceXLInsertBuffer, except that
- * AdvanceXLInsertBuffer expects the current page to be already filled.
- */
-static bool
-WasteXLInsertBuffer(void)
-{
-	XLogCtlInsert *Insert = &XLogCtl->Insert;
-	XLogRecord *record;
-	XLogRecPtr	RecPtr;
-	uint32		freespace;
-	uint16		curridx;
-	crc64		rdata_crc;
-
-	freespace = INSERT_FREESPACE(Insert);
-	Assert(freespace >= SizeOfXLogRecord);
-	freespace -= SizeOfXLogRecord;
-
-	curridx = Insert->curridx;
-	record = (XLogRecord *) Insert->currpos;
-
-	record->xl_prev = Insert->PrevRecord;
-	record->xl_xact_prev.xlogid = 0;
-	record->xl_xact_prev.xrecoff = 0;
-
-	record->xl_xid = InvalidTransactionId;
-	record->xl_len = freespace;
-	record->xl_info = XLOG_WASTED_SPACE;
-	record->xl_rmid = RM_XLOG_ID;
-
-	INIT_CRC64(rdata_crc);
-	COMP_CRC64(rdata_crc, XLogRecGetData(record), freespace);
-	COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64),
-			   SizeOfXLogRecord - sizeof(crc64));
-	FIN_CRC64(rdata_crc);
-	record->xl_crc = rdata_crc;
-
-	/* Compute record's XLOG location */
-	INSERT_RECPTR(RecPtr, Insert, curridx);
-
-	/* Record begin of record in appropriate places */
-	Insert->PrevRecord = RecPtr;
-
-	/* We needn't bother to advance Insert->currpos */
-
-	return AdvanceXLInsertBuffer();
-}
-
 /*
  * Write and/or fsync the log at least as far as WriteRqst indicates.
  *
@@ -1355,7 +1217,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
 		if (openLogFile < 0)
 		{
 			XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
-			openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
+			openLogFile = XLogFileOpen(openLogId, openLogSeg);
 			openLogOff = 0;
 		}
 
@@ -1439,7 +1301,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
 			if (openLogFile < 0)
 			{
 				XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
-				openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
+				openLogFile = XLogFileOpen(openLogId, openLogSeg);
 				openLogOff = 0;
 			}
 			issue_xlog_fsync();
@@ -1617,7 +1479,7 @@ XLogFileInit(uint32 log, uint32 seg,
 	int			fd;
 	int			nbytes;
 
-	XLogFilePath(path, log, seg);
+	XLogFilePath(path, ThisTimeLineID, log, seg);
 
 	/*
 	 * Try to use existent file (checkpoint maker may have created it
@@ -1730,6 +1592,109 @@ XLogFileInit(uint32 log, uint32 seg,
 	return (fd);
 }
 
+/*
+ * Create a new XLOG file segment by copying a pre-existing one.
+ *
+ * log, seg: identify segment to be created.
+ *
+ * srcTLI, srclog, srcseg: identify segment to be copied (could be from
+ *		a different timeline)
+ *
+ * Currently this is only used during recovery, and so there are no locking
+ * considerations.  But we should be just as careful as XLogFileInit to avoid
+ * installing a bogus file; every error detected here is reported at PANIC.
+ */
+static void
+XLogFileCopy(uint32 log, uint32 seg,
+			 TimeLineID srcTLI, uint32 srclog, uint32 srcseg)
+{
+	char		path[MAXPGPATH];
+	char		tmppath[MAXPGPATH];
+	char		buffer[BLCKSZ];
+	int			srcfd;
+	int			fd;
+	int			nbytes;
+
+	/*
+	 * Open the source file
+	 */
+	XLogFilePath(path, srcTLI, srclog, srcseg);
+	srcfd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
+	if (srcfd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", path)));
+
+	/*
+	 * Copy into a temp file name.
+	 */
+	snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d",
+			 XLogDir, (int) getpid());
+
+	unlink(tmppath);
+
+	/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */
+	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+					   S_IRUSR | S_IWUSR);
+	if (fd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", tmppath)));
+
+	/*
+	 * Copy exactly XLogSegSize bytes, one BLCKSZ-sized buffer at a time.
+	 */
+	for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
+	{
+		errno = 0;
+		if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
+		{
+			if (errno != 0)
+				ereport(PANIC,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			else
+				ereport(PANIC,
+						(errmsg("insufficient data in file \"%s\"", path)));
+		}
+		errno = 0;
+		if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
+		{
+			int			save_errno = errno;
+
+			/*
+			 * If we fail to make the file, delete it to release disk
+			 * space
+			 */
+			unlink(tmppath);
+			/* if write didn't set errno, assume problem is no disk space */
+			errno = save_errno ? save_errno : ENOSPC;
+
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not write to file \"%s\": %m", tmppath)));
+		}
+	}
+
+	if (pg_fsync(fd) != 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", tmppath)));
+
+	if (close(fd))
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", tmppath)));
+
+	close(srcfd);
+
+	/*
+	 * Now move the segment into place with its final name.
+	 */
+	if (!InstallXLogFileSegment(log, seg, tmppath, false, 0, false))
+		elog(PANIC, "InstallXLogFileSegment should not have failed");
+}
+
 /*
  * Install a new XLOG segment file as a current or future log segment.
  *
@@ -1763,7 +1728,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
 	char		path[MAXPGPATH];
 	struct stat stat_buf;
 
-	XLogFilePath(path, log, seg);
+	XLogFilePath(path, ThisTimeLineID, log, seg);
 
 	/*
 	 * We want to be sure that only one process does this at a time.
@@ -1789,7 +1754,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
 				return false;
 			}
 			NextLogSeg(log, seg);
-			XLogFilePath(path, log, seg);
+			XLogFilePath(path, ThisTimeLineID, log, seg);
 		}
 	}
 
@@ -1820,73 +1785,102 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
 }
 
 /*
- * Open a pre-existing logfile segment.
+ * Open a pre-existing logfile segment for writing.
  */
 static int
-XLogFileOpen(uint32 log, uint32 seg, bool econt)
+XLogFileOpen(uint32 log, uint32 seg)
 {
 	char		path[MAXPGPATH];
 	int			fd;
 
-	if (InArchiveRecovery)
-		RestoreArchivedXLog(path, log, seg);
-	else
-		XLogFilePath(path, log, seg);
+	XLogFilePath(path, ThisTimeLineID, log, seg);
 
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
 					   S_IRUSR | S_IWUSR);
 	if (fd < 0)
-	{
-		if (econt && errno == ENOENT)
-		{
-			ereport(LOG,
-					(errcode_for_file_access(),
-			errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
-				   path, log, seg)));
-			return (fd);
-		}
 		ereport(PANIC,
 				(errcode_for_file_access(),
 			errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
 				   path, log, seg)));
-	}
+
+	return fd;
+}
+
+/*
+ * Open a logfile segment for reading (during recovery).
+ */
+static int
+XLogFileRead(uint32 log, uint32 seg, int emode)
+{
+	char		path[MAXPGPATH];
+	char		xlogfname[MAXFNAMELEN];
+	ListCell   *cell;
+	int			fd;
 
 	/*
-	 * XXX this is a pretty horrid hack.  Remove after implementing timelines.
-	 *
-	 * if we switched back to local xlogs after having been
-	 * restoring from archive, we need to make sure that the
-	 * local files don't get removed by end-of-recovery checkpoint
-	 * in case we need to re-run the recovery
+	 * Loop looking for a suitable timeline ID: we might need to
+	 * read any of the timelines listed in expectedTLIs.
 	 *
-	 * we want to copy these away as soon as possible, so set
-	 * the archive status flag to .ready for them
-	 * in case admin isn't cautious enough to have done this anyway
-	 *
-	 * XXX this is completely broken, because there is no guarantee this file
-	 * is actually complete and ready to be archived.  Also, what if there's
-	 * a .done file for them?
+	 * We expect curFileTLI on entry to be the TLI of the preceding file
+	 * in sequence, or 0 if there was no predecessor.  We do not allow
+	 * curFileTLI to go backwards; this prevents us from picking up the
+	 * wrong file when a parent timeline extends to higher segment numbers
+	 * than the child we want to read.
 	 */
-	if (InArchiveRecovery && !restoredFromArchive)
-		XLogArchiveNotifySeg(log, seg);
+	foreach(cell, expectedTLIs)
+	{
+		TimeLineID	tli = (TimeLineID) lfirst_int(cell);
 
-	return (fd);
+		if (tli < curFileTLI)
+			break;				/* don't bother looking at too-old TLIs */
+
+		if (InArchiveRecovery)
+		{
+			XLogFileName(xlogfname, tli, log, seg);
+			restoredFromArchive = RestoreArchivedFile(path, xlogfname,
+													  "RECOVERYXLOG");
+		}
+		else
+			XLogFilePath(path, tli, log, seg);
+
+		fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
+		if (fd >= 0)
+		{
+			/* Success! */
+			curFileTLI = tli;
+			return fd;
+		}
+		if (errno != ENOENT)	/* unexpected failure? */
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
+							path, log, seg)));
+	}
+
+	/* Couldn't find it.  For simplicity, complain about front timeline */
+	XLogFilePath(path, recoveryTargetTLI, log, seg);
+	errno = ENOENT;
+	ereport(emode,
+			(errcode_for_file_access(),
+			 errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
+					path, log, seg)));
+	return -1;
 }
 
 /*
- * Get next logfile segment when using off-line archive for recovery
- *
- * Attempt to retrieve the specified segment from off-line archival storage.
+ * Attempt to retrieve the specified file from off-line archival storage.
  * If successful, fill "path" with its complete path (note that this will be
- * a temp file name that doesn't follow the normal naming convention).
+ * a temp file name that doesn't follow the normal naming convention), and
+ * return TRUE.
  *
- * If not successful, fill "path" with the name of the normal on-line segment
- * file (which may or may not actually exist, but we'll try to use it).
+ * If not successful, fill "path" with the name of the normal on-line file
+ * (which may or may not actually exist, but we'll try to use it), and return
+ * FALSE.
  */
-static void
-RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
+static bool
+RestoreArchivedFile(char *path, const char *xlogfname,
+					const char *recovername)
 {
-	char xlogfname[MAXFNAMELEN];
 	char xlogpath[MAXPGPATH];
 	char xlogRestoreCmd[MAXPGPATH];
 	char *dp;
@@ -1919,11 +1913,10 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
 	 * The copy-from-archive filename is always the same, ensuring that we
 	 * don't run out of disk space on long recoveries.
 	 */
-	XLogFileName(xlogfname, log, seg);
-	snprintf(xlogpath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
+	snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir, recovername);
 
 	/*
-	 * Make sure there is no existing RECOVERYXLOG file.
+	 * Make sure there is no existing file named recovername.
 	 */
 	if (stat(xlogpath, &stat_buf) != 0)
 	{
@@ -2004,8 +1997,7 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
 					(errmsg("restored log file \"%s\" from archive",
 							xlogfname)));
 			strcpy(path, xlogpath);
-			restoredFromArchive = true;
-			return;
+			return true;
 		}
 		if (errno != ENOENT)
 			ereport(FATAL,
@@ -2033,8 +2025,8 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
 	 * In many recovery scenarios we expect this to fail also, but
 	 * if so that just means we've reached the end of WAL.
 	 */
-	XLogFilePath(path, log, seg);
-	restoredFromArchive = false;
+	snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlogfname);
+	return false;
 }
 
 /*
@@ -2085,18 +2077,25 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr)
 			errmsg("could not open transaction log directory \"%s\": %m",
 				   XLogDir)));
 
-	XLogFileName(lastoff, log, seg);
+	XLogFileName(lastoff, ThisTimeLineID, log, seg);
 
 	errno = 0;
 	while ((xlde = readdir(xldir)) != NULL)
 	{
 		/*
-		 * use the alphanumeric sorting property of the filenames to decide
-		 * which ones are earlier than the lastoff segment
+		 * We ignore the timeline part of the XLOG segment identifiers in
+		 * deciding whether a segment is still needed.  This ensures that
+		 * we won't prematurely remove a segment from a parent timeline.
+		 * We could probably be a little more proactive about removing
+		 * segments of non-parent timelines, but that would be a whole lot
+		 * more complicated.
+		 *
+		 * We use the alphanumeric sorting property of the filenames to decide
+		 * which ones are earlier than the lastoff segment.
 		 */
-		if (strlen(xlde->d_name) == 16 &&
-			strspn(xlde->d_name, "0123456789ABCDEF") == 16 &&
-			strcmp(xlde->d_name, lastoff) <= 0)
+		if (strlen(xlde->d_name) == 24 &&
+			strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
+			strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
 		{
 			bool        recycle;
 
@@ -2185,7 +2184,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
 				page = (Page) BufferGetPage(buffer);
 				memcpy((char *) page, blk, BLCKSZ);
 				PageSetLSN(page, lsn);
-				PageSetSUI(page, ThisStartUpID);
+				PageSetTLI(page, ThisTimeLineID);
 				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				WriteBuffer(buffer);
 			}
@@ -2272,11 +2271,13 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
 {
 	XLogRecord *record;
 	XLogRecPtr	tmpRecPtr = EndRecPtr;
+	bool		randAccess = false;
 	uint32		len,
 				total_len;
 	uint32		targetPageOff;
+	uint32		targetRecOff;
+	uint32		pageHeaderSize;
 	unsigned	i;
-	bool		nextmode = false;
 
 	if (readBuf == NULL)
 	{
@@ -2295,7 +2296,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
 	if (RecPtr == NULL)
 	{
 		RecPtr = &tmpRecPtr;
-		nextmode = true;
 		/* fast case if next record is on same page */
 		if (nextRecord != NULL)
 		{
@@ -2310,12 +2310,24 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
 			(tmpRecPtr.xlogid)++;
 			tmpRecPtr.xrecoff = 0;
 		}
-		tmpRecPtr.xrecoff += SizeOfXLogPHD;
+		/* We will account for page header size below */
+	}
+	else
+	{
+		if (!XRecOffIsValid(RecPtr->xrecoff))
+			ereport(PANIC,
+					(errmsg("invalid record offset at %X/%X",
+							RecPtr->xlogid, RecPtr->xrecoff)));
+		/*
+		 * Since we are going to a random position in WAL, forget any
+		 * prior state about what timeline we were in, and allow it
+		 * to be any timeline in expectedTLIs.  We also set a flag to
+		 * allow curFileTLI to go backwards (but we can't reset that
+		 * variable right here, since we might not change files at all).
+		 */
+		lastPageTLI = 0;		/* see comment in ValidXLOGHeader */
+		randAccess = true;		/* allow curFileTLI to go backwards too */
 	}
-	else if (!XRecOffIsValid(RecPtr->xrecoff))
-		ereport(PANIC,
-				(errmsg("invalid record offset at %X/%X",
-						RecPtr->xlogid, RecPtr->xrecoff)));
 
 	if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg))
 	{
@@ -2325,7 +2337,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
 	XLByteToSeg(*RecPtr, readId, readSeg);
 	if (readFile < 0)
 	{
-		readFile = XLogFileOpen(readId, readSeg, (emode == LOG));
+		/* Now it's okay to reset curFileTLI if random fetch */
+		if (randAccess)
+			curFileTLI = 0;
+
+		readFile = XLogFileRead(readId, readSeg, emode);
 		if (readFile < 0)
 			goto next_record_is_invalid;
 		readOff = (uint32) (-1);	/* force read to occur below */
@@ -2351,11 +2367,30 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
 							readId, readSeg, readOff)));
 			goto next_record_is_invalid;
 		}
-		if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, nextmode))
+		if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
 			goto next_record_is_invalid;
 	}
+	pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
+	targetRecOff = RecPtr->xrecoff % BLCKSZ;
+	if (targetRecOff == 0)
+	{
+		/*
+		 * Can only get here in the continuing-from-prev-page case, because
+		 * XRecOffIsValid eliminated the zero-page-offset case otherwise.
+		 * Need to skip over the new page's header.
+		 */
+		tmpRecPtr.xrecoff += pageHeaderSize;
+		targetRecOff = pageHeaderSize;
+	}
+	else if (targetRecOff < pageHeaderSize)
+	{
+		ereport(emode,
+				(errmsg("invalid record offset at %X/%X",
+						RecPtr->xlogid, RecPtr->xrecoff)));
+		goto next_record_is_invalid;
+	}
 	if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
-		RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHD)
+		targetRecOff == pageHeaderSize)
 	{
 		ereport(emode,
 				(errmsg("contrecord is requested by %X/%X",
@@ -2428,7 +2463,7 @@ got_record:;
 				close(readFile);
 				readFile = -1;
 				NextLogSeg(readId, readSeg);
-				readFile = XLogFileOpen(readId, readSeg, (emode == LOG));
+				readFile = XLogFileRead(readId, readSeg, emode);
 				if (readFile < 0)
 					goto next_record_is_invalid;
 				readOff = 0;
@@ -2441,7 +2476,7 @@ got_record:;
 								readId, readSeg, readOff)));
 				goto next_record_is_invalid;
 			}
-			if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true))
+			if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
 				goto next_record_is_invalid;
 			if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
 			{
@@ -2450,7 +2485,8 @@ got_record:;
 								readId, readSeg, readOff)));
 				goto next_record_is_invalid;
 			}
-			contrecord = (XLogContRecord *) ((char *) readBuf + SizeOfXLogPHD);
+			pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
+			contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize);
 			if (contrecord->xl_rem_len == 0 ||
 				total_len != (contrecord->xl_rem_len + gotlen))
 			{
@@ -2460,7 +2496,7 @@ got_record:;
 								readId, readSeg, readOff)));
 				goto next_record_is_invalid;
 			}
-			len = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord;
+			len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
 			if (contrecord->xl_rem_len > len)
 			{
 				memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
@@ -2474,7 +2510,8 @@ got_record:;
 		}
 		if (!RecordIsValid(record, *RecPtr, emode))
 			goto next_record_is_invalid;
-		if (BLCKSZ - SizeOfXLogRecord >= SizeOfXLogPHD +
+		pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
+		if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
 			SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len))
 		{
 			nextRecord = (XLogRecord *) ((char *) contrecord +
@@ -2482,7 +2519,7 @@ got_record:;
 		}
 		EndRecPtr.xlogid = readId;
 		EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff +
-			SizeOfXLogPHD + SizeOfXLogContRecord +
+			pageHeaderSize + SizeOfXLogContRecord +
 			MAXALIGN(contrecord->xl_rem_len);
 		ReadRecPtr = *RecPtr;
 		return record;
@@ -2514,7 +2551,7 @@ next_record_is_invalid:;
  * ReadRecord.	It's not intended for use from anywhere else.
  */
 static bool
-ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI)
+ValidXLOGHeader(XLogPageHeader hdr, int emode)
 {
 	XLogRecPtr	recaddr;
 
@@ -2532,46 +2569,416 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI)
 						hdr->xlp_info, readId, readSeg, readOff)));
 		return false;
 	}
-	recaddr.xlogid = readId;
-	recaddr.xrecoff = readSeg * XLogSegSize + readOff;
-	if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
+	if (hdr->xlp_info & XLP_LONG_HEADER)
 	{
-		ereport(emode,
-				(errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u",
-					 hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff,
-						readId, readSeg, readOff)));
-		return false;
-	}
+		XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
 
-	/*
-	 * We disbelieve a SUI less than the previous page's SUI, or more than
-	 * a few counts greater.  In theory as many as 512 shutdown checkpoint
-	 * records could appear on a 32K-sized xlog page, so that's the most
-	 * differential there could legitimately be.
-	 *
-	 * Note this check can only be applied when we are reading the next page
-	 * in sequence, so ReadRecord passes a flag indicating whether to
-	 * check.
-	 */
-	if (checkSUI)
-	{
-		if (hdr->xlp_sui < lastReadSUI ||
-			hdr->xlp_sui > lastReadSUI + 512)
+		if (longhdr->xlp_sysid != ControlFile->system_identifier)
 		{
-			ereport(emode,
-			/* translator: SUI = startup id */
-					(errmsg("out-of-sequence SUI %u (after %u) in log file %u, segment %u, offset %u",
-							hdr->xlp_sui, lastReadSUI,
-							readId, readSeg, readOff)));
-			return false;
-		}
-	}
-	lastReadSUI = hdr->xlp_sui;
-	return true;
-}
+			char		fhdrident_str[32];
+			char		sysident_str[32];
 
-/*
- * I/O routines for pg_control
+			/*
+			 * Format sysids separately to keep platform-dependent format
+			 * code out of the translatable message string.
+			 */
+			snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT,
+					 longhdr->xlp_sysid);
+			snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
+					 ControlFile->system_identifier);
+			ereport(emode,
+					(errmsg("WAL file is from different system"),
+					 errdetail("WAL file SYSID is %s, pg_control SYSID is %s",
+							   fhdrident_str, sysident_str)));
+			return false;
+		}
+		if (longhdr->xlp_seg_size != XLogSegSize)
+		{
+			ereport(emode,
+					(errmsg("WAL file is from different system"),
+					 errdetail("Incorrect XLOG_SEG_SIZE in page header.")));
+			return false;
+		}
+	}
+	recaddr.xlogid = readId;
+	recaddr.xrecoff = readSeg * XLogSegSize + readOff;
+	if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
+	{
+		ereport(emode,
+				(errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u",
+					 hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff,
+						readId, readSeg, readOff)));
+		return false;
+	}
+
+	/*
+	 * Check page TLI is one of the expected values.
+	 */
+	if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli))
+	{
+		ereport(emode,
+				(errmsg("unexpected timeline ID %u in log file %u, segment %u, offset %u",
+						hdr->xlp_tli,
+						readId, readSeg, readOff)));
+		return false;
+	}
+
+	/*
+	 * Since child timelines are always assigned a TLI greater than their
+	 * immediate parent's TLI, we should never see TLI go backwards across
+	 * successive pages of a consistent WAL sequence.
+	 *
+	 * Of course this check should only be applied when advancing sequentially
+	 * across pages; therefore ReadRecord resets lastPageTLI to zero when
+	 * going to a random page.
+	 */
+	if (hdr->xlp_tli < lastPageTLI)
+	{
+		ereport(emode,
+				(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
+						hdr->xlp_tli, lastPageTLI,
+						readId, readSeg, readOff)));
+		return false;
+	}
+	lastPageTLI = hdr->xlp_tli;
+	return true;
+}
+
+/*
+ * Try to read a timeline's history file.
+ *
+ * If successful, return the list of component TLIs (the given TLI followed by
+ * its ancestor TLIs, newest first).  If the history file does not exist,
+ * assume the timeline has no parents and return a list of just targetTLI.
+ * Any other open failure, or a malformed file, is reported at FATAL.
+ */
+static List *
+readTimeLineHistory(TimeLineID targetTLI)
+{
+	List	   *result;
+	char		path[MAXPGPATH];
+	char		histfname[MAXFNAMELEN];
+	char		fline[MAXPGPATH];
+    FILE     *fd;
+
+	if (InArchiveRecovery)
+	{
+		TLHistoryFileName(histfname, targetTLI);
+		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
+	}
+	else
+		TLHistoryFilePath(path, targetTLI);
+
+    fd = AllocateFile(path, "r");
+	if (fd == NULL)
+	{
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not open \"%s\": %m", path)));
+		/* Not there, so assume no parents */
+		return list_make1_int((int) targetTLI);
+	}
+
+	result = NIL;
+
+    /*
+     * Parse the file: each data line must start with an ancestor's TLI.
+     */
+    while (fgets(fline, MAXPGPATH, fd) != NULL)
+	{
+		/* skip leading whitespace; ignore blank lines and # comments */
+		char *ptr;
+		char *endptr;
+		TimeLineID tli;
+
+		for (ptr = fline; *ptr; ptr++)
+		{
+			if (!isspace((unsigned char) *ptr))
+				break;
+		}
+		if (*ptr == '\0' || *ptr == '#')
+			continue;
+
+		/* expect a numeric timeline ID first (strtoul auto-detects base) */
+		tli = (TimeLineID) strtoul(ptr, &endptr, 0);
+		if (endptr == ptr)
+			ereport(FATAL,
+					(errmsg("syntax error in history file: %s", fline),
+					 errhint("Expected a numeric timeline ID.")));
+
+		if (result &&
+			tli <= (TimeLineID) linitial_int(result))
+			ereport(FATAL,
+					(errmsg("invalid data in history file: %s", fline),
+					 errhint("Timeline IDs must be in increasing sequence.")));
+
+		/* Build list with newest item first */
+		result = lcons_int((int) tli, result);
+
+		/* we ignore the remainder of each line */
+	}
+
+	FreeFile(fd);
+
+	if (result &&
+		targetTLI <= (TimeLineID) linitial_int(result))
+		ereport(FATAL,
+				(errmsg("invalid data in history file \"%s\"", path),
+				 errhint("Timeline IDs must be less than child timeline's ID.")));
+
+	result = lcons_int((int) targetTLI, result);
+
+	ereport(DEBUG3,
+			(errmsg_internal("history of timeline %u is %s",
+							 targetTLI, nodeToString(result))));
+
+	return result;
+}
+
+/*
+ * Return true if a history file for probeTLI can be opened, false if absent
+ */
+static bool
+existsTimeLineHistory(TimeLineID probeTLI)
+{
+	char		path[MAXPGPATH];
+	char		histfname[MAXFNAMELEN];
+    FILE     *fd;
+
+	if (InArchiveRecovery)
+	{
+		TLHistoryFileName(histfname, probeTLI);
+		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
+	}
+	else
+		TLHistoryFilePath(path, probeTLI);
+
+	fd = AllocateFile(path, "r");
+	if (fd != NULL)
+	{
+		FreeFile(fd);
+		return true;
+	}
+	else
+	{
+		if (errno != ENOENT)	/* only "no such file" counts as absent */
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not open \"%s\": %m", path)));
+		return false;
+	}
+}
+
+/*
+ * Find the newest existing timeline, assuming that startTLI exists.
+ *
+ * Note: while this is somewhat heuristic, it does positively guarantee
+ * that (result + 1) is not a known timeline, and therefore it should
+ * be safe to assign that ID to a new timeline.
+ */
+static TimeLineID
+findNewestTimeLine(TimeLineID startTLI)
+{
+	TimeLineID	newestTLI;
+	TimeLineID	probeTLI;
+
+	/*
+	 * Probe successive TLIs for a timeline history file, stopping at the
+	 * first one that has none.  XXX is it useful to allow gaps in the sequence?
+	 */
+	newestTLI = startTLI;
+
+	for (probeTLI = startTLI + 1; ; probeTLI++)
+	{
+		if (existsTimeLineHistory(probeTLI))
+		{
+			newestTLI = probeTLI;		/* probeTLI exists */
+		}
+		else
+		{
+			/* doesn't exist, assume we're done */
+			break;
+		}
+	}
+
+	return newestTLI;
+}
+
+/*
+ * Create a new timeline history file.
+ *
+ *	newTLI: ID of the new timeline
+ *	parentTLI: ID of its immediate parent
+ *	endTLI et al: ID of the last used WAL file, for annotation purposes
+ *
+ * Currently this is only used during recovery, and so there are no locking
+ * considerations.  But we should be just as careful as XLogFileInit to avoid
+ * installing a bogus file.
+ */
+static void
+writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+					 TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
+{
+	char		path[MAXPGPATH];
+	char		tmppath[MAXPGPATH];
+	char		histfname[MAXFNAMELEN];
+	char		xlogfname[MAXFNAMELEN];
+	char		buffer[BLCKSZ];
+	int			srcfd;
+	int			fd;
+	int			nbytes;
+
+	Assert(newTLI > parentTLI);	/* else bad selection of newTLI */
+
+	/*
+	 * Write into a temp file name.
+	 */
+	snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d",
+			 XLogDir, (int) getpid());
+
+	unlink(tmppath);
+
+	/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */
+	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
+					   S_IRUSR | S_IWUSR);
+	if (fd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", tmppath)));
+
+	/*
+	 * If a history file exists for the parent, copy it verbatim
+	 */
+	if (InArchiveRecovery)
+	{
+		TLHistoryFileName(histfname, parentTLI);
+		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
+	}
+	else
+		TLHistoryFilePath(path, parentTLI);
+
+	srcfd = BasicOpenFile(path, O_RDONLY, 0);
+	if (srcfd < 0)
+	{
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not open \"%s\": %m", path)));
+		/* Not there, so assume parent has no parents */
+	}
+	else
+	{
+		for (;;)
+		{
+			errno = 0;
+			nbytes = (int) read(srcfd, buffer, sizeof(buffer));
+			if (nbytes < 0 || errno != 0)
+				ereport(PANIC,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			if (nbytes == 0)
+				break;
+			errno = 0;
+			if ((int) write(fd, buffer, nbytes) != nbytes)
+			{
+				int			save_errno = errno;
+
+				/*
+				 * If we fail to make the file, delete it to release disk
+				 * space
+				 */
+				unlink(tmppath);
+				/* if write didn't set errno, assume problem is no disk space */
+				errno = save_errno ? save_errno : ENOSPC;
+
+				ereport(PANIC,
+						(errcode_for_file_access(),
+						 errmsg("could not write to file \"%s\": %m", tmppath)));
+			}
+		}
+		close(srcfd);
+	}
+
+	/*
+	 * Append one line describing this timeline split: parent TLI, last used
+	 * WAL file name, and the recovery stop point, separated by tabs.
+	 * If we did have a parent file, insert an extra newline first, just in
+	 * case the parent file failed to end with one.
+	 */
+	XLogFileName(xlogfname, endTLI, endLogId, endLogSeg);
+
+	snprintf(buffer, sizeof(buffer),
+			 "%s%u\t%s\t%s transaction %u at %s\n",
+			 (srcfd < 0) ? "" : "\n",
+			 parentTLI,
+			 xlogfname,
+			 recoveryStopAfter ? "after" : "before",
+			 recoveryStopXid,
+			 str_time(recoveryStopTime));
+
+	nbytes = strlen(buffer);
+	errno = 0;
+	if ((int) write(fd, buffer, nbytes) != nbytes)
+	{
+		int			save_errno = errno;
+
+		/*
+		 * If we fail to make the file, delete it to release disk
+		 * space
+		 */
+		unlink(tmppath);
+		/* if write didn't set errno, assume problem is no disk space */
+		errno = save_errno ? save_errno : ENOSPC;
+
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", tmppath)));
+	}
+
+	if (pg_fsync(fd) != 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", tmppath)));
+
+	if (close(fd))
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", tmppath)));
+
+
+	/*
+	 * Now move the completed history file into place with its final name.
+	 */
+	TLHistoryFilePath(path, newTLI);
+
+	/*
+	 * Prefer link() to rename() here just to be really sure that we don't
+	 * overwrite an existing history file.  However, there shouldn't be one,
+	 * so rename() is an acceptable substitute except for the truly paranoid.
+	 */
+#if HAVE_WORKING_LINK
+	if (link(tmppath, path) < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not link file \"%s\" to \"%s\": %m",
+						tmppath, path)));
+	unlink(tmppath);
+#else
+	if (rename(tmppath, path) < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not rename file \"%s\" to \"%s\": %m",
+						tmppath, path)));
+#endif
+
+	/* The history file can be archived immediately. */
+	TLHistoryFileName(histfname, newTLI);
+	XLogArchiveNotify(histfname);
+}
+
+/*
+ * I/O routines for pg_control
  *
  * *ControlFile is a buffer in shared memory that holds an image of the
  * contents of pg_control.	WriteControlFile() initializes pg_control
@@ -2956,8 +3363,8 @@ BootStrapXLOG(void)
 	CheckPoint	checkPoint;
 	char	   *buffer;
 	XLogPageHeader page;
+	XLogLongPageHeader longpage;
 	XLogRecord *record;
-	XLogFileHeaderData *fhdr;
 	bool		use_existent;
 	uint64		sysidentifier;
 	struct timeval tv;
@@ -2979,6 +3386,9 @@ BootStrapXLOG(void)
 	sysidentifier = ((uint64) tv.tv_sec) << 32;
 	sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
 
+	/* First timeline ID is always 1 */
+	ThisTimeLineID = 1;
+
 	/* Use malloc() to ensure buffer is MAXALIGNED */
 	buffer = (char *) malloc(BLCKSZ);
 	page = (XLogPageHeader) buffer;
@@ -2986,9 +3396,9 @@ BootStrapXLOG(void)
 
 	/* Set up information for the initial checkpoint record */
 	checkPoint.redo.xlogid = 0;
-	checkPoint.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD;
+	checkPoint.redo.xrecoff = SizeOfXLogLongPHD;
 	checkPoint.undo = checkPoint.redo;
-	checkPoint.ThisStartUpID = 0;
+	checkPoint.ThisTimeLineID = ThisTimeLineID;
 	checkPoint.nextXid = FirstNormalTransactionId;
 	checkPoint.nextOid = BootstrapObjectIdData;
 	checkPoint.time = time(NULL);
@@ -2999,38 +3409,18 @@ BootStrapXLOG(void)
 
 	/* Set up the XLOG page header */
 	page->xlp_magic = XLOG_PAGE_MAGIC;
-	page->xlp_info = 0;
-	page->xlp_sui = checkPoint.ThisStartUpID;
+	page->xlp_info = XLP_LONG_HEADER;
+	page->xlp_tli = ThisTimeLineID;
 	page->xlp_pageaddr.xlogid = 0;
 	page->xlp_pageaddr.xrecoff = 0;
-
-	/* Insert the file header record */
-	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
-	record->xl_prev.xlogid = 0;
-	record->xl_prev.xrecoff = 0;
-	record->xl_xact_prev.xlogid = 0;
-	record->xl_xact_prev.xrecoff = 0;
-	record->xl_xid = InvalidTransactionId;
-	record->xl_len = SizeOfXLogFHD;
-	record->xl_info = XLOG_FILE_HEADER;
-	record->xl_rmid = RM_XLOG_ID;
-	fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
-	fhdr->xlfhd_sysid = sysidentifier;
-	fhdr->xlfhd_xlogid = 0;
-	fhdr->xlfhd_segno = 0;
-	fhdr->xlfhd_seg_size = XLogSegSize;
-
-	INIT_CRC64(crc);
-	COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
-	COMP_CRC64(crc, (char *) record + sizeof(crc64),
-			   SizeOfXLogRecord - sizeof(crc64));
-	FIN_CRC64(crc);
-	record->xl_crc = crc;
+	longpage = (XLogLongPageHeader) page;
+	longpage->xlp_sysid = sysidentifier;
+	longpage->xlp_seg_size = XLogSegSize;
 
 	/* Insert the initial checkpoint record */
-	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD);
+	record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
 	record->xl_prev.xlogid = 0;
-	record->xl_prev.xrecoff = SizeOfXLogPHD;
+	record->xl_prev.xrecoff = 0;
 	record->xl_xact_prev.xlogid = 0;
 	record->xl_xact_prev.xrecoff = 0;
 	record->xl_xid = InvalidTransactionId;
@@ -3050,7 +3440,7 @@ BootStrapXLOG(void)
 	use_existent = false;
 	openLogFile = XLogFileInit(0, 0, &use_existent, false);
 
-	/* Write the first page with the initial records */
+	/* Write the first page with the initial record */
 	errno = 0;
 	if (write(openLogFile, buffer, BLCKSZ) != BLCKSZ)
 	{
@@ -3120,6 +3510,8 @@ readRecoveryCommandFile(void)
 	char recoveryCommandFile[MAXPGPATH];
     FILE     *fd;
     char    cmdline[MAXPGPATH];
+	TimeLineID rtli = 0;
+	bool	rtliGiven = false;
     bool    syntaxError = false;
 
   	snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
@@ -3177,11 +3569,31 @@ readRecoveryCommandFile(void)
 		}
 
 		if (strcmp(tok1,"restore_command") == 0) {
-			StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH);
+			recoveryRestoreCommand = pstrdup(tok2);
 			ereport(LOG,
 					(errmsg("restore_command = \"%s\"",
 							recoveryRestoreCommand)));
 		}
+		else if (strcmp(tok1,"recovery_target_timeline") == 0) {
+			rtliGiven = true;
+			if (strcmp(tok2, "latest") == 0)
+				rtli = 0;
+			else
+			{
+				errno = 0;
+				rtli = (TimeLineID) strtoul(tok2, NULL, 0);
+				if (errno == EINVAL || errno == ERANGE)
+					ereport(FATAL,
+							(errmsg("recovery_target_timeline is not a valid number: \"%s\"",
+									tok2)));
+			}
+			if (rtli)
+				ereport(LOG,
+						(errmsg("recovery_target_timeline = %u", rtli)));
+			else
+				ereport(LOG,
+						(errmsg("recovery_target_timeline = latest")));
+		}
 		else if (strcmp(tok1,"recovery_target_xid") == 0) {
 			errno = 0;
 			recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0);
@@ -3246,22 +3658,44 @@ readRecoveryCommandFile(void)
 				 errhint("Lines should have the format parameter = 'value'.")));
 
 	/* Check that required parameters were supplied */
-	if (recoveryRestoreCommand[0] == '\0')
+	if (recoveryRestoreCommand == NULL)
 		ereport(FATAL,
 				(errmsg("recovery command file \"%s\" did not specify restore_command",
 						recoveryCommandFile)));
 
+	/* Enable fetching from archive recovery area */
+	InArchiveRecovery = true;
+
 	/*
-	 * clearly indicate our state
+	 * If user specified recovery_target_timeline, validate it or compute the
+	 * "latest" value.  We can't do this until after we've gotten the restore
+	 * command and set InArchiveRecovery, because we need to fetch timeline
+	 * history files from the archive.
 	 */
-	InArchiveRecovery = true;
+	if (rtliGiven)
+	{
+		if (rtli)
+		{
+			/* Timeline 1 does not have a history file, all else should */
+			if (rtli != 1 && !existsTimeLineHistory(rtli))
+				ereport(FATAL,
+						(errmsg("recovery_target_timeline %u does not exist",
+								rtli)));
+			recoveryTargetTLI = rtli;
+		}
+		else
+		{
+			/* We start the "latest" search from pg_control's timeline */
+			recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
+		}
+	}
 }
 
 /*
  * Exit archive-recovery state
  */
 static void
-exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
+exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
 {
 	char recoveryPath[MAXPGPATH];
 	char xlogpath[MAXPGPATH];
@@ -3269,7 +3703,7 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
 	char recoveryCommandDone[MAXPGPATH];
 
 	/*
-	 * Disable fetches from archive, so we can use XLogFileOpen below.
+	 * We are no longer in archive recovery state.
 	 */
 	InArchiveRecovery = false;
 
@@ -3294,10 +3728,12 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
 	 * more descriptive of what our current database state is, because that
 	 * is what we replayed from.
 	 *
-	 * XXX there ought to be a timeline increment somewhere around here.
+	 * Note that if we are establishing a new timeline, ThisTimeLineID is
+	 * already set to the new value, and so we will create a new file instead
+	 * of overwriting any existing file.
 	 */
 	snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
-	XLogFilePath(xlogpath, endLogId, endLogSeg);
+	XLogFilePath(xlogpath, ThisTimeLineID, endLogId, endLogSeg);
 
 	if (restoredFromArchive)
 	{
@@ -3319,61 +3755,26 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
 		 * RECOVERYXLOG laying about, get rid of it.
 		 */
 		unlink(recoveryPath);	/* ignore any error */
+		/*
+		 * If we are establishing a new timeline, we have to copy data
+		 * from the last WAL segment of the old timeline to create a
+		 * starting WAL segment for the new timeline.
+		 */
+		if (endTLI != ThisTimeLineID)
+			XLogFileCopy(endLogId, endLogSeg,
+						 endTLI, endLogId, endLogSeg);
 	}
 
 	/*
-	 * If we restored to a point-in-time, then the current WAL segment
-	 * probably contains records beyond the stop point.  These represent an
-	 * extreme hazard: if we crash in the near future, the replay apparatus
-	 * will know no reason why it shouldn't replay them.  Therefore,
-	 * explicitly zero out all the remaining pages of the segment.  (We need
-	 * not worry about the partial page in which the last record ends, since
-	 * StartUpXlog will handle zeroing that.  Also, there's nothing to do
-	 * if we are right at a segment boundary.)
-	 *
-	 * XXX segment files beyond thhe current one also represent a hazard
-	 * for the same reason.  Need to invent timelines to fix this.
+	 * Let's just make real sure there are no .ready or .done flags posted
+	 * for the new segment.
 	 */
+	XLogFileName(xlogpath, ThisTimeLineID, endLogId, endLogSeg);
+	XLogArchiveCleanup(xlogpath);
 
-	/* align xrecoff to next page, then drop segment part */
-	if (xrecoff % BLCKSZ != 0)
-		xrecoff += (BLCKSZ - xrecoff % BLCKSZ);
-	xrecoff %= XLogSegSize;
-
-	if (recoveryTarget && xrecoff != 0)
-	{
-		int			fd;
-		char		zbuffer[BLCKSZ];
-
-		fd = XLogFileOpen(endLogId, endLogSeg, false);
-		MemSet(zbuffer, 0, sizeof(zbuffer));
-		if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not seek in file \"%s\": %m",
-							xlogpath)));
-		for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer))
-		{
-			errno = 0;
-			if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
-			{
-				/* if write didn't set errno, assume problem is no disk space */
-				if (errno == 0)
-					errno = ENOSPC;
-				ereport(PANIC,
-						(errcode_for_file_access(),
-						 errmsg("could not write to file \"%s\": %m", xlogpath)));
-			}
-		}
-		if (pg_fsync(fd) != 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not fsync file \"%s\": %m", xlogpath)));
-		if (close(fd))
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not close file \"%s\": %m", xlogpath)));
-	}
+	/* Get rid of any remaining recovered timeline-history file, too */
+	snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYHISTORY", XLogDir);
+	unlink(recoveryPath);	/* ignore any error */
 
 	/*
 	 * Rename the config file out of the way, so that we don't accidentally
@@ -3398,6 +3799,8 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
  *
  * Returns TRUE if we are stopping, FALSE otherwise.  On TRUE return,
  * *includeThis is set TRUE if we should apply this record before stopping.
+ * Also, some information is saved in recoveryStopXid et al for use in
+ * annotating the new timeline's history file.
  */
 static bool
 recoveryStopsHere(XLogRecord *record, bool *includeThis)
@@ -3466,27 +3869,31 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
 	if (stopsHere)
 	{
+		recoveryStopXid = record->xl_xid;
+		recoveryStopTime = recordXtime;
+		recoveryStopAfter = *includeThis;
+
 		if (record_info == XLOG_XACT_COMMIT)
 		{
-			if (*includeThis)
+			if (recoveryStopAfter)
 				ereport(LOG,
 						(errmsg("recovery stopping after commit of transaction %u, time %s",
-								record->xl_xid, str_time(recordXtime))));
+								recoveryStopXid, str_time(recoveryStopTime))));
 			else
 				ereport(LOG,
 						(errmsg("recovery stopping before commit of transaction %u, time %s",
-								record->xl_xid, str_time(recordXtime))));
+								recoveryStopXid, str_time(recoveryStopTime))));
 		}
 		else
 		{
-			if (*includeThis)
+			if (recoveryStopAfter)
 				ereport(LOG,
 						(errmsg("recovery stopping after abort of transaction %u, time %s",
-								record->xl_xid, str_time(recordXtime))));
+								recoveryStopXid, str_time(recoveryStopTime))));
 			else
 				ereport(LOG,
 						(errmsg("recovery stopping before abort of transaction %u, time %s",
-								record->xl_xid, str_time(recordXtime))));
+								recoveryStopXid, str_time(recoveryStopTime))));
 		}
 	}
 
@@ -3502,6 +3909,7 @@ StartupXLOG(void)
 	XLogCtlInsert *Insert;
 	CheckPoint	checkPoint;
 	bool		wasShutdown;
+	bool		needNewTimeLine = false;
 	XLogRecPtr	RecPtr,
 				LastRec,
 				checkPointLoc,
@@ -3557,12 +3965,21 @@ StartupXLOG(void)
 		pg_usleep(60000000L);
 #endif
 
+	/*
+	 * Initialize on the assumption we want to recover to the same timeline
+	 * that's active according to pg_control.
+	 */
+	recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
+
 	/*
 	 * Check for recovery control file, and if so set up state for
 	 * offline recovery
 	 */
 	readRecoveryCommandFile();
 
+	/* Now we can determine the list of expected TLIs */
+	expectedTLIs = readTimeLineHistory(recoveryTargetTLI);
+
 	/*
 	 * Get the last valid checkpoint record.  If the latest one according
 	 * to pg_control is broken, try the next-to-last one.
@@ -3611,17 +4028,11 @@ StartupXLOG(void)
 	ShmemVariableCache->oidCount = 0;
 
 	/*
-	 * If it was a shutdown checkpoint, then any following WAL entries
-	 * were created under the next StartUpID; if it was a regular
-	 * checkpoint then any following WAL entries were created under the
-	 * same StartUpID. We must replay WAL entries using the same StartUpID
-	 * they were created under, so temporarily adopt that SUI (see also
-	 * xlog_redo()).
+	 * We must replay WAL entries using the same TimeLineID they were created
+	 * under, so temporarily adopt the TLI indicated by the checkpoint (see
+	 * also xlog_redo()).
 	 */
-	if (wasShutdown)
-		ThisStartUpID = checkPoint.ThisStartUpID + 1;
-	else
-		ThisStartUpID = checkPoint.ThisStartUpID;
+	ThisTimeLineID = checkPoint.ThisTimeLineID;
 
 	RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
 
@@ -3663,12 +4074,18 @@ StartupXLOG(void)
 				RmgrTable[rmid].rm_startup();
 		}
 
-		/* Is REDO required ? */
+		/*
+		 * Find the first record that logically follows the checkpoint ---
+		 * it might physically precede it, though.
+		 */
 		if (XLByteLT(checkPoint.redo, RecPtr))
+		{
+			/* back up to find the record */
 			record = ReadRecord(&(checkPoint.redo), PANIC, buffer);
+		}
 		else
 		{
-			/* read past CheckPoint record */
+			/* just have to read next record after CheckPoint */
 			record = ReadRecord(NULL, LOG, buffer);
 		}
 
@@ -3708,6 +4125,7 @@ StartupXLOG(void)
 				 */
 				if (recoveryStopsHere(record, &recoveryApply))
 				{
+					needNewTimeLine = true;	/* see below */
 					recoveryContinue = false;
 					if (!recoveryApply)
 						break;
@@ -3752,6 +4170,26 @@ StartupXLOG(void)
 	EndOfLog = EndRecPtr;
 	XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg);
 
+	/*
+	 * Consider whether we need to assign a new timeline ID.
+	 *
+	 * If we stopped short of the end of WAL during recovery, then we
+	 * are generating a new timeline and must assign it a unique new ID.
+	 * Otherwise, we can just extend the timeline we were in when we
+	 * ran out of WAL.
+	 */
+	if (needNewTimeLine)
+	{
+		ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
+		ereport(LOG,
+				(errmsg("selected new timeline ID: %u", ThisTimeLineID)));
+		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
+							 curFileTLI, endLogId, endLogSeg);
+	}
+
+	/* Save the selected TimeLineID in shared memory, too */
+	XLogCtl->ThisTimeLineID = ThisTimeLineID;
+
 	/*
 	 * We are now done reading the old WAL.  Turn off archive fetching
 	 * if it was active, and make a writable copy of the last WAL segment.
@@ -3759,7 +4197,7 @@ StartupXLOG(void)
 	 * readBuf; we will use that below.)
 	 */
 	if (InArchiveRecovery)
-		exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff);
+		exitArchiveRecovery(curFileTLI, endLogId, endLogSeg);
 
 	/*
 	 * Prepare to write WAL starting at EndOfLog position, and init xlog
@@ -3768,7 +4206,7 @@ StartupXLOG(void)
 	 */
 	openLogId = endLogId;
 	openLogSeg = endLogSeg;
-	openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
+	openLogFile = XLogFileOpen(openLogId, openLogSeg);
 	openLogOff = 0;
 	ControlFile->logId = openLogId;
 	ControlFile->logSeg = openLogSeg + 1;
@@ -3812,9 +4250,8 @@ StartupXLOG(void)
 		 * XLogWrite()).
 		 *
 		 * Note: it might seem we should do AdvanceXLInsertBuffer() here, but
-		 * we can't since we haven't yet determined the correct StartUpID
-		 * to put into the new page's header.  The first actual attempt to
-		 * insert a log record will advance the insert state.
+		 * this is sufficient.  The first actual attempt to insert a log
+		 * record will advance the insert state.
 		 */
 		XLogCtl->Write.curridx = NextBufIdx(0);
 	}
@@ -3860,22 +4297,15 @@ StartupXLOG(void)
 				RmgrTable[rmid].rm_cleanup();
 		}
 
-		/*
-		 * At this point, ThisStartUpID is the largest SUI that we could
-		 * find evidence for in the WAL entries.  But check it against
-		 * pg_control's latest checkpoint, to make sure that we can't
-		 * accidentally re-use an already-used SUI.
-		 */
-		if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID)
-			ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID;
-
 		/*
 		 * Perform a new checkpoint to update our recovery activity to
 		 * disk.
 		 *
-		 * Note that we write a shutdown checkpoint.  This is correct since
-		 * the records following it will use SUI one more than what is
-		 * shown in the checkpoint's ThisStartUpID.
+		 * Note that we write a shutdown checkpoint rather than an on-line
+		 * one.  This is not particularly critical, but since we may be
+		 * assigning a new TLI, using a shutdown checkpoint allows us to
+		 * have the rule that TLI only changes in shutdown checkpoints,
+		 * which allows some extra error checking in xlog_redo.
 		 *
 		 * In case we had to use the secondary checkpoint, make sure that it
 		 * will still be shown as the secondary checkpoint after this
@@ -3890,31 +4320,12 @@ StartupXLOG(void)
 		 */
 		XLogCloseRelationCache();
 	}
-	else
-	{
-		/*
-		 * If we are not doing recovery, then we saw a checkpoint with
-		 * nothing after it, and we can safely use StartUpID equal to one
-		 * more than the checkpoint's SUI.  But just for paranoia's sake,
-		 * check against pg_control too.
-		 */
-		ThisStartUpID = checkPoint.ThisStartUpID;
-		if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID)
-			ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID;
-	}
 
 	/*
 	 * Preallocate additional log files, if wanted.
 	 */
 	PreallocXlogFiles(EndOfLog);
 
-	/*
-	 * Advance StartUpID to one more than the highest value used
-	 * previously.
-	 */
-	ThisStartUpID++;
-	XLogCtl->ThisStartUpID = ThisStartUpID;
-
 	/*
 	 * Okay, we're officially UP.
 	 */
@@ -4018,18 +4429,18 @@ ReadCheckpointRecord(XLogRecPtr RecPtr,
 /*
  * This must be called during startup of a backend process, except that
  * it need not be called in a standalone backend (which does StartupXLOG
- * instead).  We need to initialize the local copies of ThisStartUpID and
+ * instead).  We need to initialize the local copies of ThisTimeLineID and
  * RedoRecPtr.
  *
  * Note: before Postgres 7.5, we went to some effort to keep the postmaster
- * process's copies of ThisStartUpID and RedoRecPtr valid too.  This was
+ * process's copies of ThisTimeLineID and RedoRecPtr valid too.  This was
  * unnecessary however, since the postmaster itself never touches XLOG anyway.
  */
 void
 InitXLOGAccess(void)
 {
-	/* ThisStartUpID doesn't change so we need no lock to copy it */
-	ThisStartUpID = XLogCtl->ThisStartUpID;
+	/* ThisTimeLineID doesn't change so we need no lock to copy it */
+	ThisTimeLineID = XLogCtl->ThisTimeLineID;
 	/* Use GetRedoRecPtr to copy the RedoRecPtr safely */
 	(void) GetRedoRecPtr();
 }
@@ -4110,7 +4521,7 @@ CreateCheckPoint(bool shutdown, bool force)
 	}
 
 	MemSet(&checkPoint, 0, sizeof(checkPoint));
-	checkPoint.ThisStartUpID = ThisStartUpID;
+	checkPoint.ThisTimeLineID = ThisTimeLineID;
 	checkPoint.time = time(NULL);
 
 	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
@@ -4372,8 +4783,20 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 		ShmemVariableCache->nextXid = checkPoint.nextXid;
 		ShmemVariableCache->nextOid = checkPoint.nextOid;
 		ShmemVariableCache->oidCount = 0;
-		/* Any later WAL records should be run with shutdown SUI plus 1 */
-		ThisStartUpID = checkPoint.ThisStartUpID + 1;
+		/*
+		 * TLI may change in a shutdown checkpoint, but it shouldn't decrease
+		 */
+		if (checkPoint.ThisTimeLineID != ThisTimeLineID)
+		{
+			if (checkPoint.ThisTimeLineID < ThisTimeLineID ||
+				!list_member_int(expectedTLIs,
+								 (int) checkPoint.ThisTimeLineID))
+				ereport(PANIC,
+						(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
+								checkPoint.ThisTimeLineID, ThisTimeLineID)));
+			/* Following WAL records should be run with new TLI */
+			ThisTimeLineID = checkPoint.ThisTimeLineID;
+		}
 	}
 	else if (info == XLOG_CHECKPOINT_ONLINE)
 	{
@@ -4389,40 +4812,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 			ShmemVariableCache->nextOid = checkPoint.nextOid;
 			ShmemVariableCache->oidCount = 0;
 		}
-		/* Any later WAL records should be run with the then-active SUI */
-		ThisStartUpID = checkPoint.ThisStartUpID;
-	}
-	else if (info == XLOG_FILE_HEADER)
-	{
-		XLogFileHeaderData fhdr;
-
-		memcpy(&fhdr, XLogRecGetData(record), sizeof(XLogFileHeaderData));
-		if (fhdr.xlfhd_sysid != ControlFile->system_identifier)
-		{
-			char		fhdrident_str[32];
-			char		sysident_str[32];
-
-			/*
-			 * Format sysids separately to keep platform-dependent format
-			 * code out of the translatable message string.
-			 */
-			snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT,
-					 fhdr.xlfhd_sysid);
-			snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
-					 ControlFile->system_identifier);
-			ereport(PANIC,
-					(errmsg("WAL file is from different system"),
-					 errdetail("WAL file SYSID is %s, pg_control SYSID is %s",
-							   fhdrident_str, sysident_str)));
-		}
-		if (fhdr.xlfhd_seg_size != XLogSegSize)
+		/* TLI should not change in an on-line checkpoint */
+		if (checkPoint.ThisTimeLineID != ThisTimeLineID)
 			ereport(PANIC,
-					(errmsg("WAL file is from different system"),
-					 errdetail("Incorrect XLOG_SEG_SIZE in file header.")));
-	}
-	else if (info == XLOG_WASTED_SPACE)
-	{
-		/* ignore */
+					(errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
+							checkPoint.ThisTimeLineID, ThisTimeLineID)));
 	}
 }
 
@@ -4442,10 +4836,10 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
 		CheckPoint *checkpoint = (CheckPoint *) rec;
 
 		sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; "
-				"sui %u; xid %u; oid %u; %s",
+				"tli %u; xid %u; oid %u; %s",
 				checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
 				checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
-				checkpoint->ThisStartUpID, checkpoint->nextXid,
+				checkpoint->ThisTimeLineID, checkpoint->nextXid,
 				checkpoint->nextOid,
 			 (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
 	}
@@ -4456,22 +4850,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
 		memcpy(&nextOid, rec, sizeof(Oid));
 		sprintf(buf + strlen(buf), "nextOid: %u", nextOid);
 	}
-	else if (info == XLOG_FILE_HEADER)
-	{
-		XLogFileHeaderData *fhdr = (XLogFileHeaderData *) rec;
-
-		sprintf(buf + strlen(buf),
-				"file header: sysid " UINT64_FORMAT "; "
-				"xlogid %X segno %X; seg_size %X",
-				fhdr->xlfhd_sysid,
-				fhdr->xlfhd_xlogid,
-				fhdr->xlfhd_segno,
-				fhdr->xlfhd_seg_size);
-	}
-	else if (info == XLOG_WASTED_SPACE)
-	{
-		strcat(buf, "wasted space");
-	}
 	else
 		strcat(buf, "UNKNOWN");
 }
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index a58902e76f6b50ec86e71777163de8483173c20d..8387844a7781509ef3a5b12ae3a1e93e898c965d 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -2,169 +2,31 @@
  *
  * xlogutils.c
  *
+ * PostgreSQL transaction log manager utility routines
+ *
+ * This file contains support routines that are used by XLOG replay functions.
+ * None of this code is used during normal system operation.
+ *
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.31 2004/06/18 06:13:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.32 2004/07/21 22:31:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include "access/htup.h"
 #include "access/xlogutils.h"
-#include "catalog/pg_database.h"
-#include "storage/bufpage.h"
+#include "storage/bufmgr.h"
 #include "storage/smgr.h"
 #include "utils/hsearch.h"
-#include "utils/relcache.h"
-
-
-/*
- * ---------------------------------------------------------------
- *
- * Index support functions
- *
- *----------------------------------------------------------------
- */
-
-/*
- * Check if specified heap tuple was inserted by given
- * xaction/command and return
- *
- * - -1 if not
- * - 0	if there is no tuple at all
- * - 1	if yes
- */
-int
-XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
-				   TransactionId xid, CommandId cid)
-{
-	Relation	reln;
-	Buffer		buffer;
-	Page		page;
-	ItemId		lp;
-	HeapTupleHeader htup;
-
-	reln = XLogOpenRelation(false, RM_HEAP_ID, hnode);
-	if (!RelationIsValid(reln))
-		return (0);
-
-	buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr));
-	if (!BufferIsValid(buffer))
-		return (0);
-
-	LockBuffer(buffer, BUFFER_LOCK_SHARE);
-	page = (Page) BufferGetPage(buffer);
-	if (PageIsNew((PageHeader) page) ||
-		ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page))
-	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (0);
-	}
-	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr));
-	if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp))
-	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (0);
-	}
 
-	htup = (HeapTupleHeader) PageGetItem(page, lp);
-
-	Assert(PageGetSUI(page) == ThisStartUpID);
-	if (!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), xid) ||
-		HeapTupleHeaderGetCmin(htup) != cid)
-	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (-1);
-	}
-
-	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-	ReleaseBuffer(buffer);
-	return (1);
-}
 
 /*
- * MUST BE CALLED ONLY ON RECOVERY.
- *
- * Check if exists valid (inserted by not aborted xaction) heap tuple
- * for given item pointer
- */
-bool
-XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr)
-{
-	Relation	reln;
-	Buffer		buffer;
-	Page		page;
-	ItemId		lp;
-	HeapTupleHeader htup;
-
-	reln = XLogOpenRelation(false, RM_HEAP_ID, hnode);
-	if (!RelationIsValid(reln))
-		return (false);
-
-	buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr));
-	if (!BufferIsValid(buffer))
-		return (false);
-
-	LockBuffer(buffer, BUFFER_LOCK_SHARE);
-	page = (Page) BufferGetPage(buffer);
-	if (PageIsNew((PageHeader) page) ||
-		ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page))
-	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (false);
-	}
-
-	if (PageGetSUI(page) != ThisStartUpID)
-	{
-		Assert(PageGetSUI(page) < ThisStartUpID);
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (true);
-	}
-
-	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr));
-	if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp))
-	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		ReleaseBuffer(buffer);
-		return (false);
-	}
-
-	htup = (HeapTupleHeader) PageGetItem(page, lp);
-
-	/* MUST CHECK WASN'T TUPLE INSERTED IN PREV STARTUP */
-
-	if (!(htup->t_infomask & HEAP_XMIN_COMMITTED))
-	{
-		if (htup->t_infomask & HEAP_XMIN_INVALID ||
-			(htup->t_infomask & HEAP_MOVED_IN &&
-			 TransactionIdDidAbort(HeapTupleHeaderGetXvac(htup))) ||
-			TransactionIdDidAbort(HeapTupleHeaderGetXmin(htup)))
-		{
-			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-			ReleaseBuffer(buffer);
-			return (false);
-		}
-	}
-
-	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-	ReleaseBuffer(buffer);
-	return (true);
-}
-
-/*
- * ---------------------------------------------------------------
  *
  * Storage related support functions
  *
- *----------------------------------------------------------------
  */
 
 Buffer
@@ -198,8 +60,10 @@ XLogReadBuffer(bool extend, Relation reln, BlockNumber blkno)
 	return (buffer);
 }
 
+
 /*
- * "Relation" cache
+ * Lightweight "Relation" cache --- this substitutes for the normal relcache
+ * during XLOG replay.
  */
 
 typedef struct XLogRelDesc
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index dc66314afee2224ba9d4a5d9ac45740aaa0362c7..778c1c95212d1188ca4d868304f876220429c6bc 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.113 2004/07/12 05:37:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.114 2004/07/21 22:31:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -279,7 +279,7 @@ DefineSequence(CreateSeqStmt *seq)
 		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	END_CRIT_SECTION();
@@ -354,7 +354,7 @@ AlterSequence(AlterSeqStmt *stmt)
 		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	END_CRIT_SECTION();
@@ -553,7 +553,7 @@ nextval(PG_FUNCTION_ARGS)
 		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	/* update on-disk data */
@@ -689,7 +689,7 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
 		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
 
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 
 	/* save info in sequence relation */
@@ -1091,7 +1091,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
 		elog(PANIC, "seq_redo: failed to add item to page");
 
 	PageSetLSN(page, lsn);
-	PageSetSUI(page, ThisStartUpID);
+	PageSetTLI(page, ThisTimeLineID);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 	WriteBuffer(buffer);
 }
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 241f2550f6ba22cecb8514265bccf79d8acfc0bb..e78db91e77af06d9a3a15cf169da0afe25cd0ba2 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.121 2004/07/19 02:47:06 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.122 2004/07/21 22:31:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -5448,7 +5448,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
 			recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
 
 			PageSetLSN(page, recptr);
-			PageSetSUI(page, ThisStartUpID);
+			PageSetTLI(page, ThisTimeLineID);
 
 			END_CRIT_SECTION();
 		}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index a420865b38985ddd2323dd4695e4c592019b3199..dfc03ea461a75ec61b88da30a950cb3c0190125f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.283 2004/07/20 22:56:29 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.284 2004/07/21 22:31:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2341,7 +2341,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 
 				recptr = log_heap_clean(onerel, buf, unused, uncnt);
 				PageSetLSN(page, recptr);
-				PageSetSUI(page, ThisStartUpID);
+				PageSetTLI(page, ThisTimeLineID);
 			}
 			else
 			{
@@ -2491,10 +2491,10 @@ move_chain_tuple(Relation rel,
 		if (old_buf != dst_buf)
 		{
 			PageSetLSN(old_page, recptr);
-			PageSetSUI(old_page, ThisStartUpID);
+			PageSetTLI(old_page, ThisTimeLineID);
 		}
 		PageSetLSN(dst_page, recptr);
-		PageSetSUI(dst_page, ThisStartUpID);
+		PageSetTLI(dst_page, ThisTimeLineID);
 	}
 	else
 	{
@@ -2611,9 +2611,9 @@ move_plain_tuple(Relation rel,
 										   dst_buf, &newtup);
 
 		PageSetLSN(old_page, recptr);
-		PageSetSUI(old_page, ThisStartUpID);
+		PageSetTLI(old_page, ThisTimeLineID);
 		PageSetLSN(dst_page, recptr);
-		PageSetSUI(dst_page, ThisStartUpID);
+		PageSetTLI(dst_page, ThisTimeLineID);
 	}
 	else
 	{
@@ -2807,7 +2807,7 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
 
 		recptr = log_heap_clean(onerel, buffer, unused, uncnt);
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 	else
 	{
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 2e4b263b1a7bec54883a48b0c1e8e2142f53d33c..e09db0426b396e1fb6b29dd893cc3538bf1c8a90 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -31,7 +31,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.42 2004/06/05 19:48:07 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.43 2004/07/21 22:31:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -532,7 +532,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 
 		recptr = log_heap_clean(onerel, buffer, unused, uncnt);
 		PageSetLSN(page, recptr);
-		PageSetSUI(page, ThisStartUpID);
+		PageSetTLI(page, ThisTimeLineID);
 	}
 	else
 	{
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index c9595094f1da82c304489ada2bd6a7af0689d093..d42c43195d27f1555f7d76a4be14d3b5b0f41889 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -19,7 +19,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.2 2004/07/21 22:31:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,9 +31,10 @@
 #include <sys/time.h>
 #include <unistd.h>
 
-#include "postmaster/pgarch.h"
+#include "access/xlog_internal.h"
 #include "libpq/pqsignal.h"
 #include "miscadmin.h"
+#include "postmaster/pgarch.h"
 #include "postmaster/postmaster.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
@@ -63,8 +64,8 @@
  * ----------
  */
 #define MIN_XFN_CHARS	16
-#define MAX_XFN_CHARS	16
-#define VALID_XFN_CHARS	"0123456789ABCDEF"
+#define MAX_XFN_CHARS	24
+#define VALID_XFN_CHARS	"0123456789ABCDEF.history"
 
 #define NUM_ARCHIVE_RETRIES 3
 
@@ -73,8 +74,6 @@
  * Local data
  * ----------
  */
-static char XLogDir[MAXPGPATH];
-static char XLogArchiveStatusDir[MAXPGPATH];
 static time_t last_pgarch_start_time;
 
 /*
@@ -265,9 +264,8 @@ PgArchiverMain(int argc, char *argv[])
     init_ps_display("archiver process", "", "");
     set_ps_display("");
 
-    /* Init XLOG file paths */
-    snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
-    snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
+    /* Init XLOG file paths --- needed in EXEC_BACKEND case */
+	XLOGPathInit();
 
     pgarch_MainLoop();
 
@@ -497,6 +495,12 @@ pgarch_archiveXlog(char *xlog)
  * 1) to maintain the sequential chain of xlogs required for recovery
  * 2) because the oldest ones will sooner become candidates for
  * recycling at time of checkpoint
+ *
+ * NOTE: the "oldest" comparison will presently consider all segments of
+ * a timeline with a smaller ID to be older than all segments of a timeline
+ * with a larger ID; the net result being that past timelines are given
+ * higher priority for archiving.  This seems okay, or at least not
+ * obviously worth changing.
  */
 static bool
 pgarch_readyXlog(char *xlog)
@@ -507,11 +511,13 @@ pgarch_readyXlog(char *xlog)
 	 * It is possible to optimise this code, though only a single
 	 * file is expected on the vast majority of calls, so....
 	 */
+	char		XLogArchiveStatusDir[MAXPGPATH];
   	char		newxlog[MAX_XFN_CHARS + 6 + 1];
  	DIR		    *rldir;
  	struct dirent 	*rlde;
  	bool		found = false;
 
+    snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
 	rldir = AllocateDir(XLogArchiveStatusDir);
 	if (rldir == NULL)
 		ereport(ERROR,
@@ -575,14 +581,12 @@ pgarch_archiveDone(char *xlog)
 {
     char		rlogready[MAXPGPATH];
     char		rlogdone[MAXPGPATH];
- 	int 		rc;
 
-    snprintf(rlogready, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog);
- 	snprintf(rlogdone, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog);
- 	rc = rename(rlogready, rlogdone);
- 	if (rc < 0)
+	StatusFilePath(rlogready, xlog, ".ready");
+	StatusFilePath(rlogdone, xlog, ".done");
+ 	if (rename(rlogready, rlogdone) < 0)
  		ereport(WARNING,
 				(errcode_for_file_access(),
-				 errmsg("could not rename \"%s\": %m",
-						rlogready)));
+				 errmsg("could not rename \"%s\" to \"%s\": %m",
+						rlogready, rlogdone)));
 }
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index 4170221b22fe7a099d0f861d6ab9ab69e27b8d4d..edfdbcbee74863edd2ccf1a5703ab0260603c5de 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.28 2004/06/05 19:48:08 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.29 2004/07/21 22:31:22 tgl Exp $
  *
  *	Interface:
  *
@@ -25,6 +25,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_class.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 91abe8cfa79cc8ad6ac59277203fdeefd5675eea..53054979a9634a849b3cc65e1b84170bc67c0740 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -6,7 +6,7 @@
  * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
  * licence: BSD
  *
- * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.17 2004/06/03 00:07:36 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.18 2004/07/21 22:31:23 tgl Exp $
  */
 #include "postgres.h"
 
@@ -165,7 +165,7 @@ main(int argc, char *argv[])
 		   ControlFile.checkPointCopy.redo.xlogid, ControlFile.checkPointCopy.redo.xrecoff);
 	printf(_("Latest checkpoint's UNDO location:    %X/%X\n"),
 		   ControlFile.checkPointCopy.undo.xlogid, ControlFile.checkPointCopy.undo.xrecoff);
-	printf(_("Latest checkpoint's StartUpID:        %u\n"), ControlFile.checkPointCopy.ThisStartUpID);
+	printf(_("Latest checkpoint's TimeLineID:       %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
 	printf(_("Latest checkpoint's NextXID:          %u\n"), ControlFile.checkPointCopy.nextXid);
 	printf(_("Latest checkpoint's NextOID:          %u\n"), ControlFile.checkPointCopy.nextOid);
 	printf(_("Time of latest checkpoint:            %s\n"), ckpttime_str);
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 41b81ea894b647a012ae6c3768f8cbb5a3b46af3..b8caf3cb7bd5535d823dc5861a2630770b713368 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -23,22 +23,22 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.20 2004/06/03 00:07:37 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.21 2004/07/21 22:31:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <errno.h>
-#include <unistd.h>
-#include <time.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <fcntl.h>
 #include <dirent.h>
+#include <fcntl.h>
 #include <locale.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
 
 #include "access/xlog.h"
+#include "access/xlog_internal.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
 
@@ -48,27 +48,7 @@ extern char *optarg;
 #define _(x) gettext((x))
 
 
-/******************** stuff copied from xlog.c ********************/
-
-/* Increment an xlogid/segment pair */
-#define NextLogSeg(logId, logSeg)	\
-	do { \
-		if ((logSeg) >= XLogSegsPerFile-1) \
-		{ \
-			(logId)++; \
-			(logSeg) = 0; \
-		} \
-		else \
-			(logSeg)++; \
-	} while (0)
-
-#define XLogFileName(path, log, seg)	\
-			snprintf(path, MAXPGPATH, "%s/%08X%08X",	\
-					 XLogDir, log, seg)
-
-/******************** end of stuff copied from xlog.c ********************/
-
-static char XLogDir[MAXPGPATH];
+char XLogDir[MAXPGPATH];		/* not static, see xlog_internal.h */
 static char ControlFilePath[MAXPGPATH];
 
 static ControlFileData ControlFile;		/* pg_control values */
@@ -388,9 +368,9 @@ GuessControlValues(void)
 	ControlFile.system_identifier = sysidentifier;
 
 	ControlFile.checkPointCopy.redo.xlogid = 0;
-	ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD;
+	ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
 	ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
-	ControlFile.checkPointCopy.ThisStartUpID = 0;
+	ControlFile.checkPointCopy.ThisTimeLineID = 1;
 	ControlFile.checkPointCopy.nextXid = (TransactionId) 514;	/* XXX */
 	ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
 	ControlFile.checkPointCopy.time = time(NULL);
@@ -430,7 +410,7 @@ GuessControlValues(void)
 
 	/*
 	 * XXX eventually, should try to grovel through old XLOG to develop
-	 * more accurate values for startupid, nextXID, and nextOID.
+	 * more accurate values for TimeLineID, nextXID, and nextOID.
 	 */
 }
 
@@ -463,7 +443,7 @@ PrintControlValues(bool guessed)
 	printf(_("Database system identifier:           %s\n"), sysident_str);
 	printf(_("Current log file ID:                  %u\n"), ControlFile.logId);
 	printf(_("Next log file segment:                %u\n"), ControlFile.logSeg);
-	printf(_("Latest checkpoint's StartUpID:        %u\n"), ControlFile.checkPointCopy.ThisStartUpID);
+	printf(_("Latest checkpoint's TimeLineID:       %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
 	printf(_("Latest checkpoint's NextXID:          %u\n"), ControlFile.checkPointCopy.nextXid);
 	printf(_("Latest checkpoint's NextOID:          %u\n"), ControlFile.checkPointCopy.nextOid);
 	printf(_("Database block size:                  %u\n"), ControlFile.blcksz);
@@ -506,7 +486,7 @@ RewriteControlFile(void)
 
 	ControlFile.checkPointCopy.redo.xlogid = newXlogId;
 	ControlFile.checkPointCopy.redo.xrecoff =
-		newXlogSeg * XLogSegSize + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD;
+		newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
 	ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
 	ControlFile.checkPointCopy.time = time(NULL);
 
@@ -634,8 +614,8 @@ WriteEmptyXLOG(void)
 {
 	char	   *buffer;
 	XLogPageHeader page;
+	XLogLongPageHeader longpage;
 	XLogRecord *record;
-	XLogFileHeaderData *fhdr;
 	crc64		crc;
 	char		path[MAXPGPATH];
 	int			fd;
@@ -648,44 +628,23 @@ WriteEmptyXLOG(void)
 
 	/* Set up the XLOG page header */
 	page->xlp_magic = XLOG_PAGE_MAGIC;
-	page->xlp_info = 0;
-	page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
+	page->xlp_info = XLP_LONG_HEADER;
+	page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
 	page->xlp_pageaddr.xlogid =
 		ControlFile.checkPointCopy.redo.xlogid;
 	page->xlp_pageaddr.xrecoff =
-		ControlFile.checkPointCopy.redo.xrecoff -
-		(SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD);
+		ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
+	longpage = (XLogLongPageHeader) page;
+	longpage->xlp_sysid = ControlFile.system_identifier;
+	longpage->xlp_seg_size = XLogSegSize;
 
-	/* Insert the file header record */
-	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
+	/* Insert the initial checkpoint record */
+	record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
 	record->xl_prev.xlogid = 0;
 	record->xl_prev.xrecoff = 0;
 	record->xl_xact_prev.xlogid = 0;
 	record->xl_xact_prev.xrecoff = 0;
 	record->xl_xid = InvalidTransactionId;
-	record->xl_len = SizeOfXLogFHD;
-	record->xl_info = XLOG_FILE_HEADER;
-	record->xl_rmid = RM_XLOG_ID;
-	fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
-	fhdr->xlfhd_sysid = ControlFile.system_identifier;
-	fhdr->xlfhd_xlogid = page->xlp_pageaddr.xlogid;
-	fhdr->xlfhd_segno = page->xlp_pageaddr.xrecoff / XLogSegSize;
-	fhdr->xlfhd_seg_size = XLogSegSize;
-
-	INIT_CRC64(crc);
-	COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
-	COMP_CRC64(crc, (char *) record + sizeof(crc64),
-			   SizeOfXLogRecord - sizeof(crc64));
-	FIN_CRC64(crc);
-	record->xl_crc = crc;
-
-	/* Insert the initial checkpoint record */
-	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD);
-	record->xl_prev.xlogid = page->xlp_pageaddr.xlogid;
-	record->xl_prev.xrecoff = page->xlp_pageaddr.xrecoff + SizeOfXLogPHD;
-	record->xl_xact_prev.xlogid = 0;
-	record->xl_xact_prev.xrecoff = 0;
-	record->xl_xid = InvalidTransactionId;
 	record->xl_len = sizeof(CheckPoint);
 	record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
 	record->xl_rmid = RM_XLOG_ID;
@@ -700,7 +659,8 @@ WriteEmptyXLOG(void)
 	record->xl_crc = crc;
 
 	/* Write the first page */
-	XLogFileName(path, newXlogId, newXlogSeg);
+	XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
+				 newXlogId, newXlogSeg);
 
 	unlink(path);
 
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 714518d308cba2b894925787d604471368a0b4e3..458b3012adfcf5600cdf12e66295cd47291d1345 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.65 2004/07/17 03:30:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.66 2004/07/21 22:31:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,6 +15,7 @@
 #define XACT_H
 
 #include "access/xlog.h"
+#include "storage/relfilenode.h"
 #include "utils/nabstime.h"
 
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 630a62d77a965c010f05cecd85c29b3d7917ad99..f2542d6fc7143f59357716cec87a605f9fd0bc57 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.53 2004/07/19 02:47:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.54 2004/07/21 22:31:25 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -14,7 +14,7 @@
 #include "access/rmgr.h"
 #include "access/transam.h"
 #include "access/xlogdefs.h"
-#include "storage/bufmgr.h"
+#include "storage/buf.h"
 #include "utils/pg_crc.h"
 
 
@@ -76,107 +76,6 @@ typedef struct XLogRecord
 #define XLOG_NO_TRAN			XLR_INFO_MASK
 
 /*
- * Header info for a backup block appended to an XLOG record.
- *
- * Note that the backup block has its own CRC, and is not covered by
- * the CRC of the XLOG record proper.  Also note that we don't attempt
- * to align either the BkpBlock struct or the block's data.
- */
-typedef struct BkpBlock
-{
-	crc64		crc;
-	RelFileNode node;
-	BlockNumber block;
-} BkpBlock;
-
-/*
- * When there is not enough space on current page for whole record, we
- * continue on the next page with continuation record.	(However, the
- * XLogRecord header will never be split across pages; if there's less than
- * SizeOfXLogRecord space left at the end of a page, we just waste it.)
- *
- * Note that xl_rem_len includes backup-block data, unlike xl_len in the
- * initial header.
- */
-typedef struct XLogContRecord
-{
-	uint32		xl_rem_len;		/* total len of remaining data for record */
-
-	/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
-
-} XLogContRecord;
-
-#define SizeOfXLogContRecord	MAXALIGN(sizeof(XLogContRecord))
-
-/*
- * Each page of XLOG file has a header like this:
- */
-#define XLOG_PAGE_MAGIC 0xD05B	/* can be used as WAL version indicator */
-
-typedef struct XLogPageHeaderData
-{
-	uint16		xlp_magic;		/* magic value for correctness checks */
-	uint16		xlp_info;		/* flag bits, see below */
-	StartUpID	xlp_sui;		/* StartUpID of first record on page */
-	XLogRecPtr	xlp_pageaddr;	/* XLOG address of this page */
-} XLogPageHeaderData;
-
-#define SizeOfXLogPHD	MAXALIGN(sizeof(XLogPageHeaderData))
-
-typedef XLogPageHeaderData *XLogPageHeader;
-
-/* When record crosses page boundary, set this flag in new page's header */
-#define XLP_FIRST_IS_CONTRECORD		0x0001
-/* All defined flag bits in xlp_info (used for validity checking of header) */
-#define XLP_ALL_FLAGS				0x0001
-
-/*
- * We break each logical log file (xlogid value) into segment files of the
- * size indicated by XLOG_SEG_SIZE.  One possible segment at the end of each
- * log file is wasted, to ensure that we don't have problems representing
- * last-byte-position-plus-1.
- */
-#define XLogSegSize		((uint32) XLOG_SEG_SIZE)
-#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize)
-#define XLogFileSize	(XLogSegsPerFile * XLogSegSize)
-
-/*
- * The first XLOG record in each segment file is always an XLOG_FILE_HEADER
- * record.  This record does nothing as far as XLOG replay is concerned,
- * but it is useful for verifying that we haven't mixed up XLOG segment files.
- * The body of an XLOG_FILE_HEADER record is a struct XLogFileHeaderData.
- * Note: the xlogid/segno fields are really redundant with xlp_pageaddr in
- * the page header, but we store them anyway as an extra check.
- */
-typedef struct XLogFileHeaderData
-{
-	uint64		xlfhd_sysid;	/* system identifier from pg_control */
-	uint32		xlfhd_xlogid;	/* logical log file # */
-	uint32		xlfhd_segno;	/* segment number within logical log file */
-	uint32		xlfhd_seg_size;	/* just as a cross-check */
-} XLogFileHeaderData;
-
-#define SizeOfXLogFHD	MAXALIGN(sizeof(XLogFileHeaderData))
-
-
-/*
- * Method table for resource managers.
- *
- * RmgrTable[] is indexed by RmgrId values (see rmgr.h).
- */
-typedef struct RmgrData
-{
-	const char *rm_name;
-	void		(*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
-	void		(*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr);
-	void		(*rm_desc) (char *buf, uint8 xl_info, char *rec);
-	void		(*rm_startup) (void);
-	void		(*rm_cleanup) (void);
-} RmgrData;
-
-extern RmgrData RmgrTable[];
-
-/*--------------------
  * List of these structs is used to pass data to XLogInsert().
  *
  * If buffer is valid then XLOG will check if buffer must be backed up
@@ -188,7 +87,6 @@ extern RmgrData RmgrTable[];
  * the XLOG record, since we assume it's present in the buffer.  Therefore,
  * rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what
  * is actually stored in the XLOG record.
- *--------------------
  */
 typedef struct XLogRecData
 {
@@ -198,7 +96,7 @@ typedef struct XLogRecData
 	struct XLogRecData *next;
 } XLogRecData;
 
-extern StartUpID ThisStartUpID; /* current SUI */
+extern TimeLineID ThisTimeLineID; /* current TLI */
 extern bool InRecovery;
 extern XLogRecPtr MyLastRecPtr;
 extern bool MyXactMadeXLogEntry;
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..09877bf64be89e12416d8af8653a64c27086801e
--- /dev/null
+++ b/src/include/access/xlog_internal.h
@@ -0,0 +1,224 @@
+/*
+ * xlog_internal.h
+ *
+ * PostgreSQL transaction log internal declarations
+ *
+ * NOTE: this file is intended to contain declarations useful for
+ * manipulating the XLOG files directly, but it is not supposed to be
+ * needed by rmgr routines (redo/undo support for individual record types).
+ * So the XLogRecord typedef and associated stuff appear in xlog.h.
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.1 2004/07/21 22:31:25 tgl Exp $
+ */
+#ifndef XLOG_INTERNAL_H
+#define XLOG_INTERNAL_H
+
+#include "access/xlog.h"
+#include "storage/block.h"
+#include "storage/relfilenode.h"
+
+
+/*
+ * Header info for a backup block appended to an XLOG record.
+ *
+ * Note that the backup block has its own CRC, and is not covered by
+ * the CRC of the XLOG record proper.  Also note that we don't attempt
+ * to align either the BkpBlock struct or the block's data.
+ */
+typedef struct BkpBlock
+{
+	crc64		crc;
+	RelFileNode node;
+	BlockNumber block;
+} BkpBlock;
+
+/*
+ * When there is not enough space on the current page for a whole record,
+ * we continue it on the next page with a continuation record.  (However, the
+ * XLogRecord header will never be split across pages; if there's less than
+ * SizeOfXLogRecord space left at the end of a page, we just waste it.)
+ *
+ * Note that xl_rem_len includes backup-block data, unlike xl_len in the
+ * initial header.
+ */
+typedef struct XLogContRecord
+{
+	uint32		xl_rem_len;		/* total len of remaining data for record */
+
+	/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
+
+} XLogContRecord;
+
+#define SizeOfXLogContRecord	MAXALIGN(sizeof(XLogContRecord))
+
+/*
+ * Each page of XLOG file has a header like this:
+ */
+#define XLOG_PAGE_MAGIC 0xD05B	/* can be used as WAL version indicator */
+
+typedef struct XLogPageHeaderData
+{
+	uint16		xlp_magic;		/* magic value for correctness checks */
+	uint16		xlp_info;		/* flag bits, see below */
+	TimeLineID	xlp_tli;		/* TimeLineID of first record on page */
+	XLogRecPtr	xlp_pageaddr;	/* XLOG address of this page */
+} XLogPageHeaderData;
+
+#define SizeOfXLogShortPHD	MAXALIGN(sizeof(XLogPageHeaderData))
+
+typedef XLogPageHeaderData *XLogPageHeader;
+
+/*
+ * When the XLP_LONG_HEADER flag is set, we store additional fields in the
+ * page header.  (This is ordinarily done just in the first page of an
+ * XLOG file.)  The additional fields serve to identify the file accurately.
+ */
+typedef struct XLogLongPageHeaderData
+{
+	XLogPageHeaderData std;		/* standard header fields; must be first */
+	uint64		xlp_sysid;		/* system identifier from pg_control */
+	uint32		xlp_seg_size;	/* XLogSegSize, just as a cross-check */
+} XLogLongPageHeaderData;
+
+#define SizeOfXLogLongPHD	MAXALIGN(sizeof(XLogLongPageHeaderData))
+
+typedef XLogLongPageHeaderData *XLogLongPageHeader;
+
+/* When record crosses page boundary, set this flag in new page's header */
+#define XLP_FIRST_IS_CONTRECORD		0x0001
+/* This flag indicates a "long" page header */
+#define XLP_LONG_HEADER				0x0002
+/* All defined flag bits in xlp_info (used for validity checking of header) */
+#define XLP_ALL_FLAGS				0x0003
+/* Size of a page's header: long or short according to XLP_LONG_HEADER */
+#define XLogPageHeaderSize(hdr)		\
+	(((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD)
+
+/*
+ * We break each logical log file (xlogid value) into segment files of the
+ * size indicated by XLOG_SEG_SIZE.  One possible segment at the end of each
+ * log file is wasted, to ensure that we don't have problems representing
+ * last-byte-position-plus-1.
+ */
+#define XLogSegSize		((uint32) XLOG_SEG_SIZE)
+#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize)
+#define XLogFileSize	(XLogSegsPerFile * XLogSegSize)
+
+
+/*
+ * Macros for manipulating XLOG pointers
+ */
+
+/* Increment an xlogid/segment pair (logSeg is evaluated more than once) */
+#define NextLogSeg(logId, logSeg)	\
+	do { \
+		if ((logSeg) >= XLogSegsPerFile-1) \
+		{ \
+			(logId)++; \
+			(logSeg) = 0; \
+		} \
+		else \
+			(logSeg)++; \
+	} while (0)
+
+/* Decrement an xlogid/segment pair (assume it's not 0,0); same caveat */
+#define PrevLogSeg(logId, logSeg)	\
+	do { \
+		if (logSeg) \
+			(logSeg)--; \
+		else \
+		{ \
+			(logId)--; \
+			(logSeg) = XLogSegsPerFile-1; \
+		} \
+	} while (0)
+
+/*
+ * Compute ID and segment from an XLogRecPtr; logId and logSeg are assigned
+ * to, so they must be lvalues.
+ *
+ * For XLByteToSeg, do the computation at face value.  For XLByteToPrevSeg,
+ * a boundary byte is taken to be in the previous segment.  This is suitable
+ * for deciding which segment to write given a pointer to a record end, for
+ * example.  (We can assume xrecoff is not zero: no valid recptr has that.)
+ */
+#define XLByteToSeg(xlrp, logId, logSeg)	\
+	( logId = (xlrp).xlogid, \
+	  logSeg = (xlrp).xrecoff / XLogSegSize \
+	)
+#define XLByteToPrevSeg(xlrp, logId, logSeg)	\
+	( logId = (xlrp).xlogid, \
+	  logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
+	)
+
+/*
+ * Is an XLogRecPtr within a particular XLOG segment?
+ *
+ * For XLByteInSeg, do the computation at face value.  For XLByteInPrevSeg,
+ * a boundary byte is taken to be in the previous segment.
+ */
+#define XLByteInSeg(xlrp, logId, logSeg)	\
+	((xlrp).xlogid == (logId) && \
+	 (xlrp).xrecoff / XLogSegSize == (logSeg))
+
+#define XLByteInPrevSeg(xlrp, logId, logSeg)	\
+	((xlrp).xlogid == (logId) && \
+	 ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
+
+/* Check if an xrecoff value is in a plausible range */
+#define XRecOffIsValid(xrecoff) \
+		((xrecoff) % BLCKSZ >= SizeOfXLogShortPHD && \
+		(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
+
+/*
+ * These macros encapsulate knowledge about the exact layout of XLog file
+ * names, timeline history file names, and archive-status file names.
+ */
+#define MAXFNAMELEN		32		/* buffer size used by the *FileName macros */
+
+#define XLogFileName(fname, tli, log, seg)	\
+	snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg)
+
+#define XLogFilePath(path, tli, log, seg)	\
+	snprintf(path, MAXPGPATH, "%s/%08X%08X%08X", XLogDir, tli, log, seg)
+
+#define TLHistoryFileName(fname, tli)	\
+	snprintf(fname, MAXFNAMELEN, "%08X.history", tli)
+
+#define TLHistoryFilePath(path, tli)	\
+	snprintf(path, MAXPGPATH, "%s/%08X.history", XLogDir, tli)
+
+#define StatusFilePath(path, xlog, suffix)	\
+	snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix)
+
+extern char XLogDir[MAXPGPATH];	/* used by the *Path macros above */
+
+/*
+ * _INTL_MAXLOGRECSZ: max space needed for a record including header and
+ * any backup-block data.
+ */
+#define _INTL_MAXLOGRECSZ	(SizeOfXLogRecord + MAXLOGRECSZ + \
+							 XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+
+
+/*
+ * Method table for resource managers.
+ *
+ * RmgrTable[] is indexed by RmgrId values (see rmgr.h).
+ */
+typedef struct RmgrData
+{
+	const char *rm_name;
+	void		(*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
+	void		(*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr);
+	void		(*rm_desc) (char *buf, uint8 xl_info, char *rec);
+	void		(*rm_startup) (void);
+	void		(*rm_cleanup) (void);
+} RmgrData;
+
+extern const RmgrData RmgrTable[];
+
+#endif   /* XLOG_INTERNAL_H */
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index 057236e4eda83aea0f8a0de4d6c334ae4113d7df..4507723e5a8ac16c4f75021db7b30ff35cba8195 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -2,12 +2,12 @@
  * xlogdefs.h
  *
  * Postgres transaction log manager record pointer and
- * system startup number definitions
+ * timeline number definitions
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.11 2003/12/20 17:31:21 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.12 2004/07/21 22:31:25 tgl Exp $
  */
 #ifndef XLOG_DEFS_H
 #define XLOG_DEFS_H
@@ -33,12 +33,6 @@ typedef struct XLogRecPtr
 	uint32		xrecoff;		/* byte offset of location in log file */
 } XLogRecPtr;
 
-typedef struct XLogwrtResult
-{
-	XLogRecPtr	Write;			/* last byte + 1 written out */
-	XLogRecPtr	Flush;			/* last byte + 1 flushed */
-} XLogwrtResult;
-
 
 /*
  * Macros for comparing XLogRecPtrs
@@ -57,10 +51,16 @@ typedef struct XLogwrtResult
 #define XLByteEQ(a, b)		\
 			((a).xlogid == (b).xlogid && (a).xrecoff == (b).xrecoff)
 
+
 /*
- * StartUpID (SUI) - system startups counter. It's to allow removing
- * pg_clog after shutdown, in future.
+ * TimeLineID (TLI) - identifies different database histories to prevent
+ * confusion after restoring a prior state of a database installation.
+ * TLI does not change in a normal stop/restart of the database (including
+ * crash-and-recover cases); but we must assign a new TLI after doing
+ * a recovery to a prior state, a/k/a point-in-time recovery.  This makes
+ * the new WAL logfile sequence we generate distinguishable from the
+ * sequence that was generated in the previous incarnation.
  */
-typedef uint32 StartUpID;
+typedef uint32 TimeLineID;
 
 #endif   /* XLOG_DEFS_H */
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 8b1dc671fa0e8128a3893faa74d09b458f6b6a87..a5b8f30978ea0883f4db4de2aabbc738e2cc31ef 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -6,19 +6,15 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.14 2004/02/11 22:55:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.15 2004/07/21 22:31:25 tgl Exp $
  */
 #ifndef XLOG_UTILS_H
 #define XLOG_UTILS_H
 
 #include "access/rmgr.h"
 #include "storage/buf.h"
-#include "storage/itemptr.h"
 #include "utils/rel.h"
 
-extern int XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
-				   TransactionId xid, CommandId cid);
-extern bool XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr);
 
 extern void XLogInitRelationCache(void);
 extern void XLogCloseRelationCache(void);
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 2319da9b8e8af5a2ca43fcf103f736036d8e6a05..2f5d51ff989a3587ae5c394ee61e90b5a8c292d4 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.15 2004/06/03 02:08:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.16 2004/07/21 22:31:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION	73
+#define PG_CONTROL_VERSION	74
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -30,13 +30,13 @@
  */
 typedef struct CheckPoint
 {
-	XLogRecPtr	redo;			/* next RecPtr available when we */
-	/* began to create CheckPoint */
-	/* (i.e. REDO start point) */
-	XLogRecPtr	undo;			/* first record of oldest in-progress */
-	/* transaction when we started */
-	/* (i.e. UNDO end point) */
-	StartUpID	ThisStartUpID;	/* current SUI */
+	XLogRecPtr	redo;			/* next RecPtr available when we
+								 * began to create CheckPoint
+								 * (i.e. REDO start point) */
+	XLogRecPtr	undo;			/* first record of oldest in-progress
+								 * transaction when we started
+								 * (i.e. UNDO end point) */
+	TimeLineID	ThisTimeLineID;	/* current TLI */
 	TransactionId nextXid;		/* next free XID */
 	Oid			nextOid;		/* next free OID */
 	time_t		time;			/* time stamp of checkpoint */
@@ -46,8 +46,6 @@ typedef struct CheckPoint
 #define XLOG_CHECKPOINT_SHUTDOWN		0x00
 #define XLOG_CHECKPOINT_ONLINE			0x10
 #define XLOG_NEXTOID					0x30
-#define XLOG_FILE_HEADER				0x40
-#define XLOG_WASTED_SPACE				0x50
 
 
 /* System status indicator */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 727ec508a3b1ecd9620d5ea6eea34c0cac7ef876..21148c9937b760167cfd41487f9ac84d6e65c86f 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.59 2004/07/01 00:51:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.60 2004/07/21 22:31:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,13 +87,22 @@ typedef uint16 LocationIndex;
 
 /*
  * disk page organization
+ *
  * space management information generic to any page
  *
+ *		pd_lsn		- identifies xlog record for last change to this page.
+ *		pd_tli		- TLI (timeline ID) of last change to this page.
  *		pd_lower	- offset to start of free space.
  *		pd_upper	- offset to end of free space.
  *		pd_special	- offset to start of special space.
  *		pd_pagesize_version - size in bytes and page layout version number.
  *
+ * The LSN is used by the buffer manager to enforce the basic rule of WAL:
+ * "thou shalt write xlog before data".  A dirty buffer cannot be dumped
+ * to disk until xlog has been flushed at least as far as the page's LSN.
+ * We also store the TLI for identification purposes (it is not clear that
+ * this is actually necessary, but it seems like a good idea).
+ *
  * The page version number and page size are packed together into a single
  * uint16 field.  This is for historical reasons: before PostgreSQL 7.3,
  * there was no concept of a page version number, and doing it this way
@@ -109,13 +118,10 @@ typedef uint16 LocationIndex;
  */
 typedef struct PageHeaderData
 {
-	/* XXX LSN is member of *any* block, not */
-	/* only page-organized - 'll change later */
-	XLogRecPtr	pd_lsn;			/* LSN: next byte after last byte of xlog */
-	/* record for last change of this page */
-	StartUpID	pd_sui;			/* SUI of last changes (currently it's */
-	/* used by heap AM only) */
-
+	/* XXX LSN is member of *any* block, not only page-organized ones */
+	XLogRecPtr	pd_lsn;			/* LSN: next byte after last byte of xlog
+								 * record for last change to this page */
+	TimeLineID	pd_tli;			/* TLI of last change */
 	LocationIndex pd_lower;		/* offset to start of free space */
 	LocationIndex pd_upper;		/* offset to end of free space */
 	LocationIndex pd_special;	/* offset to start of special space */
@@ -298,10 +304,10 @@ typedef PageHeaderData *PageHeader;
 #define PageSetLSN(page, lsn) \
 	(((PageHeader) (page))->pd_lsn = (lsn))
 
-#define PageGetSUI(page) \
-	(((PageHeader) (page))->pd_sui)
-#define PageSetSUI(page, sui) \
-	(((PageHeader) (page))->pd_sui = (StartUpID) (sui))
+#define PageGetTLI(page) \
+	(((PageHeader) (page))->pd_tli)
+#define PageSetTLI(page, tli) \
+	(((PageHeader) (page))->pd_tli = (tli))
 
 /* ----------------------------------------------------------------
  *		extern declarations