diff --git a/doc/src/sgml/ref/pg_resetxlog.sgml b/doc/src/sgml/ref/pg_resetxlog.sgml
index f5915adacd43820209dbf5d171055d8ffca61933..f4caa8b80e6627cf59f5012c8a2308528c852f00 100644
--- a/doc/src/sgml/ref/pg_resetxlog.sgml
+++ b/doc/src/sgml/ref/pg_resetxlog.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.10 2005/04/28 21:47:10 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.11 2005/06/08 15:50:21 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -23,6 +23,7 @@ PostgreSQL documentation
    <arg> -o <replaceable class="parameter">oid</replaceable> </arg>
    <arg> -x <replaceable class="parameter">xid</replaceable> </arg>
    <arg> -m <replaceable class="parameter">mxid</replaceable> </arg>
+   <arg> -O <replaceable class="parameter">mxoff</replaceable> </arg>
    <arg> -l <replaceable class="parameter">timelineid</replaceable>,<replaceable class="parameter">fileid</replaceable>,<replaceable class="parameter">seg</replaceable> </arg>
    <arg choice="plain"><replaceable>datadir</replaceable></arg>
   </cmdsynopsis>
@@ -32,8 +33,8 @@ PostgreSQL documentation
   <title>Description</title>
   <para>
    <command>pg_resetxlog</command> clears the write-ahead log (WAL) and
-   optionally resets some other control information (stored in the
-   <filename>pg_control</> file).  This function is sometimes needed
+   optionally resets some other control information stored in the
+   <filename>pg_control</> file.  This function is sometimes needed
    if these files have become corrupted.  It should be used only as a
    last resort, when the server will not start due to such corruption.
   </para>
@@ -60,8 +61,9 @@ PostgreSQL documentation
    by specifying the <literal>-f</> (force) switch.  In this case plausible
    values will be substituted for the missing data.  Most of the fields can be
    expected to match, but manual assistance may be needed for the next OID,
-   next transaction ID, WAL starting address, and database locale fields.
-   The first three of these can be set using the switches discussed below.
+   next transaction ID, next multi-transaction ID and offset,
+   WAL starting address, and database locale fields.
+   The first five of these can be set using the switches discussed below.
    <command>pg_resetxlog</command>'s own environment is the source for its
    guess at the locale fields; take care that <envar>LANG</> and so forth
    match the environment that <command>initdb</> was run in.
@@ -74,9 +76,10 @@ PostgreSQL documentation
   </para>
 
   <para>
-   The <literal>-o</>, <literal>-x</>, <literal>-m</>, and <literal>-l</>
+   The <literal>-o</>, <literal>-x</>, <literal>-m</>, <literal>-O</>,
+   and <literal>-l</>
    switches allow the next OID, next transaction ID, next multi-transaction
-   ID, and WAL starting address values to
+   ID, next multi-transaction offset, and WAL starting address values to
    be set manually.  These are only needed when
    <command>pg_resetxlog</command> is unable to determine appropriate values
    by reading <filename>pg_control</>.  Safe values may be determined as
@@ -108,6 +111,17 @@ PostgreSQL documentation
      </para>
     </listitem>
 
+    <listitem>
+     <para>
+      A safe value for the next multi-transaction offset (<literal>-O</>)
+      may be determined by looking for the numerically largest
+      file name in the directory <filename>pg_multixact/members</> under the
+      data directory, adding one, and then multiplying by 65536.  As above,
+      the file names are in hexadecimal, so the easiest way is to specify the
+      switch value in hexadecimal: the file name plus one, with four zeroes appended.
+     </para>
+    </listitem>
+
     <listitem>
      <para>
       The WAL starting address (<literal>-l</>) should be
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 433a4b4538cd40cebc39c94725111bb9ef494b93..74f76c1d16aeeef14506ca83d78b4b6a9a809874 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.193 2005/06/06 20:22:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -2219,6 +2219,8 @@ l3:
 	 * Else the same IDs might be re-used after a crash, which would be
 	 * disastrous if this page made it to disk before the crash.  Essentially
 	 * we have to enforce the WAL log-before-data rule even in this case.
+	 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
+	 * entries for everything anyway.)
 	 */
 	if (!relation->rd_istemp)
 	{
@@ -2228,6 +2230,8 @@ l3:
 
 		xlrec.target.node = relation->rd_node;
 		xlrec.target.tid = tuple->t_self;
+		xlrec.locking_xid = xid;
+		xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
 		xlrec.shared_lock = (mode == LockTupleShared);
 		rdata[0].data = (char *) &xlrec;
 		rdata[0].len = SizeOfHeapLock;
@@ -2900,17 +2904,18 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
 
 	htup = (HeapTupleHeader) PageGetItem(page, lp);
 
-	/*
-	 * Presently, we don't bother to restore the locked state, but
-	 * just set the XMAX_INVALID bit.
-	 */
 	htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
 						  HEAP_XMAX_INVALID |
 						  HEAP_XMAX_IS_MULTI |
 						  HEAP_IS_LOCKED |
 						  HEAP_MOVED);
-	htup->t_infomask |= HEAP_XMAX_INVALID;
-	HeapTupleHeaderSetXmax(htup, record->xl_xid);
+	if (xlrec->xid_is_mxact)
+		htup->t_infomask |= HEAP_XMAX_IS_MULTI;
+	if (xlrec->shared_lock)
+		htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
+	else
+		htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
+	HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
 	HeapTupleHeaderSetCmax(htup, FirstCommandId);
 	/* Make sure there is no forward chain link in t_ctid */
 	htup->t_ctid = xlrec->target.tid;
@@ -3010,6 +3015,11 @@ heap_desc(char *buf, uint8 xl_info, char *rec)
 			strcat(buf, "shared_lock: ");
 		else
 			strcat(buf, "exclusive_lock: ");
+		if (xlrec->xid_is_mxact)
+			strcat(buf, "mxid ");
+		else
+			strcat(buf, "xid ");
+		sprintf(buf + strlen(buf), "%u ", xlrec->locking_xid);
 		out_target(buf, &(xlrec->target));
 	}
 	else
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 85acfe2cc0207e4c52d69123f325c67bf9eae099..41773625af4cecf351b84db4ae3e034468e4ed01 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -10,7 +10,7 @@
  * tuple to be unlocked can sleep on the potentially-several TransactionIds
  * that compose the MultiXactId.
  *
- * We use two SLRU areas, one for storing the offsets on which the data
+ * We use two SLRU areas, one for storing the offsets at which the data
  * starts for each MultiXactId in the other one.  This trick allows us to
  * store variable length arrays of TransactionIds.  (We could alternatively
  * use one area containing counts and TransactionIds, with valid MultiXactId
@@ -18,20 +18,31 @@
  * since it would get completely confused if someone inquired about a bogus
  * MultiXactId that pointed to an intermediate slot containing an XID.)
  *
- * This code is based on subtrans.c; see it for additional discussion.
- * Like the subtransaction manager, we only need to remember multixact
- * information for currently-open transactions.  Thus, there is
- * no need to preserve data over a crash and restart.
+ * XLOG interactions: this module generates an XLOG record whenever a new
+ * OFFSETs or MEMBERs page is initialized to zeroes, as well as an XLOG record
+ * whenever a new MultiXactId is defined.  This allows us to completely
+ * rebuild the data entered since the last checkpoint during XLOG replay.
+ * Because this is possible, we need not follow the normal rule of
+ * "write WAL before data"; the only correctness guarantee needed is that
+ * we flush and sync all dirty OFFSETs and MEMBERs pages to disk before a
+ * checkpoint is considered complete.  If a page does make it to disk ahead
+ * of corresponding WAL records, it will be forcibly zeroed before use anyway.
+ * Therefore, we don't need to mark our pages with LSN information; we have
+ * enough synchronization already.
+ *
+ * Like clog.c, and unlike subtrans.c, we have to preserve state across
+ * crashes and ensure that MXID and offset numbering increases monotonically
+ * across a crash.  We do this in the same way as it's done for transaction
+ * IDs: the WAL stream is guaranteed to contain evidence of every MXID we
+ * could need to worry about, and we just make sure that at the end of
+ * replay, the next-MXID and next-offset counters are at least as large as
+ * anything we saw during replay.
  *
- * The only XLOG interaction we need to take care of is that generated
- * MultiXactId values must continue to increase across a system crash.
- * Thus we log groups of MultiXactIds acquisition in the same fashion we do
- * for Oids (see XLogPutNextMultiXactId).
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.4 2005/05/19 21:35:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.5 2005/06/08 15:50:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,8 +62,8 @@
  * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
  * used everywhere else in Postgres.
  *
- * Note: because both uint32 and TransactionIds are 32 bits and wrap around at
- * 0xFFFFFFFF, MultiXact page numbering also wraps around at
+ * Note: because both MultiXactOffsets and TransactionIds are 32 bits and
+ * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at
  * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at
  * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE.  We need take no
  * explicit notice of that fact in this module, except when comparing segment
@@ -61,21 +72,19 @@
  */
 
 /* We need four bytes per offset and also four bytes per member */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(uint32))
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
 #define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
 
 #define MultiXactIdToOffsetPage(xid) \
-	((xid) / (uint32) MULTIXACT_OFFSETS_PER_PAGE)
+	((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
 #define MultiXactIdToOffsetEntry(xid) \
-	((xid) % (uint32) MULTIXACT_OFFSETS_PER_PAGE)
+	((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
 
 #define MXOffsetToMemberPage(xid) \
 	((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
 #define MXOffsetToMemberEntry(xid) \
 	((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
 
-/* Arbitrary number of MultiXactIds to allocate at each XLog call */
-#define MXACT_PREFETCH	8192
 
 /*
  * Links to shared-memory data structures for MultiXact control
@@ -98,11 +107,8 @@ typedef struct MultiXactStateData
 	/* next-to-be-assigned MultiXactId */
 	MultiXactId		nextMXact;
 
-	/* MultiXactIds we have left before logging more */
-	uint32			mXactCount;
-
 	/* next-to-be-assigned offset */
-	uint32			nextOffset;
+	MultiXactOffset	nextOffset;
 
 	/* the Offset SLRU area was last truncated at this MultiXactId */
 	MultiXactId		lastTruncationPoint;
@@ -161,7 +167,8 @@ static MultiXactId *OldestVisibleMXactId;
  * for this being that most entries will contain our own TransactionId and
  * so they will be uninteresting by the time our next transaction starts.
  * (XXX not clear that this is correct --- other members of the MultiXact
- * could hang around longer than we did.)
+ * could hang around longer than we did.  However, it's not clear what a
+ * better policy for flushing old cache entries would be.)
  *
  * We allocate the cache entries in a memory context that is deleted at
  * transaction end, so we don't need to do retail freeing of entries.
@@ -194,7 +201,9 @@ static MemoryContext	MXactContext = NULL;
 static void MultiXactIdSetOldestVisible(void);
 static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
 static int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);
-static MultiXactId GetNewMultiXactId(int nxids, uint32 *offset);
+static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
+							   int nxids, TransactionId *xids);
+static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
 
 /* MultiXact cache management */
 static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
@@ -206,15 +215,17 @@ static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
 #endif
 
 /* management of SLRU infrastructure */
-static int	ZeroMultiXactOffsetPage(int pageno);
-static int	ZeroMultiXactMemberPage(int pageno);
+static int	ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
+static int	ZeroMultiXactMemberPage(int pageno, bool writeXlog);
 static bool MultiXactOffsetPagePrecedes(int page1, int page2);
 static bool MultiXactMemberPagePrecedes(int page1, int page2);
 static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
-static bool MultiXactOffsetPrecedes(uint32 offset1, uint32 offset2);
+static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
+									MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
-static void ExtendMultiXactMember(uint32 offset);
+static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
 static void TruncateMultiXact(void);
+static void WriteMZeroPageXlogRec(int pageno, uint8 info);
 
 
 /*
@@ -551,8 +562,8 @@ MultiXactIdWait(MultiXactId multi)
  * CreateMultiXactId
  * 		Make a new MultiXactId
  *
- * Make SLRU and cache entries for a new MultiXactId, recording the given
- * TransactionIds as members.  Returns the newly created MultiXactId.
+ * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
+ * given TransactionIds as members.  Returns the newly created MultiXactId.
  *
  * NB: the passed xids[] array will be sorted in-place.
  */
@@ -560,13 +571,9 @@ static MultiXactId
 CreateMultiXactId(int nxids, TransactionId *xids)
 {
 	MultiXactId	multi;
-	int			pageno;
-	int			prev_pageno;
-	int			entryno;
-	int			slotno;
-	uint32	   *offptr;
-	uint32		offset;
-	int			i;
+	MultiXactOffset offset;
+	XLogRecData rdata[2];
+	xl_multixact_create xlrec;
 
 	debug_elog3(DEBUG2, "Create: %s",
 				mxid_to_string(InvalidMultiXactId, nxids, xids));
@@ -588,11 +595,70 @@ CreateMultiXactId(int nxids, TransactionId *xids)
 		return multi;
 	}
 
+	/*
+	 * OK, assign the MXID and the range of member offsets to use
+	 */
 	multi = GetNewMultiXactId(nxids, &offset);
 
-	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+	debug_elog4(DEBUG2, "Create: assigned id %u offset %u", multi, offset);
+
+	/*
+	 * Make an XLOG entry describing the new MXID.
+	 *
+	 * Note: we need not flush this XLOG entry to disk before proceeding.
+	 * The only way for the MXID to be referenced from any data page is
+	 * for heap_lock_tuple() to have put it there, and heap_lock_tuple()
+	 * generates an XLOG record that must follow ours.  The normal LSN
+	 * interlock between the data page and that XLOG record will ensure
+	 * that our XLOG record reaches disk first.  If the SLRU members/offsets
+	 * data reaches disk sooner than the XLOG record, we do not care because
+	 * we'll overwrite it with zeroes unless the XLOG record is there too;
+	 * see notes at top of this file.
+	 */
+	xlrec.mid = multi;
+	xlrec.moff = offset;
+	xlrec.nxids = nxids;
+
+	rdata[0].data = (char *) (&xlrec);
+	rdata[0].len = MinSizeOfMultiXactCreate;
+	rdata[0].buffer = InvalidBuffer;
+	rdata[0].next = &(rdata[1]);
+	rdata[1].data = (char *) xids;
+	rdata[1].len = nxids * sizeof(TransactionId);
+	rdata[1].buffer = InvalidBuffer;
+	rdata[1].next = NULL;
+
+	(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
+
+	/* Now enter the information into the OFFSETs and MEMBERs logs */
+	RecordNewMultiXact(multi, offset, nxids, xids);
+
+	/* Store the new MultiXactId in the local cache, too */
+	mXactCachePut(multi, nxids, xids);
+
+	debug_elog2(DEBUG2, "Create: all done");
+
+	return multi;
+}
+
+/*
+ * RecordNewMultiXact
+ *		Write info about a new multixact into the offsets and members files
+ *
+ * This is broken out of CreateMultiXactId so that xlog replay can use it.
+ */
+static void
+RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
+				   int nxids, TransactionId *xids)
+{
+	int			pageno;
+	int			prev_pageno;
+	int			entryno;
+	int			slotno;
+	MultiXactOffset *offptr;
+	int			i;
 
-	ExtendMultiXactOffset(multi);
+	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
@@ -605,8 +671,9 @@ CreateMultiXactId(int nxids, TransactionId *xids)
 	 * we'll take the trouble to generalize the slru.c error reporting code.
 	 */
 	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
-	offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
+
 	*offptr = offset;
 
 	MultiXactOffsetCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
@@ -614,8 +681,6 @@ CreateMultiXactId(int nxids, TransactionId *xids)
 	/* Exchange our lock */
 	LWLockRelease(MultiXactOffsetControlLock);
 
-	debug_elog3(DEBUG2, "Create: got offset %u", offset);
-
 	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
 
 	prev_pageno = -1;
@@ -624,8 +689,6 @@ CreateMultiXactId(int nxids, TransactionId *xids)
 	{
 		TransactionId *memberptr;
 
-		ExtendMultiXactMember(offset);
-
 		pageno = MXOffsetToMemberPage(offset);
 		entryno = MXOffsetToMemberEntry(offset);
 
@@ -640,29 +703,27 @@ CreateMultiXactId(int nxids, TransactionId *xids)
 		memberptr += entryno;
 
 		*memberptr = xids[i];
+
 		MultiXactMemberCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
 	}
 
 	LWLockRelease(MultiXactMemberControlLock);
-
-	/* Store the new MultiXactId in the local cache, too */
-	mXactCachePut(multi, nxids, xids);
-
-	debug_elog2(DEBUG2, "Create: all done");
-
-	return multi;
 }
 
 /*
  * GetNewMultiXactId
  *		Get the next MultiXactId.
  *
- * Get the next MultiXactId, XLogging if needed.  Also, reserve the needed
- * amount of space in the "members" area.  The starting offset of the
- * reserved space is returned in *offset.
+ * Also, reserve the needed amount of space in the "members" area.  The
+ * starting offset of the reserved space is returned in *offset.
+ *
+ * This may generate XLOG records for expansion of the offsets and/or members
+ * files.  Unfortunately, we have to do that while holding MultiXactGenLock
+ * to avoid race conditions --- the XLOG record for zeroing a page must appear
+ * before any backend can possibly try to store data in that page!
  */
 static MultiXactId
-GetNewMultiXactId(int nxids, uint32 *offset)
+GetNewMultiXactId(int nxids, MultiXactOffset *offset)
 {
 	MultiXactId		result;
 
@@ -675,33 +736,33 @@ GetNewMultiXactId(int nxids, uint32 *offset)
 
 	/* Handle wraparound of the nextMXact counter */
 	if (MultiXactState->nextMXact < FirstMultiXactId)
-	{
 		MultiXactState->nextMXact = FirstMultiXactId;
-		MultiXactState->mXactCount = 0;
-	}
-
-	/* If we run out of logged for use multixacts then we must log more */
-	if (MultiXactState->mXactCount == 0)
-	{
-		XLogPutNextMultiXactId(MultiXactState->nextMXact + MXACT_PREFETCH);
-		MultiXactState->mXactCount = MXACT_PREFETCH;
-	}
 
+	/*
+	 * Assign the MXID, and make sure there is room for it in the file.
+	 */
 	result = MultiXactState->nextMXact;
 
+	ExtendMultiXactOffset(result);
+
 	/*
+	 * Advance counter.  As in GetNewTransactionId(), this must not happen
+	 * until after ExtendMultiXactOffset has succeeded!
+	 *
 	 * We don't care about MultiXactId wraparound here; it will be handled by
 	 * the next iteration.  But note that nextMXact may be InvalidMultiXactId
 	 * after this routine exits, so anyone else looking at the variable must
 	 * be prepared to deal with that.
 	 */
 	(MultiXactState->nextMXact)++;
-	(MultiXactState->mXactCount)--;
 
 	/*
-	 * Reserve the members space.
+	 * Reserve the members space.  Same considerations as above.
 	 */
 	*offset = MultiXactState->nextOffset;
+
+	ExtendMultiXactMember(*offset, nxids);
+
 	MultiXactState->nextOffset += nxids;
 
 	LWLockRelease(MultiXactGenLock);
@@ -725,13 +786,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
 	int			prev_pageno;
 	int			entryno;
 	int			slotno;
-	uint32	   *offptr;
-	uint32		offset;
+	MultiXactOffset	   *offptr;
+	MultiXactOffset		offset;
 	int			length;
 	int			i;
 	MultiXactId	nextMXact;
 	MultiXactId	tmpMXact;
-	uint32		nextOffset;
+	MultiXactOffset		nextOffset;
 	TransactionId *ptr;
 
 	debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
@@ -799,7 +860,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
 	entryno = MultiXactIdToOffsetEntry(multi);
 
 	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
-	offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
 
@@ -829,7 +890,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
 		if (pageno != prev_pageno)
 			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, tmpMXact);
 
-		offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 		offptr += entryno;
 		length = *offptr - offset;
 	}
@@ -1086,10 +1147,6 @@ MultiXactShmemInit(void)
 	SimpleLruInit(MultiXactMemberCtl, "MultiXactMember Ctl",
 				  MultiXactMemberControlLock, "pg_multixact/members");
 
-	/* Override default assumption that writes should be fsync'd */
-	MultiXactOffsetCtl->do_fsync = false;
-	MultiXactMemberCtl->do_fsync = false;
-
 	/* Initialize our shared state struct */
 	MultiXactState = ShmemInitStruct("Shared MultiXact State",
 									 SHARED_MULTIXACT_STATE_SIZE,
@@ -1116,10 +1173,6 @@ MultiXactShmemInit(void)
  * This func must be called ONCE on system install.  It creates the initial
  * MultiXact segments.  (The MultiXacts directories are assumed to have been
  * created by initdb, and MultiXactShmemInit must have been called already.)
- *
- * Note: it's not really necessary to create the initial segments now,
- * since slru.c would create 'em on first write anyway.	But we may as well
- * do it to be sure the directories are set up correctly.
  */
 void
 BootStrapMultiXact(void)
@@ -1128,8 +1181,10 @@ BootStrapMultiXact(void)
 
 	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
-	/* Offsets first page */
-	slotno = ZeroMultiXactOffsetPage(0);
+	/* Create and zero the first page of the offsets log */
+	slotno = ZeroMultiXactOffsetPage(0, false);
+
+	/* Make sure it's written out */
 	SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
 	Assert(MultiXactOffsetCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
 
@@ -1137,8 +1192,10 @@ BootStrapMultiXact(void)
 
 	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
 
-	/* Members first page */
-	slotno = ZeroMultiXactMemberPage(0);
+	/* Create and zero the first page of the members log */
+	slotno = ZeroMultiXactMemberPage(0, false);
+
+	/* Make sure it's written out */
 	SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
 	Assert(MultiXactMemberCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
 
@@ -1147,6 +1204,7 @@ BootStrapMultiXact(void)
 
 /*
  * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
+ * If writeXlog is TRUE, also emit an XLOG record saying we did this.
  *
  * The page is not actually written, just set up in shared memory.
  * The slot number of the new page is returned.
@@ -1154,25 +1212,40 @@ BootStrapMultiXact(void)
  * Control lock must be held at entry, and will be held at exit.
  */
 static int
-ZeroMultiXactOffsetPage(int pageno)
+ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
 {
-	return SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+	int			slotno;
+
+	slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+
+	if (writeXlog)
+		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
+
+	return slotno;
 }
 
 /*
  * Ditto, for MultiXactMember
  */
 static int
-ZeroMultiXactMemberPage(int pageno)
+ZeroMultiXactMemberPage(int pageno, bool writeXlog)
 {
-	return SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+	int			slotno;
+
+	slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+
+	if (writeXlog)
+		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
+
+	return slotno;
 }
 
 /*
  * This must be called ONCE during postmaster or standalone-backend startup.
  *
- * StartupXLOG has already established nextMXact by calling
- * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact.
+ * StartupXLOG has already established nextMXact/nextOffset by calling
+ * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact.  Note that we
+ * may already have replayed WAL data into the SLRU files.
  *
  * We don't need any locks here, really; the SLRU locks are taken
  * only because slru.c expects to be called with locks held.
@@ -1180,68 +1253,76 @@ ZeroMultiXactMemberPage(int pageno)
 void
 StartupMultiXact(void)
 {
-	int			startPage;
-	int			cutoffPage;
-	uint32		offset;
+	MultiXactId multi = MultiXactState->nextMXact;
+	MultiXactOffset offset = MultiXactState->nextOffset;
+	int			pageno;
+	int			entryno;
+
+	/* Clean up offsets state */
+	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
 	/*
-	 * We start nextOffset at zero after every reboot; there is no need to
-	 * avoid offset values that were used in the previous system lifecycle.
+	 * Initialize our idea of the latest page number.
 	 */
-	MultiXactState->nextOffset = 0;
+	pageno = MultiXactIdToOffsetPage(multi);
+	MultiXactOffsetCtl->shared->latest_page_number = pageno;
 
 	/*
-	 * Because of the above, a shutdown and restart is likely to leave
-	 * high-numbered MultiXactMember page files that would not get recycled
-	 * for a long time (about as long as the system had been up in the
-	 * previous cycle of life).  To clean out such page files, we issue an
-	 * artificial truncation call that will zap any page files in the first
-	 * half of the offset cycle.  Should there be any page files in the last
-	 * half, they will get cleaned out by the first checkpoint.
-	 *
-	 * XXX it might be a good idea to disable this when debugging, since it
-	 * will tend to destroy evidence after a crash.  To not be *too* ruthless,
-	 * we arbitrarily spare the first 64 pages.  (Note this will get
-	 * rounded off to a multiple of SLRU_PAGES_PER_SEGMENT ...)
+	 * Zero out the remainder of the current offsets page.  See notes
+	 * in StartupCLOG() for motivation.
 	 */
-	offset = ((~ (uint32) 0) >> 1) + 1;
+	entryno = MultiXactIdToOffsetEntry(multi);
+	if (entryno != 0)
+	{
+		int			slotno;
+		MultiXactOffset	   *offptr;
 
-	cutoffPage = MXOffsetToMemberPage(offset) + 64;
+		slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
+		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		offptr += entryno;
 
-	/*
-	 * Defeat safety interlock in SimpleLruTruncate; this hack will be
-	 * cleaned up by ZeroMultiXactMemberPage call below.
-	 */
-	MultiXactMemberCtl->shared->latest_page_number = cutoffPage;
+		MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
 
-	SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
+		MultiXactOffsetCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
+	}
+
+	LWLockRelease(MultiXactOffsetControlLock);
+
+	/* And the same for members */
+	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
 
 	/*
-	 * Initialize lastTruncationPoint to invalid, ensuring that the first
-	 * checkpoint will try to do truncation.
+	 * Initialize our idea of the latest page number.
 	 */
-	MultiXactState->lastTruncationPoint = InvalidMultiXactId;
+	pageno = MXOffsetToMemberPage(offset);
+	MultiXactMemberCtl->shared->latest_page_number = pageno;
 
 	/*
-	 * Since we don't expect MultiXact to be valid across crashes, we
-	 * initialize the currently-active pages to zeroes during startup.
-	 * Whenever we advance into a new page, both ExtendMultiXact routines
-	 * will likewise zero the new page without regard to whatever was
-	 * previously on disk.
+	 * Zero out the remainder of the current members page.  See notes
+	 * in StartupCLOG() for motivation.
 	 */
-	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
-
-	startPage = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
-	(void) ZeroMultiXactOffsetPage(startPage);
+	entryno = MXOffsetToMemberEntry(offset);
+	if (entryno != 0)
+	{
+		int			slotno;
+		TransactionId	   *xidptr;
 
-	LWLockRelease(MultiXactOffsetControlLock);
+		slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, offset);
+		xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
+		xidptr += entryno;
 
-	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
+		MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
 
-	startPage = MXOffsetToMemberPage(MultiXactState->nextOffset);
-	(void) ZeroMultiXactMemberPage(startPage);
+		MultiXactMemberCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
+	}
 
 	LWLockRelease(MultiXactMemberControlLock);
+
+	/*
+	 * Initialize lastTruncationPoint to invalid, ensuring that the first
+	 * checkpoint will try to do truncation.
+	 */
+	MultiXactState->lastTruncationPoint = InvalidMultiXactId;
 }
 
 /*
@@ -1250,36 +1331,28 @@ StartupMultiXact(void)
 void
 ShutdownMultiXact(void)
 {
-	/*
-	 * Flush dirty MultiXact pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely as a debugging aid.
-	 */
+	/* Flush dirty MultiXact pages to disk */
 	SimpleLruFlush(MultiXactOffsetCtl, false);
 	SimpleLruFlush(MultiXactMemberCtl, false);
 }
 
 /*
- * Get the next MultiXactId to save in a checkpoint record
+ * Get the next MultiXactId and offset to save in a checkpoint record
  */
-MultiXactId
-MultiXactGetCheckptMulti(bool is_shutdown)
+void
+MultiXactGetCheckptMulti(bool is_shutdown,
+						 MultiXactId *nextMulti,
+						 MultiXactOffset *nextMultiOffset)
 {
-	MultiXactId	retval;
-
 	LWLockAcquire(MultiXactGenLock, LW_SHARED);
 
-	retval = MultiXactState->nextMXact;
-	if (!is_shutdown)
-		retval += MultiXactState->mXactCount;
+	*nextMulti = MultiXactState->nextMXact;
+	*nextMultiOffset = MultiXactState->nextOffset;
 
 	LWLockRelease(MultiXactGenLock);
 
-	debug_elog3(DEBUG2, "MultiXact: MultiXact for checkpoint record is %u",
-				retval);
-
-	return retval;
+	debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
+				*nextMulti, *nextMultiOffset);
 }
 
 /*
@@ -1288,62 +1361,68 @@ MultiXactGetCheckptMulti(bool is_shutdown)
 void
 CheckPointMultiXact(void)
 {
-	/*
-	 * Flush dirty MultiXact pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely to improve the odds that writing of dirty pages is done
-	 * by the checkpoint process and not by backends.
-	 */
+	/* Flush dirty MultiXact pages to disk */
 	SimpleLruFlush(MultiXactOffsetCtl, true);
 	SimpleLruFlush(MultiXactMemberCtl, true);
 
 	/*
-	 * Truncate the SLRU files
+	 * Truncate the SLRU files.  This could be done at any time, but
+	 * checkpoint seems a reasonable place for it.
 	 */
 	TruncateMultiXact();
 }
 
 /*
- * Set the next-to-be-assigned MultiXactId
+ * Set the next-to-be-assigned MultiXactId and offset
  *
- * This is used when we can determine the correct next Id exactly
- * from an XLog record.  We need no locking since it is only called
+ * This is used when we can determine the correct next ID/offset exactly
+ * from a checkpoint record.  We need no locking since it is only called
  * during bootstrap and XLog replay.
  */
 void
-MultiXactSetNextMXact(MultiXactId nextMulti)
+MultiXactSetNextMXact(MultiXactId nextMulti,
+					  MultiXactOffset nextMultiOffset)
 {
-	debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", nextMulti);
+	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
+				nextMulti, nextMultiOffset);
 	MultiXactState->nextMXact = nextMulti;
-	MultiXactState->mXactCount = 0;
+	MultiXactState->nextOffset = nextMultiOffset;
 }
 
 /*
- * Ensure the next-to-be-assigned MultiXactId is at least minMulti
+ * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
+ * and similarly nextOffset is at least minMultiOffset
  *
- * This is used when we can determine a minimum safe value
- * from an XLog record.  We need no locking since it is only called
- * during XLog replay.
+ * This is used when we can determine minimum safe values from an XLog
+ * record (either an on-line checkpoint or an mxact creation log entry).
+ * We need no locking since it is only called during XLog replay.
  */
 void
-MultiXactAdvanceNextMXact(MultiXactId minMulti)
+MultiXactAdvanceNextMXact(MultiXactId minMulti,
+						  MultiXactOffset minMultiOffset)
 {
 	if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
 	{
 		debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
 		MultiXactState->nextMXact = minMulti;
-		MultiXactState->mXactCount = 0;
+	}
+	if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
+	{
+		debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
+					minMultiOffset);
+		MultiXactState->nextOffset = minMultiOffset;
 	}
 }
 
 /*
  * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
  *
- * The MultiXactOffsetControlLock should be held at entry, and will
- * be held at exit.
+ * NB: this is called while holding MultiXactGenLock.  We want it to be very
+ * fast most of the time; even when it's not so fast, no actual I/O need
+ * happen unless we're forced to write out a dirty log or xlog page to make
+ * room in shared memory.
  */
-void
+static void
 ExtendMultiXactOffset(MultiXactId multi)
 {
 	int			pageno;
@@ -1358,32 +1437,56 @@ ExtendMultiXactOffset(MultiXactId multi)
 
 	pageno = MultiXactIdToOffsetPage(multi);
 
-	/* Zero the page */
-	ZeroMultiXactOffsetPage(pageno);
+	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+
+	/* Zero the page and make an XLOG entry about it */
+	ZeroMultiXactOffsetPage(pageno, true);
+
+	LWLockRelease(MultiXactOffsetControlLock);
 }
 
 /*
  * Make sure that MultiXactMember has room for the members of a newly-
  * allocated MultiXactId.
  *
- * The MultiXactMemberControlLock should be held at entry, and will be held
- * at exit.
+ * Like the above routine, this is called while holding MultiXactGenLock;
+ * same comments apply.
  */
-void
-ExtendMultiXactMember(uint32 offset)
+static void
+ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 {
-	int		pageno;
-
 	/*
-	 * No work except at first entry of a page.
+	 * It's possible that the members span more than one page of the
+	 * members file, so we loop to ensure we consider each page.  The
+	 * coding is not optimal if the members span several pages, but
+	 * that seems unusual enough to not worry much about.
 	 */
-	if (MXOffsetToMemberEntry(offset) != 0)
-		return;
+	while (nmembers > 0)
+	{
+		int		entryno;
 
-	pageno = MXOffsetToMemberPage(offset);
+		/*
+		 * Only zero when at first entry of a page.
+		 */
+		entryno = MXOffsetToMemberEntry(offset);
+		if (entryno == 0)
+		{
+			int		pageno;
 
-	/* Zero the page */
-	ZeroMultiXactMemberPage(pageno);
+			pageno = MXOffsetToMemberPage(offset);
+
+			LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
+
+			/* Zero the page and make an XLOG entry about it */
+			ZeroMultiXactMemberPage(pageno, true);
+
+			LWLockRelease(MultiXactMemberControlLock);
+		}
+
+		/* Advance to next page (OK if nmembers goes negative) */
+		offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
+		nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
+	}
 }
 
 /*
@@ -1392,14 +1495,16 @@ ExtendMultiXactMember(uint32 offset)
  *
  * This is called only during checkpoints.  We assume no more than one
  * backend does this at a time.
+ *
+ * XXX do we have any issues with needing to checkpoint here?
  */
 static void
 TruncateMultiXact(void)
 {
 	MultiXactId nextMXact;
-	uint32		nextOffset;
+	MultiXactOffset		nextOffset;
 	MultiXactId oldestMXact;
-	uint32		oldestOffset;
+	MultiXactOffset		oldestOffset;
 	int			cutoffPage;
 	int			i;
 
@@ -1460,7 +1565,7 @@ TruncateMultiXact(void)
 		int			pageno;
 		int			slotno;
 		int			entryno;
-		uint32	   *offptr;
+		MultiXactOffset	   *offptr;
 
 		LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
@@ -1468,7 +1573,7 @@ TruncateMultiXact(void)
 		entryno = MultiXactIdToOffsetEntry(oldestMXact);
 
 		slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, oldestMXact);
-		offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 		offptr += entryno;
 		oldestOffset = *offptr;
 
@@ -1529,11 +1634,11 @@ MultiXactOffsetPagePrecedes(int page1, int page2)
 static bool
 MultiXactMemberPagePrecedes(int page1, int page2)
 {
-	uint32	offset1;
-	uint32	offset2;
+	MultiXactOffset	offset1;
+	MultiXactOffset	offset2;
 
-	offset1 = ((uint32) page1) * MULTIXACT_MEMBERS_PER_PAGE;
-	offset2 = ((uint32) page2) * MULTIXACT_MEMBERS_PER_PAGE;
+	offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
+	offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
 
 	return MultiXactOffsetPrecedes(offset1, offset2);
 }
@@ -1556,9 +1661,135 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
  * Decide which of two offsets is earlier.
  */
 static bool
-MultiXactOffsetPrecedes(uint32 offset1, uint32 offset2)
+MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 {
 	int32 diff = (int32) (offset1 - offset2);
 
 	return (diff < 0);
 }
+
+
+/*
+ * Write an xlog record reflecting the zeroing of either a MEMBERs or
+ * OFFSETs page (info shows which).  The record body is just the int page
+ * number; multixact_redo copies it back out and re-zeroes that page.
+ * Note: xlog record is marked as outside transaction control, since we
+ * want it to be redone whether the invoking transaction commits or not.
+ */
+static void
+WriteMZeroPageXlogRec(int pageno, uint8 info)
+{
+	XLogRecData rdata;
+
+	rdata.data = (char *) (&pageno);	/* record body: the page number alone */
+	rdata.len = sizeof(int);
+	rdata.buffer = InvalidBuffer;
+	rdata.next = NULL;			/* only one XLogRecData element */
+	(void) XLogInsert(RM_MULTIXACT_ID, info | XLOG_NO_TRAN, &rdata);
+}
+
+/*
+ * MULTIXACT resource manager's routines: multixact_redo replays one record
+ */
+void
+multixact_redo(XLogRecPtr lsn, XLogRecord *record)
+{
+	uint8		info = record->xl_info & ~XLR_INFO_MASK;	/* op code: xl_info minus the XLR_INFO_MASK bits */
+
+	if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
+	{
+		int			pageno;
+		int			slotno;
+
+		memcpy(&pageno, XLogRecGetData(record), sizeof(int));	/* record body is the page number */
+
+		LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+
+		slotno = ZeroMultiXactOffsetPage(pageno, false);	/* false: presumably "no new xlog entry" (the Extend routines pass true) */
+		SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);	/* write the zeroed page out now; slot must end up clean */
+		Assert(MultiXactOffsetCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
+
+		LWLockRelease(MultiXactOffsetControlLock);
+	}
+	else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
+	{
+		int			pageno;
+		int			slotno;
+
+		memcpy(&pageno, XLogRecGetData(record), sizeof(int));	/* record body is the page number */
+
+		LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
+
+		slotno = ZeroMultiXactMemberPage(pageno, false);	/* same steps as the offsets case, on the members SLRU */
+		SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
+		Assert(MultiXactMemberCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
+
+		LWLockRelease(MultiXactMemberControlLock);
+	}
+	else if (info == XLOG_MULTIXACT_CREATE_ID)
+	{
+		xl_multixact_create *xlrec = (xl_multixact_create *) XLogRecGetData(record);	/* record body */
+		TransactionId *xids = xlrec->xids;
+		TransactionId max_xid;
+		int			i;
+
+		/* Store the data back into the SLRU files */
+		RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nxids, xids);
+
+		/* Make sure nextMXact/nextOffset are beyond what this record has */
+		MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids);
+
+		/*
+		 * Make sure nextXid is beyond any XID mentioned in the record.
+		 * This should be unnecessary, since any XID found here ought to
+		 * have other evidence in the XLOG, but let's be safe.
+		 */
+		max_xid = record->xl_xid;	/* start with the record's own xid, then scan the members */
+		for (i = 0; i < xlrec->nxids; i++)
+		{
+			if (TransactionIdPrecedes(max_xid, xids[i]))
+				max_xid = xids[i];
+		}
+		if (TransactionIdFollowsOrEquals(max_xid,
+										 ShmemVariableCache->nextXid))
+		{
+			ShmemVariableCache->nextXid = max_xid;
+			TransactionIdAdvance(ShmemVariableCache->nextXid);
+		}
+	}
+	else
+		elog(PANIC, "multixact_redo: unknown op code %u", info);
+}
+
+void
+multixact_desc(char *buf, uint8 xl_info, char *rec)
+{	/* appends a text description of the record body (rec) to the string already in buf */
+	uint8		info = xl_info & ~XLR_INFO_MASK;	/* op code: xl_info minus the XLR_INFO_MASK bits */
+
+	if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
+	{
+		int			pageno;
+
+		memcpy(&pageno, rec, sizeof(int));	/* record body is the page number */
+		sprintf(buf + strlen(buf), "zero offsets page: %d", pageno);	/* NOTE(review): unbounded write; buf's size is not visible here */
+	}
+	else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
+	{
+		int			pageno;
+
+		memcpy(&pageno, rec, sizeof(int));	/* record body is the page number */
+		sprintf(buf + strlen(buf), "zero members page: %d", pageno);
+	}
+	else if (info == XLOG_MULTIXACT_CREATE_ID)
+	{
+		xl_multixact_create *xlrec = (xl_multixact_create *) rec;	/* record body */
+		int			i;
+
+		sprintf(buf + strlen(buf), "create multixact %u offset %u:",
+				xlrec->mid, xlrec->moff);
+		for (i = 0; i < xlrec->nxids; i++)	/* then each member XID, space-separated */
+			sprintf(buf + strlen(buf), " %u", xlrec->xids[i]);
+	}
+	else
+		strcat(buf, "UNKNOWN");	/* unrecognized op code */
+}
diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c
index 39d647f8a6f663f57a010f993a3545ccbb91a502..5fe442fd80d05cb9dce964d7b32c13b0446cb939 100644
--- a/src/backend/access/transam/rmgr.c
+++ b/src/backend/access/transam/rmgr.c
@@ -3,7 +3,7 @@
  *
  * Resource managers definition
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.18 2005/06/06 17:01:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.19 2005/06/08 15:50:26 tgl Exp $
  */
 #include "postgres.h"
 
@@ -11,6 +11,7 @@
 #include "access/gist_private.h"
 #include "access/hash.h"
 #include "access/heapam.h"
+#include "access/multixact.h"
 #include "access/nbtree.h"
 #include "access/rtree.h"
 #include "access/xact.h"
@@ -28,7 +29,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
 	{"CLOG", clog_redo, clog_desc, NULL, NULL},
 	{"Database", dbase_redo, dbase_desc, NULL, NULL},
 	{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL},
-	{"Reserved 6", NULL, NULL, NULL, NULL},
+	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL},
 	{"Reserved 7", NULL, NULL, NULL, NULL},
 	{"Reserved 8", NULL, NULL, NULL, NULL},
 	{"Reserved 9", NULL, NULL, NULL, NULL},
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index aa37244162ad110eb236eb4a6f11b9d7a14d8d61..6c01c20eaa36bbed41367a8c5cabb1d69ec4fade 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.197 2005/06/06 20:22:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.198 2005/06/08 15:50:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3688,12 +3688,13 @@ BootStrapXLOG(void)
 	checkPoint.nextXid = FirstNormalTransactionId;
 	checkPoint.nextOid = FirstBootstrapObjectId;
 	checkPoint.nextMulti = FirstMultiXactId;
+	checkPoint.nextMultiOffset = 0;
 	checkPoint.time = time(NULL);
 
 	ShmemVariableCache->nextXid = checkPoint.nextXid;
 	ShmemVariableCache->nextOid = checkPoint.nextOid;
 	ShmemVariableCache->oidCount = 0;
-	MultiXactSetNextMXact(checkPoint.nextMulti);
+	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 
 	/* Set up the XLOG page header */
 	page->xlp_magic = XLOG_PAGE_MAGIC;
@@ -4344,8 +4345,11 @@ StartupXLOG(void)
 					checkPoint.undo.xlogid, checkPoint.undo.xrecoff,
 					wasShutdown ? "TRUE" : "FALSE")));
 	ereport(LOG,
-			(errmsg("next transaction ID: %u; next OID: %u; next MultiXactId: %u",
-					checkPoint.nextXid, checkPoint.nextOid, checkPoint.nextMulti)));
+			(errmsg("next transaction ID: %u; next OID: %u",
+					checkPoint.nextXid, checkPoint.nextOid)));
+	ereport(LOG,
+			(errmsg("next MultiXactId: %u; next MultiXactOffset: %u",
+					checkPoint.nextMulti, checkPoint.nextMultiOffset)));
 	if (!TransactionIdIsNormal(checkPoint.nextXid))
 		ereport(PANIC,
 				(errmsg("invalid next transaction ID")));
@@ -4353,7 +4357,7 @@ StartupXLOG(void)
 	ShmemVariableCache->nextXid = checkPoint.nextXid;
 	ShmemVariableCache->nextOid = checkPoint.nextOid;
 	ShmemVariableCache->oidCount = 0;
-	MultiXactSetNextMXact(checkPoint.nextMulti);
+	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 
 	/*
 	 * We must replay WAL entries using the same TimeLineID they were
@@ -5080,7 +5084,9 @@ CreateCheckPoint(bool shutdown, bool force)
 		checkPoint.nextOid += ShmemVariableCache->oidCount;
 	LWLockRelease(OidGenLock);
 
-	checkPoint.nextMulti = MultiXactGetCheckptMulti(shutdown);
+	MultiXactGetCheckptMulti(shutdown,
+							 &checkPoint.nextMulti,
+							 &checkPoint.nextMultiOffset);
 
 	/*
 	 * Having constructed the checkpoint record, ensure all shmem disk
@@ -5228,25 +5234,6 @@ XLogPutNextOid(Oid nextOid)
 	 */
 }
 
-/*
- * Write a NEXT_MULTIXACT log record
- */
-void
-XLogPutNextMultiXactId(MultiXactId nextMulti)
-{
-	XLogRecData rdata;
-
-	rdata.data = (char *) (&nextMulti);
-	rdata.len = sizeof(MultiXactId);
-	rdata.buffer = InvalidBuffer;
-	rdata.next = NULL;
-	(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTMULTI, &rdata);
-	/*
-	 * We do not flush here either; this assumes that heap_lock_tuple() will
-	 * always generate a WAL record.  See notes therein.
-	 */
-}
-
 /*
  * XLOG resource manager's routines
  */
@@ -5266,14 +5253,6 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 			ShmemVariableCache->oidCount = 0;
 		}
 	}
-	else if (info == XLOG_NEXTMULTI)
-	{
-		MultiXactId	nextMulti;
-
-		memcpy(&nextMulti, XLogRecGetData(record), sizeof(MultiXactId));
-
-		MultiXactAdvanceNextMXact(nextMulti);
-	}
 	else if (info == XLOG_CHECKPOINT_SHUTDOWN)
 	{
 		CheckPoint	checkPoint;
@@ -5283,7 +5262,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 		ShmemVariableCache->nextXid = checkPoint.nextXid;
 		ShmemVariableCache->nextOid = checkPoint.nextOid;
 		ShmemVariableCache->oidCount = 0;
-		MultiXactSetNextMXact(checkPoint.nextMulti);
+		MultiXactSetNextMXact(checkPoint.nextMulti,
+							  checkPoint.nextMultiOffset);
 
 		/*
 		 * TLI may change in a shutdown checkpoint, but it shouldn't
@@ -5315,7 +5295,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 			ShmemVariableCache->nextOid = checkPoint.nextOid;
 			ShmemVariableCache->oidCount = 0;
 		}
-		MultiXactAdvanceNextMXact(checkPoint.nextMulti);
+		MultiXactAdvanceNextMXact(checkPoint.nextMulti,
+								  checkPoint.nextMultiOffset);
 		/* TLI should not change in an on-line checkpoint */
 		if (checkPoint.ThisTimeLineID != ThisTimeLineID)
 			ereport(PANIC,
@@ -5335,12 +5316,13 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
 		CheckPoint *checkpoint = (CheckPoint *) rec;
 
 		sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; "
-				"tli %u; xid %u; oid %u; multi %u; %s",
+				"tli %u; xid %u; oid %u; multi %u; offset %u; %s",
 				checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
 				checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
 				checkpoint->ThisTimeLineID, checkpoint->nextXid,
 				checkpoint->nextOid,
 				checkpoint->nextMulti,
+				checkpoint->nextMultiOffset,
 			 (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
 	}
 	else if (info == XLOG_NEXTOID)
@@ -5350,13 +5332,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
 		memcpy(&nextOid, rec, sizeof(Oid));
 		sprintf(buf + strlen(buf), "nextOid: %u", nextOid);
 	}
-	else if (info == XLOG_NEXTMULTI)
-	{
-		MultiXactId	multi;
-
-		memcpy(&multi, rec, sizeof(MultiXactId));
-		sprintf(buf + strlen(buf), "nextMultiXact: %u", multi);
-	}
 	else
 		strcat(buf, "UNKNOWN");
 }
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 77f61af06f467851216584e8fcae2eb40c13f8b2..b1aed8f421f6bbb36fc536ee48defadbbaac1d0f 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -6,7 +6,7 @@
  * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
  * licence: BSD
  *
- * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.24 2005/06/02 05:55:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.25 2005/06/08 15:50:27 tgl Exp $
  */
 #include "postgres.h"
 
@@ -166,6 +166,7 @@ main(int argc, char *argv[])
 	printf(_("Latest checkpoint's NextXID:          %u\n"), ControlFile.checkPointCopy.nextXid);
 	printf(_("Latest checkpoint's NextOID:          %u\n"), ControlFile.checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"), ControlFile.checkPointCopy.nextMulti);
+	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
 	printf(_("Time of latest checkpoint:            %s\n"), ckpttime_str);
 	printf(_("Database block size:                  %u\n"), ControlFile.blcksz);
 	printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 6eceb0a3543943893ce9179736d1949df5485e77..15c291b1ee6b979d484bdacfd7f36a937f32e0de 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -23,7 +23,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.34 2005/06/08 15:50:27 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,6 +77,7 @@ main(int argc, char *argv[])
 	TransactionId set_xid = 0;
 	Oid			set_oid = 0;
 	MultiXactId	set_mxid = 0;
+	MultiXactOffset set_mxoff = -1;
 	uint32		minXlogTli = 0,
 				minXlogId = 0,
 				minXlogSeg = 0;
@@ -106,7 +107,7 @@ main(int argc, char *argv[])
 	}
 
 
-	while ((c = getopt(argc, argv, "fl:m:no:x:")) != -1)
+	while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1)
 	{
 		switch (c)
 		{
@@ -163,6 +164,21 @@ main(int argc, char *argv[])
 				}
 				break;
 
+			case 'O':
+				set_mxoff = strtoul(optarg, &endptr, 0);
+				if (endptr == optarg || *endptr != '\0')
+				{
+					fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
+					fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+					exit(1);
+				}
+				if (set_mxoff == -1)
+				{
+					fprintf(stderr, _("%s: multi transaction offset (-O) must not be -1\n"), progname);
+					exit(1);
+				}
+				break;
+
 			case 'l':
 				minXlogTli = strtoul(optarg, &endptr, 0);
 				if (endptr == optarg || *endptr != ',')
@@ -265,6 +281,9 @@ main(int argc, char *argv[])
 	if (set_mxid != 0)
 		ControlFile.checkPointCopy.nextMulti = set_mxid;
 
+	if (set_mxoff != -1)
+		ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
+
 	if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
 		ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
 
@@ -426,6 +445,7 @@ GuessControlValues(void)
 	ControlFile.checkPointCopy.nextXid = (TransactionId) 514;	/* XXX */
 	ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
 	ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
+	ControlFile.checkPointCopy.nextMultiOffset = 0;
 	ControlFile.checkPointCopy.time = time(NULL);
 
 	ControlFile.state = DB_SHUTDOWNED;
@@ -463,7 +483,7 @@ GuessControlValues(void)
 
 	/*
 	 * XXX eventually, should try to grovel through old XLOG to develop
-	 * more accurate values for TimeLineID, nextXID, and nextOID.
+	 * more accurate values for TimeLineID, nextXID, etc.
 	 */
 }
 
@@ -500,6 +520,7 @@ PrintControlValues(bool guessed)
 	printf(_("Latest checkpoint's NextXID:          %u\n"), ControlFile.checkPointCopy.nextXid);
 	printf(_("Latest checkpoint's NextOID:          %u\n"), ControlFile.checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"), ControlFile.checkPointCopy.nextMulti);
+	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
 	printf(_("Database block size:                  %u\n"), ControlFile.blcksz);
 	printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
 	printf(_("Maximum length of identifiers:        %u\n"), ControlFile.nameDataLen);
@@ -777,6 +798,7 @@ usage(void)
 	printf(_("  -o OID          set next OID\n"));
 	printf(_("  -x XID          set next transaction ID\n"));
 	printf(_("  -m multiXID     set next multi transaction ID\n"));
+	printf(_("  -O multiOffset  set next multi transaction offset\n"));
 	printf(_("  --help          show this help, then exit\n"));
 	printf(_("  --version       output version information, then exit\n"));
 	printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index adeb05fd560dae2e49ab77c3b299cbcb696f399c..e394afd31392a1e96fab99e8f333545b57e0cf1e 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.74 2005/04/28 21:47:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.75 2005/06/08 15:50:27 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -505,6 +505,8 @@ typedef struct xl_heap_newpage
 typedef struct xl_heap_lock
 {
 	xl_heaptid	target;			/* locked tuple id */
+	TransactionId locking_xid;	/* may hold a MultiXactId rather than an xid */
+	bool		xid_is_mxact;	/* is locking_xid a MultiXactId? */
 	bool		shared_lock;	/* shared or exclusive row lock? */
 } xl_heap_lock;
 
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 65d19704c404bb0f9e1d1f71ddbde8df8fa21339..2199b05f2c5f177b9f414d289afb4f83c3740610 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -6,16 +6,38 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/multixact.h,v 1.2 2005/05/03 19:42:41 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/multixact.h,v 1.3 2005/06/08 15:50:28 tgl Exp $
  */
 #ifndef MULTIXACT_H
 #define MULTIXACT_H
 
+#include "access/xlog.h"
+
 #define InvalidMultiXactId	((MultiXactId) 0)
 #define FirstMultiXactId	((MultiXactId) 1)
 
 #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
 
+/* ----------------
+ *		multixact-related XLOG entries
+ * ----------------
+ */
+
+#define XLOG_MULTIXACT_ZERO_OFF_PAGE	0x00
+#define XLOG_MULTIXACT_ZERO_MEM_PAGE	0x10
+#define XLOG_MULTIXACT_CREATE_ID		0x20
+
+typedef struct xl_multixact_create	/* body of an XLOG_MULTIXACT_CREATE_ID record */
+{
+	MultiXactId		mid;		/* new MultiXact's ID */
+	MultiXactOffset	moff;		/* its starting offset in members file */
+	int32			nxids;		/* number of member XIDs */
+	TransactionId	xids[1];	/* VARIABLE LENGTH ARRAY: nxids member XIDs */
+} xl_multixact_create;
+
+#define MinSizeOfMultiXactCreate offsetof(xl_multixact_create, xids)
+
+
 extern MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2);
 extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid);
 extern bool MultiXactIdIsRunning(MultiXactId multi);
@@ -29,9 +51,16 @@ extern void MultiXactShmemInit(void);
 extern void BootStrapMultiXact(void);
 extern void StartupMultiXact(void);
 extern void ShutdownMultiXact(void);
-extern MultiXactId MultiXactGetCheckptMulti(bool is_shutdown);
+extern void MultiXactGetCheckptMulti(bool is_shutdown,
+									 MultiXactId *nextMulti,
+									 MultiXactOffset *nextMultiOffset);
 extern void CheckPointMultiXact(void);
-extern void MultiXactSetNextMXact(MultiXactId nextMulti);
-extern void MultiXactAdvanceNextMXact(MultiXactId minMulti);
+extern void MultiXactSetNextMXact(MultiXactId nextMulti,
+								  MultiXactOffset nextMultiOffset);
+extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
+									  MultiXactOffset minMultiOffset);
+
+extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
+extern void multixact_desc(char *buf, uint8 xl_info, char *rec);
 
 #endif   /* MULTIXACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 7547d7f5b9f8c1b9f34c1a8bdeff3cbc88e6575e..ead4619b027c995f2e3e2c97875f6cb2d305ac2c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.64 2005/06/06 20:22:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.65 2005/06/08 15:50:28 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -165,7 +165,6 @@ extern void ShutdownXLOG(int code, Datum arg);
 extern void InitXLOGAccess(void);
 extern void CreateCheckPoint(bool shutdown, bool force);
 extern void XLogPutNextOid(Oid nextOid);
-extern void XLogPutNextMultiXactId(MultiXactId multi);
 extern XLogRecPtr GetRedoRecPtr(void);
 
 #endif   /* XLOG_H */
diff --git a/src/include/c.h b/src/include/c.h
index 6318c5573dd2c238c21e7d7b5b43388fc7c96626..1a92038774748a9f2a95b441073136f1b96ca80f 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/c.h,v 1.184 2005/05/25 21:40:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/c.h,v 1.185 2005/06/08 15:50:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -388,6 +388,8 @@ typedef uint32 SubTransactionId;
 /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
 typedef TransactionId MultiXactId;
 
+typedef uint32 MultiXactOffset;
+
 typedef uint32 CommandId;
 
 #define FirstCommandId	((CommandId) 0)
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 3f96b6bf261734a9cda9b1a9267f16f30a2e5d6d..73f32b55ade4025d5d3c3e34775ed2807332be06 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.22 2005/06/02 05:55:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.23 2005/06/08 15:50:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION	810
+#define PG_CONTROL_VERSION	811
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -40,13 +40,13 @@ typedef struct CheckPoint
 	TransactionId nextXid;		/* next free XID */
 	Oid			nextOid;		/* next free OID */
 	MultiXactId	nextMulti;		/* next free MultiXactId */
+	MultiXactOffset nextMultiOffset;	/* next free MultiXact offset */
 	time_t		time;			/* time stamp of checkpoint */
 } CheckPoint;
 
 /* XLOG info values for XLOG rmgr */
 #define XLOG_CHECKPOINT_SHUTDOWN		0x00
 #define XLOG_CHECKPOINT_ONLINE			0x10
-#define XLOG_NEXTMULTI					0x20
 #define XLOG_NEXTOID					0x30