diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a3331bdef6eb1fd686e076af8265d64ff6e40ab9..0e49ba321780d07a3e30d9f5260be1810d2472cd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.132 2007/07/24 01:53:55 alvherre Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.133 2007/07/24 04:54:08 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -1413,7 +1413,7 @@ SET ENABLE_SEQSCAN TO OFF;
        </para>
       </listitem>
      </varlistentry>
-     
+
      <varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
       <term><varname>wal_buffers</varname> (<type>integer</type>)</term>
       <indexterm>
@@ -1438,7 +1438,27 @@ SET ENABLE_SEQSCAN TO OFF;
        </para>
       </listitem>
      </varlistentry>
-                
+
+     <varlistentry id="guc-wal-writer-delay" xreflabel="wal_writer_delay">
+      <term><varname>wal_writer_delay</varname> (<type>integer</type>)</term>
+      <indexterm>
+       <primary><varname>wal_writer_delay</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Specifies the delay between activity rounds for the WAL writer.
+        In each round the writer will flush WAL to disk. It then sleeps for
+        <varname>wal_writer_delay</> milliseconds, and repeats.  The default
+        value is 200 milliseconds (<literal>200ms</>).  Note that on many
+        systems, the effective resolution of sleep delays is 10 milliseconds;
+        setting <varname>wal_writer_delay</> to a value that is not a multiple
+        of 10 might have the same results as setting it to the next higher
+        multiple of 10. This parameter can only be set in the
+        <filename>postgresql.conf</> file or on the server command line.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-commit-delay" xreflabel="commit_delay">
       <term><varname>commit_delay</varname> (<type>integer</type>)</term>
       <indexterm>
@@ -1521,7 +1541,7 @@ SET ENABLE_SEQSCAN TO OFF;
       </indexterm>
       <listitem>
        <para>
-        Specifies the target length of checkpoints, as a fraction of 
+        Specifies the target length of checkpoints, as a fraction of
         the checkpoint interval. The default is 0.5.
 
         This parameter can only be set in the <filename>postgresql.conf</>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 15c9f310a63a57611f983d1b25319b9b9c5eaf2f..25789ddaa68bfb752753d0c7ab6863fdd7c5f148 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.274 2007/06/30 19:12:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.275 2007/07/24 04:54:08 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -484,7 +484,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 	uint32		len,
 				write_len;
 	unsigned	i;
-	XLogwrtRqst LogwrtRqst;
 	bool		updrqst;
 	bool		doPageWrites;
 	bool		isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
@@ -643,43 +642,6 @@ begin:;
 
 	START_CRIT_SECTION();
 
-	/* update LogwrtResult before doing cache fill check */
-	{
-		/* use volatile pointer to prevent code rearrangement */
-		volatile XLogCtlData *xlogctl = XLogCtl;
-
-		SpinLockAcquire(&xlogctl->info_lck);
-		LogwrtRqst = xlogctl->LogwrtRqst;
-		LogwrtResult = xlogctl->LogwrtResult;
-		SpinLockRelease(&xlogctl->info_lck);
-	}
-
-	/*
-	 * If cache is half filled then try to acquire write lock and do
-	 * XLogWrite. Ignore any fractional blocks in performing this check.
-	 */
-	LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
-	if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
-		(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
-		 XLogCtl->XLogCacheByte / 2))
-	{
-		if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE))
-		{
-			/*
-			 * Since the amount of data we write here is completely optional
-			 * anyway, tell XLogWrite it can be "flexible" and stop at a
-			 * convenient boundary.  This allows writes triggered by this
-			 * mechanism to synchronize with the cache boundaries, so that in
-			 * a long transaction we'll basically dump alternating halves of
-			 * the buffer array.
-			 */
-			LogwrtResult = XLogCtl->Write.LogwrtResult;
-			if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write))
-				XLogWrite(LogwrtRqst, true, false);
-			LWLockRelease(WALWriteLock);
-		}
-	}
-
 	/* Now wait to get insert lock */
 	LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
 
@@ -1800,6 +1762,85 @@ XLogFlush(XLogRecPtr record)
 			 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
 }
 
+/*
+ * Flush xlog, but without specifying exactly where to flush to.
+ *
+ * We normally flush only completed blocks; but if there is nothing to do on
+ * that basis, we check for unflushed async commits in the current incomplete
+ * block, and flush through the latest one of those.  Thus, if async commits
+ * are not being used, we will flush complete blocks only.  We can guarantee
+ * that async commits reach disk after at most three cycles; normally only
+ * one or two.  (We allow XLogWrite to write "flexibly", meaning it can stop
+ * at the end of the buffer ring; this makes a difference only with very high
+ * load or long wal_writer_delay, but imposes one extra cycle for the worst
+ * case for async commits.)
+ *
+ * This routine is invoked periodically by the background walwriter process.
+ */
+void
+XLogBackgroundFlush(void)
+{
+	XLogRecPtr	WriteRqstPtr;
+	bool		flexible = true;
+
+	/* read LogwrtResult and update local state */
+	{
+		/* use volatile pointer to prevent code rearrangement */
+		volatile XLogCtlData *xlogctl = XLogCtl;
+
+		SpinLockAcquire(&xlogctl->info_lck);
+		LogwrtResult = xlogctl->LogwrtResult;
+		WriteRqstPtr = xlogctl->LogwrtRqst.Write;
+		SpinLockRelease(&xlogctl->info_lck);
+	}
+
+	/* back off to last completed page boundary */
+	WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
+
+#ifdef NOT_YET					/* async commit patch is still to come */
+	/* if we have already flushed that far, consider async commit records */
+	if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+	{
+		/* use volatile pointer to prevent code rearrangement */
+		volatile XLogCtlData *xlogctl = XLogCtl;
+
+		SpinLockAcquire(&xlogctl->async_commit_lck);
+		WriteRqstPtr = xlogctl->asyncCommitLSN;
+		SpinLockRelease(&xlogctl->async_commit_lck);
+		flexible = false;		/* ensure it all gets written */
+	}
+#endif
+
+	/* Done if already known flushed */
+	if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+		return;
+
+#ifdef WAL_DEBUG
+	if (XLOG_DEBUG)
+		elog(LOG, "xlog bg flush request %X/%X; write %X/%X; flush %X/%X",
+			 WriteRqstPtr.xlogid, WriteRqstPtr.xrecoff,
+			 LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,
+			 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
+#endif
+
+	START_CRIT_SECTION();
+
+	/* now wait for the write lock */
+	LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+	LogwrtResult = XLogCtl->Write.LogwrtResult;
+	if (!XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+	{
+		XLogwrtRqst WriteRqst;
+
+		WriteRqst.Write = WriteRqstPtr;
+		WriteRqst.Flush = WriteRqstPtr;
+		XLogWrite(WriteRqst, flexible, false);
+	}
+	LWLockRelease(WALWriteLock);
+
+	END_CRIT_SECTION();
+}
+
 /*
  * Test whether XLOG data has been flushed up to (at least) the given position.
  *
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 78eb6797db43883893f5417f0cd158fb704bdd16..3ffff2a2cce3fe0ee2bb50fd079a758bcf2b747b 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.234 2007/06/28 00:02:37 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.235 2007/07/24 04:54:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "postmaster/bgwriter.h"
+#include "postmaster/walwriter.h"
 #include "storage/freespace.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
@@ -195,7 +196,7 @@ static IndexList *ILHead = NULL;
  *	 AuxiliaryProcessMain
  *
  *	 The main entry point for auxiliary processes, such as the bgwriter,
- *	 bootstrapper and the shared memory checker code.
+ *	 walwriter, bootstrapper and the shared memory checker code.
  *
  *	 This code is here just because of historical reasons.
  */
@@ -331,6 +332,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			case BgWriterProcess:
 				statmsg = "writer process";
 				break;
+			case WalWriterProcess:
+				statmsg = "wal writer process";
+				break;
 			default:
 				statmsg = "??? process";
 				break;
@@ -419,6 +423,12 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			InitXLOGAccess();
 			BackgroundWriterMain();
 			proc_exit(1);		/* should never return */
+
+		case WalWriterProcess:
+			/* don't set signals, walwriter has its own agenda */
+			InitXLOGAccess();
+			WalWriterMain();
+			proc_exit(1);		/* should never return */
 			
 		default:
 			elog(PANIC, "unrecognized process type: %d", auxType);
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile
index a49e0e393bfc6ffcb575697351589a2051e845d3..7ccba285f2149e173921e7e3a6e9debf1074ad7b 100644
--- a/src/backend/postmaster/Makefile
+++ b/src/backend/postmaster/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for src/backend/postmaster
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.22 2007/01/20 17:16:12 petere Exp $
+#    $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.23 2007/07/24 04:54:09 tgl Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -12,8 +12,8 @@ subdir = src/backend/postmaster
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = bgwriter.o autovacuum.o pgarch.o pgstat.o postmaster.o syslogger.o \
-	fork_process.o
+OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
+	syslogger.o walwriter.o
 
 all: SUBSYS.o
 
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 7a1270b0149eb0b10e7b518d252b430b66463c68..f1f9effae77085b7ec2c23aa25e0d570e3a350ac 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.534 2007/07/23 10:16:54 mha Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.535 2007/07/24 04:54:09 tgl Exp $
  *
  * NOTES
  *
@@ -136,7 +136,7 @@ typedef struct bkend
 {
 	pid_t		pid;			/* process id of backend */
 	long		cancel_key;		/* cancel key for cancels for this backend */
-	bool		is_autovacuum;	/* is it an autovacuum process */
+	bool		is_autovacuum;	/* is it an autovacuum process? */
 } Backend;
 
 static Dllist *BackendList;
@@ -144,9 +144,9 @@ static Dllist *BackendList;
 #ifdef EXEC_BACKEND
 /*
  * Number of entries in the backend table. Twice the number of backends,
- * plus four other subprocesses (stats, bgwriter, autovac, logger).
+ * plus five other subprocesses (stats, bgwriter, walwriter, autovac, logger).
  */
-#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 4)
+#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 5)
 static Backend *ShmemBackendArray;
 #endif
 
@@ -201,6 +201,7 @@ char	   *bonjour_name;
 /* PIDs of special child processes; 0 when not running */
 static pid_t StartupPID = 0,
 			BgWriterPID = 0,
+			WalWriterPID = 0,
 			AutoVacPID = 0,
 			PgArchPID = 0,
         	PgStatPID = 0,
@@ -221,7 +222,7 @@ bool		ClientAuthInProgress = false;		/* T during new-client
 bool redirection_done = false; 
 
 /* received START_AUTOVAC_LAUNCHER signal */
-static bool start_autovac_launcher = false;
+static volatile sig_atomic_t start_autovac_launcher = false;
 
 /*
  * State for assigning random salts and cancel keys.
@@ -365,6 +366,7 @@ static void ShmemBackendArrayRemove(pid_t pid);
 
 #define StartupDataBase()		StartChildProcess(StartupProcess)
 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
+#define StartWalWriter()		StartChildProcess(WalWriterProcess)
 
 /* Macros to check exit status of a child process */
 #define EXIT_STATUS_0(st)  ((st) == 0)
@@ -909,8 +911,9 @@ PostmasterMain(int argc, char *argv[])
 	 *
 	 * CAUTION: when changing this list, check for side-effects on the signal
 	 * handling setup of child processes.  See tcop/postgres.c,
-	 * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/autovacuum.c,
-	 * postmaster/pgarch.c, postmaster/pgstat.c, and postmaster/syslogger.c.
+	 * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
+	 * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and
+	 * postmaster/syslogger.c.
 	 */
 	pqinitmask();
 	PG_SETMASK(&BlockSig);
@@ -1244,6 +1247,15 @@ ServerLoop(void)
 				signal_child(BgWriterPID, SIGUSR2);
 		}
 
+		/*
+		 * Likewise, if we have lost the walwriter process, try to start a
+		 * new one.  We don't need walwriter to complete a shutdown, so
+		 * don't start it if shutdown already initiated.
+		 */
+		if (WalWriterPID == 0 &&
+			StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
+			WalWriterPID = StartWalWriter();
+
 		/* If we have lost the autovacuum launcher, try to start a new one */
 		if (AutoVacPID == 0 &&
 			(AutoVacuumingActive() || start_autovac_launcher) &&
@@ -1251,7 +1263,7 @@ ServerLoop(void)
 		{
 			AutoVacPID = StartAutoVacLauncher();
 			if (AutoVacPID != 0)
-				start_autovac_launcher = false;	/* signal successfully processed */
+				start_autovac_launcher = false;	/* signal processed */
 		}
 
 		/* If we have lost the archiver, try to start a new one */
@@ -1842,6 +1854,8 @@ SIGHUP_handler(SIGNAL_ARGS)
 		SignalChildren(SIGHUP);
 		if (BgWriterPID != 0)
 			signal_child(BgWriterPID, SIGHUP);
+		if (WalWriterPID != 0)
+			signal_child(WalWriterPID, SIGHUP);
 		if (AutoVacPID != 0)
 			signal_child(AutoVacPID, SIGHUP);
 		if (PgArchPID != 0)
@@ -1901,8 +1915,11 @@ pmdie(SIGNAL_ARGS)
 			/* and the autovac launcher too */
 			if (AutoVacPID != 0)
 				signal_child(AutoVacPID, SIGTERM);
+			/* and the walwriter too */
+			if (WalWriterPID != 0)
+				signal_child(WalWriterPID, SIGTERM);
 
-			if (DLGetHead(BackendList) || AutoVacPID != 0)
+			if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
 				break;			/* let reaper() handle this */
 
 			/*
@@ -1938,7 +1955,7 @@ pmdie(SIGNAL_ARGS)
 			ereport(LOG,
 					(errmsg("received fast shutdown request")));
 
-			if (DLGetHead(BackendList) || AutoVacPID != 0)
+			if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
 			{
 				if (!FatalError)
 				{
@@ -1947,6 +1964,8 @@ pmdie(SIGNAL_ARGS)
 					SignalChildren(SIGTERM);
 					if (AutoVacPID != 0)
 						signal_child(AutoVacPID, SIGTERM);
+					if (WalWriterPID != 0)
+						signal_child(WalWriterPID, SIGTERM);
 					/* reaper() does the rest */
 				}
 				break;
@@ -1957,6 +1976,7 @@ pmdie(SIGNAL_ARGS)
 			 *
 			 * Note: if we previously got SIGTERM then we may send SIGUSR2 to
 			 * the bgwriter a second time here.  This should be harmless.
+			 * Ditto for the signals to the other special children.
 			 */
 			if (StartupPID != 0)
 			{
@@ -1993,6 +2013,8 @@ pmdie(SIGNAL_ARGS)
 				signal_child(StartupPID, SIGQUIT);
 			if (BgWriterPID != 0)
 				signal_child(BgWriterPID, SIGQUIT);
+			if (WalWriterPID != 0)
+				signal_child(WalWriterPID, SIGQUIT);
 			if (AutoVacPID != 0)
 				signal_child(AutoVacPID, SIGQUIT);
 			if (PgArchPID != 0)
@@ -2091,13 +2113,14 @@ reaper(SIGNAL_ARGS)
 
 			/*
 			 * Go to shutdown mode if a shutdown request was pending.
-			 * Otherwise, try to start the archiver, stats collector and
-			 * autovacuum launcher.
+			 * Otherwise, try to start the other special children.
 			 */
 			if (Shutdown > NoShutdown && BgWriterPID != 0)
 				signal_child(BgWriterPID, SIGUSR2);
 			else if (Shutdown == NoShutdown)
 			{
+				if (WalWriterPID == 0)
+					WalWriterPID = StartWalWriter();
 				if (XLogArchivingActive() && PgArchPID == 0)
 					PgArchPID = pgarch_start();
 				if (PgStatPID == 0)
@@ -2121,7 +2144,8 @@ reaper(SIGNAL_ARGS)
 			BgWriterPID = 0;
 			if (EXIT_STATUS_0(exitstatus) &&
 				Shutdown > NoShutdown && !FatalError &&
-				!DLGetHead(BackendList) && AutoVacPID == 0)
+				!DLGetHead(BackendList) &&
+				WalWriterPID == 0 && AutoVacPID == 0)
 			{
 				/*
 				 * Normal postmaster exit is here: we've seen normal exit of
@@ -2150,7 +2174,8 @@ reaper(SIGNAL_ARGS)
 			 * required will happen on next postmaster start.
 			 */
 			if (Shutdown > NoShutdown &&
-				!DLGetHead(BackendList) && AutoVacPID == 0)
+				!DLGetHead(BackendList) &&
+				WalWriterPID == 0 && AutoVacPID == 0)
 			{
 				ereport(LOG,
 						(errmsg("abnormal database system shutdown")));
@@ -2161,6 +2186,20 @@ reaper(SIGNAL_ARGS)
 			continue;
 		}
 
+		/*
+		 * Was it the wal writer?  Normal exit can be ignored; we'll
+		 * start a new one at the next iteration of the postmaster's main loop,
+		 * if necessary.  Any other exit condition is treated as a crash.
+		 */
+		if (WalWriterPID != 0 && pid == WalWriterPID)
+		{
+			WalWriterPID = 0;
+			if (!EXIT_STATUS_0(exitstatus))
+				HandleChildCrash(pid, exitstatus,
+								 _("wal writer process"));
+			continue;
+		}
+
 		/*
 		 * Was it the autovacuum launcher?  Normal exit can be ignored; we'll
 		 * start a new one at the next iteration of the postmaster's main loop,
@@ -2233,7 +2272,8 @@ reaper(SIGNAL_ARGS)
 		 * StartupDataBase.  (We can ignore the archiver and stats processes
 		 * here since they are not connected to shmem.)
 		 */
-		if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0 ||
+		if (DLGetHead(BackendList) || StartupPID != 0 ||
+			BgWriterPID != 0 || WalWriterPID != 0 ||
 			AutoVacPID != 0)
 			goto reaper_done;
 		ereport(LOG,
@@ -2249,7 +2289,8 @@ reaper(SIGNAL_ARGS)
 
 	if (Shutdown > NoShutdown)
 	{
-		if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0)
+		if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0 ||
+			WalWriterPID != 0)
 			goto reaper_done;
 		/* Start the bgwriter if not running */
 		if (BgWriterPID == 0)
@@ -2315,7 +2356,8 @@ CleanupBackend(int pid,
 }
 
 /*
- * HandleChildCrash -- cleanup after failed backend, bgwriter, or autovacuum.
+ * HandleChildCrash -- cleanup after failed backend, bgwriter, walwriter,
+ * or autovacuum.
  *
  * The objectives here are to clean up our local state about the child
  * process, and to signal all other remaining children to quickdie.
@@ -2390,6 +2432,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 		signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
 	}
 
+	/* Take care of the walwriter too */
+	if (pid == WalWriterPID)
+		WalWriterPID = 0;
+	else if (WalWriterPID != 0 && !FatalError)
+	{
+		ereport(DEBUG2,
+				(errmsg_internal("sending %s to process %d",
+								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
+								 (int) WalWriterPID)));
+		signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
+	}
+
 	/* Take care of the autovacuum launcher too */
 	if (pid == AutoVacPID)
 		AutoVacPID = 0;
@@ -3622,9 +3676,11 @@ sigusr1_handler(SIGNAL_ARGS)
 		start_autovac_launcher = true;
 	}
 
-	/* The autovacuum launcher wants us to start a worker process. */
 	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER))
+	{
+		/* The autovacuum launcher wants us to start a worker process. */
 		StartAutovacuumWorker();
+	}
 
 	PG_SETMASK(&UnBlockSig);
 
@@ -3805,6 +3861,10 @@ StartChildProcess(AuxProcType type)
 				ereport(LOG,
 				   (errmsg("could not fork background writer process: %m")));
 				break;
+			case WalWriterProcess:
+				ereport(LOG,
+				   (errmsg("could not fork wal writer process: %m")));
+				break;
 			default:
 				ereport(LOG,
 						(errmsg("could not fork process: %m")));
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4d594661a2d7819a9b81fcecdeb96a35ddc069b
--- /dev/null
+++ b/src/backend/postmaster/walwriter.c
@@ -0,0 +1,311 @@
+/*-------------------------------------------------------------------------
+ *
+ * walwriter.c
+ *
+ * The WAL writer background process is new as of Postgres 8.3.  It attempts
+ * to keep regular backends from having to write out (and fsync) WAL pages.
+ * Also, it guarantees that transaction commit records that weren't synced
+ * to disk immediately upon commit (ie, were "asynchronously committed")
+ * will reach disk within a knowable time --- which, as it happens, is at
+ * most three times the wal_writer_delay cycle time.
+ *
+ * Note that as with the bgwriter for shared buffers, regular backends are
+ * still empowered to issue WAL writes and fsyncs when the walwriter doesn't
+ * keep up.
+ *
+ * Because the walwriter's cycle is directly linked to the maximum delay
+ * before async-commit transactions are guaranteed committed, it's probably
+ * unwise to load additional functionality onto it.  For instance, if you've
+ * got a yen to create xlog segments further in advance, that'd be better done
+ * in bgwriter than in walwriter.
+ *
+ * The walwriter is started by the postmaster as soon as the startup subprocess
+ * finishes.  It remains alive until the postmaster commands it to terminate.
+ * Normal termination is by SIGTERM, which instructs the walwriter to exit(0).
+ * Emergency termination is by SIGQUIT; like any backend, the walwriter will
+ * simply abort and exit on SIGQUIT.
+ *
+ * If the walwriter exits unexpectedly, the postmaster treats that the same
+ * as a backend crash: shared memory may be corrupted, so remaining backends
+ * should be killed by SIGQUIT and then a recovery cycle started.
+ *
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/walwriter.c,v 1.1 2007/07/24 04:54:09 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "access/xlog.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "postmaster/walwriter.h"
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+#include "storage/smgr.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+
+
+/*
+ * GUC parameters
+ */
+int			WalWriterDelay = 200;
+
+/*
+ * Flags set by interrupt handlers for later service in the main loop.
+ */
+static volatile sig_atomic_t got_SIGHUP = false;
+static volatile sig_atomic_t shutdown_requested = false;
+
+/* Signal handlers */
+static void wal_quickdie(SIGNAL_ARGS);
+static void WalSigHupHandler(SIGNAL_ARGS);
+static void WalShutdownHandler(SIGNAL_ARGS);
+
+
+/*
+ * Main entry point for walwriter process
+ *
+ * This is invoked from AuxiliaryProcessMain, which has already created the
+ * basic execution environment, but not enabled signals yet.
+ */
+void
+WalWriterMain(void)
+{
+	sigjmp_buf	local_sigjmp_buf;
+	MemoryContext walwriter_context;
+
+	/*
+	 * If possible, make this process a group leader, so that the postmaster
+	 * can signal any child processes too.  (walwriter probably never has
+	 * any child processes, but for consistency we make all postmaster
+	 * child processes do this.)
+	 */
+#ifdef HAVE_SETSID
+	if (setsid() < 0)
+		elog(FATAL, "setsid() failed: %m");
+#endif
+
+	/*
+	 * Properly accept or ignore signals the postmaster might send us
+	 *
+	 * We have no particular use for SIGINT at the moment, but it seems
+	 * reasonable to treat it like SIGTERM.
+	 */
+	pqsignal(SIGHUP, WalSigHupHandler);	/* set flag to read config file */
+	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
+	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
+	pqsignal(SIGQUIT, wal_quickdie);		/* hard crash time */
+	pqsignal(SIGALRM, SIG_IGN);
+	pqsignal(SIGPIPE, SIG_IGN);
+	pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
+	pqsignal(SIGUSR2, SIG_IGN);	/* not used */
+
+	/*
+	 * Reset some signals that are accepted by postmaster but not here
+	 */
+	pqsignal(SIGCHLD, SIG_DFL);
+	pqsignal(SIGTTIN, SIG_DFL);
+	pqsignal(SIGTTOU, SIG_DFL);
+	pqsignal(SIGCONT, SIG_DFL);
+	pqsignal(SIGWINCH, SIG_DFL);
+
+	/* We allow SIGQUIT (quickdie) at all times */
+#ifdef HAVE_SIGPROCMASK
+	sigdelset(&BlockSig, SIGQUIT);
+#else
+	BlockSig &= ~(sigmask(SIGQUIT));
+#endif
+
+	/*
+	 * Create a resource owner to keep track of our resources (not clear
+	 * that we need this, but may as well have one).
+	 */
+	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");
+
+	/*
+	 * Create a memory context that we will do all our work in.  We do this so
+	 * that we can reset the context during error recovery and thereby avoid
+	 * possible memory leaks.  We must not simply run in TopMemoryContext,
+	 * because resetting that would be a really bad idea.
+	 */
+	walwriter_context = AllocSetContextCreate(TopMemoryContext,
+											  "Wal Writer",
+											  ALLOCSET_DEFAULT_MINSIZE,
+											  ALLOCSET_DEFAULT_INITSIZE,
+											  ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContextSwitchTo(walwriter_context);
+
+	/*
+	 * If an exception is encountered, processing resumes here.
+	 *
+	 * This code is heavily based on bgwriter.c, q.v.
+	 */
+	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	{
+		/* Since not using PG_TRY, must reset error stack by hand */
+		error_context_stack = NULL;
+
+		/* Prevent interrupts while cleaning up */
+		HOLD_INTERRUPTS();
+
+		/* Report the error to the server log */
+		EmitErrorReport();
+
+		/*
+		 * These operations are really just a minimal subset of
+		 * AbortTransaction().	We don't have very many resources to worry
+		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
+		 */
+		LWLockReleaseAll();
+		AbortBufferIO();
+		UnlockBuffers();
+		/* buffer pins are released here: */
+		ResourceOwnerRelease(CurrentResourceOwner,
+							 RESOURCE_RELEASE_BEFORE_LOCKS,
+							 false, true);
+		/* we needn't bother with the other ResourceOwnerRelease phases */
+		AtEOXact_Buffers(false);
+
+		/*
+		 * Now return to normal top-level context and clear ErrorContext for
+		 * next time.
+		 */
+		MemoryContextSwitchTo(walwriter_context);
+		FlushErrorState();
+
+		/* Flush any leaked data in the top-level context */
+		MemoryContextResetAndDeleteChildren(walwriter_context);
+
+		/* Now we can allow interrupts again */
+		RESUME_INTERRUPTS();
+
+		/*
+		 * Sleep at least 1 second after any error.  A write error is likely
+		 * to be repeated, and we don't want to be filling the error logs as
+		 * fast as we can.
+		 */
+		pg_usleep(1000000L);
+
+		/*
+		 * Close all open files after any error.  This is helpful on Windows,
+		 * where holding deleted files open causes various strange errors.
+		 * It's not clear we need it elsewhere, but shouldn't hurt.
+		 */
+		smgrcloseall();
+	}
+
+	/* We can now handle ereport(ERROR) */
+	PG_exception_stack = &local_sigjmp_buf;
+
+	/*
+	 * Unblock signals (they were blocked when the postmaster forked us)
+	 */
+	PG_SETMASK(&UnBlockSig);
+
+	/*
+	 * Loop forever
+	 */
+	for (;;)
+	{
+		long		udelay;
+
+		/*
+		 * Emergency bailout if postmaster has died.  This is to avoid the
+		 * necessity for manual cleanup of all postmaster children.
+		 */
+		if (!PostmasterIsAlive(true))
+			exit(1);
+
+		/*
+		 * Process any requests or signals received recently.
+		 */
+		if (got_SIGHUP)
+		{
+			got_SIGHUP = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+		if (shutdown_requested)
+		{
+			/* Normal exit from the walwriter is here */
+			proc_exit(0);		/* done */
+		}
+
+		/*
+		 * Do what we're here for...
+		 */
+		XLogBackgroundFlush();
+
+		/*
+		 * Delay until time to do something more, but fall out of delay
+		 * reasonably quickly if signaled.
+		 */
+		udelay = WalWriterDelay * 1000L;
+		while (udelay > 999999L)
+		{
+			if (got_SIGHUP || shutdown_requested)
+				break;
+			pg_usleep(1000000L);
+			udelay -= 1000000L;
+		}
+		if (!(got_SIGHUP || shutdown_requested))
+			pg_usleep(udelay);
+	}
+}
+
+
+/* --------------------------------
+ *		signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * wal_quickdie() occurs when signalled SIGQUIT by the postmaster.
+ *
+ * Some backend has bought the farm,
+ * so we need to stop what we're doing and exit.
+ */
+static void
+wal_quickdie(SIGNAL_ARGS)
+{
+	PG_SETMASK(&BlockSig);
+
+	/*
+	 * DO NOT proc_exit() -- we're here because shared memory may be
+	 * corrupted, so we don't want to try to clean up our transaction. Just
+	 * nail the windows shut and get out of town.
+	 *
+	 * Note we do exit(2) not exit(0).  This is to force the postmaster into a
+	 * system reset cycle if someone manually sends SIGQUIT to the walwriter
+	 * process.  This is necessary precisely because we don't clean up our
+	 * shared memory state.
+	 */
+	exit(2);
+}
+
+/* SIGHUP: set flag to re-read config file at next convenient time */
+static void
+WalSigHupHandler(SIGNAL_ARGS)
+{
+	got_SIGHUP = true;
+}
+
+/* SIGINT or SIGTERM: set flag to exit normally */
+static void
+WalShutdownHandler(SIGNAL_ARGS)
+{
+	shutdown_requested = true;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 06915017e6e794fe16bce272fa091bfed97b83ad..b2d0ea9cae597f6d6397e2695bceecb6795b9fc3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.406 2007/07/24 01:53:56 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.407 2007/07/24 04:54:09 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -54,6 +54,7 @@
 #include "postmaster/bgwriter.h"
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
+#include "postmaster/walwriter.h"
 #include "storage/fd.h"
 #include "storage/freespace.h"
 #include "tcop/tcopprot.h"
@@ -1509,6 +1510,16 @@ static struct config_int ConfigureNamesInt[] =
 		8, 4, INT_MAX, NULL, NULL
 	},
 
+	{
+		{"wal_writer_delay", PGC_SIGHUP, WAL_SETTINGS,
+			gettext_noop("WAL writer sleep time between WAL flushes."),
+			NULL,
+			GUC_UNIT_MS
+		},
+		&WalWriterDelay,
+		200, 1, 10000, NULL, NULL
+	},
+
 	{
 		{"commit_delay", PGC_USERSET, WAL_CHECKPOINTS,
 			gettext_noop("Sets the delay in microseconds between transaction commit and "
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 51c83ade0afdf324af8d616576ef26c29228fadd..8bfad997ff38cf96b49d0eb333b899e270d0ae99 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -159,6 +159,8 @@
 #full_page_writes = on			# recover from partial page writes
 #wal_buffers = 64kB			# min 32kB
 					# (change requires restart)
+#wal_writer_delay = 200ms		# range 1-10000, in milliseconds
+
 #commit_delay = 0			# range 0-100000, in microseconds
 #commit_siblings = 5			# range 1-1000
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1b4fecdb966f11f485360e28345c109cedcce15c..adc99a6eb0610e0f551279e8d721003bfd992106 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.80 2007/06/30 19:12:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.81 2007/07/24 04:54:09 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -196,6 +196,7 @@ extern CheckpointStatsData CheckpointStats;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
+extern void XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 
 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h
index bbde68ea1b14f0211640498f3fc35de50e931da6..d75626c8d257f5de764265270c9c689811472496 100644
--- a/src/include/bootstrap/bootstrap.h
+++ b/src/include/bootstrap/bootstrap.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.46 2007/03/07 13:35:03 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.47 2007/07/24 04:54:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,8 @@ typedef enum
 	CheckerProcess,
 	BootstrapProcess,
 	StartupProcess,
-	BgWriterProcess
+	BgWriterProcess,
+	WalWriterProcess
 } AuxProcType;
 
 #endif   /* BOOTSTRAP_H */
diff --git a/src/include/postmaster/walwriter.h b/src/include/postmaster/walwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cefe9ad7bafe6cb3764d6764933cbb8a42420ac
--- /dev/null
+++ b/src/include/postmaster/walwriter.h
@@ -0,0 +1,20 @@
+/*-------------------------------------------------------------------------
+ *
+ * walwriter.h
+ *	  Exports from postmaster/walwriter.c.
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ * $PostgreSQL: pgsql/src/include/postmaster/walwriter.h,v 1.1 2007/07/24 04:54:09 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _WALWRITER_H
+#define _WALWRITER_H
+
+/* GUC options */
+extern int	WalWriterDelay;
+
+extern void WalWriterMain(void);
+
+#endif   /* _WALWRITER_H */