From 6f5c38dcd0dfb391bcbaa683a96f4a62c574d95a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 25 Sep 2007 20:03:38 +0000
Subject: [PATCH] Just-in-time background writing strategy.  This code avoids
 re-scanning buffers that cannot possibly need to be cleaned, and estimates
 how many buffers it should try to clean based on moving averages of recent
 allocation requests and density of reusable buffers.  The patch also adds a
 couple more columns to pg_stat_bgwriter to help measure the effectiveness of
 the bgwriter.

Greg Smith, building on his own work and ideas from several other people,
in particular a much older patch from Itagaki Takahiro.
---
 doc/src/sgml/config.sgml                      |  43 ++-
 doc/src/sgml/monitoring.sgml                  |  24 +-
 src/backend/catalog/system_views.sql          |   6 +-
 src/backend/postmaster/bgwriter.c             |  35 +-
 src/backend/postmaster/pgstat.c               |   4 +-
 src/backend/storage/buffer/bufmgr.c           | 320 +++++++++++++++---
 src/backend/storage/buffer/freelist.c         |  41 ++-
 src/backend/utils/adt/pgstatfuncs.c           |  16 +-
 src/backend/utils/misc/guc.c                  |  12 +-
 src/backend/utils/misc/postgresql.conf.sample |   4 +-
 src/include/catalog/catversion.h              |   4 +-
 src/include/catalog/pg_proc.h                 |   7 +-
 src/include/pgstat.h                          |   6 +-
 src/include/storage/buf_internals.h           |   4 +-
 src/include/storage/bufmgr.h                  |   4 +-
 src/test/regress/expected/rules.out           |   2 +-
 16 files changed, 437 insertions(+), 95 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index fd2fd5d71bf..3cc911b265b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.146 2007/09/24 03:12:23 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.147 2007/09/25 20:03:37 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -1205,35 +1205,42 @@ SET ENABLE_SEQSCAN TO OFF;
        </listitem>
       </varlistentry>
 
-      <varlistentry id="guc-bgwriter-lru-percent" xreflabel="bgwriter_lru_percent">
-       <term><varname>bgwriter_lru_percent</varname> (<type>floating point</type>)</term>
+      <varlistentry id="guc-bgwriter-lru-maxpages" xreflabel="bgwriter_lru_maxpages">
+       <term><varname>bgwriter_lru_maxpages</varname> (<type>integer</type>)</term>
        <indexterm>
-        <primary><varname>bgwriter_lru_percent</> configuration parameter</primary>
+        <primary><varname>bgwriter_lru_maxpages</> configuration parameter</primary>
        </indexterm>
        <listitem>
         <para>
-         To reduce the probability that server processes will need to issue
-         their own writes, the background writer tries to write buffers that
-         are likely to be recycled soon.  In each round, it examines up to
-         <varname>bgwriter_lru_percent</> of the buffers that are nearest to
-         being recycled, and writes any that are dirty.
-         The default value is 1.0 (1% of the total number of shared buffers).
+         In each round, no more than this many buffers will be written
+         by the background writer.  Setting this to zero disables
+         background writing (except for checkpoint activity).
+         The default value is 100 buffers.
          This parameter can only be set in the <filename>postgresql.conf</>
          file or on the server command line.
         </para>
        </listitem>
       </varlistentry>
 
-      <varlistentry id="guc-bgwriter-lru-maxpages" xreflabel="bgwriter_lru_maxpages">
-       <term><varname>bgwriter_lru_maxpages</varname> (<type>integer</type>)</term>
+      <varlistentry id="guc-bgwriter-lru-multiplier" xreflabel="bgwriter_lru_multiplier">
+       <term><varname>bgwriter_lru_multiplier</varname> (<type>floating point</type>)</term>
        <indexterm>
-        <primary><varname>bgwriter_lru_maxpages</> configuration parameter</primary>
+        <primary><varname>bgwriter_lru_multiplier</> configuration parameter</primary>
        </indexterm>
        <listitem>
         <para>
-         In each round, no more than this many buffers will be written
-         as a result of scanning soon-to-be-recycled buffers.
-         The default value is five buffers.
+         Unless limited by <varname>bgwriter_lru_maxpages</>, the number
+         of dirty buffers written in each round is determined by reference
+         to the number of new buffers that have been needed by server
+         processes during recent rounds.  This number is multiplied by
+         <varname>bgwriter_lru_multiplier</> to arrive at the estimate
+         of the number of buffers that will be needed during the next round.
+         Thus, a setting of 1.0 represents a <quote>just in time</> policy
+         of writing exactly the number of buffers predicted to be needed.
+         Larger values provide some cushion against spikes in demand,
+         while smaller values intentionally leave writes to be done by
+         server processes.
+         The default is 2.0.
          This parameter can only be set in the <filename>postgresql.conf</>
          file or on the server command line.
         </para>
@@ -1242,8 +1249,8 @@ SET ENABLE_SEQSCAN TO OFF;
      </variablelist>
 
      <para>
-      Smaller values of <varname>bgwriter_lru_percent</varname> and
-      <varname>bgwriter_lru_maxpages</varname> reduce the extra I/O load
+      Smaller values of <varname>bgwriter_lru_maxpages</varname> and
+      <varname>bgwriter_lru_multiplier</varname> reduce the extra I/O load
       caused by the background writer, but make it more likely that server
       processes will have to issue writes for themselves, delaying interactive
       queries.
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index a2536c91900..2fea0a75b20 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/monitoring.sgml,v 1.53 2007/09/24 03:12:23 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/monitoring.sgml,v 1.54 2007/09/25 20:03:37 tgl Exp $ -->
 
 <chapter id="monitoring">
  <title>Monitoring Database Activity</title>
@@ -237,7 +237,10 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
       background writer: number of scheduled checkpoints, requested
       checkpoints, buffers written by checkpoints and cleaning scans,
       and the number of times the bgwriter stopped a cleaning scan
-      because it had written too many buffers.
+      because it had written too many buffers.  Also includes
+      statistics about the shared buffer pool, including buffers written
+      by backends (that is, not by the background writer) and total buffers
+      allocated.
      </entry>
      </row>
 
@@ -817,6 +820,23 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
       </entry>
      </row>
 
+     <row>
+      <entry><literal><function>pg_stat_get_buf_written_backend</function>()</literal></entry>
+      <entry><type>bigint</type></entry>
+      <entry>
+       The number of buffers written by backends because they needed
+       to allocate a new buffer
+      </entry>
+     </row>
+
+     <row>
+      <entry><literal><function>pg_stat_get_buf_alloc</function>()</literal></entry>
+      <entry><type>bigint</type></entry>
+      <entry>
+       The total number of buffer allocations
+      </entry>
+     </row>
+
      <row>
       <entry><literal><function>pg_stat_clear_snapshot</function>()</literal></entry>
       <entry><type>void</type></entry>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 3e76bd17253..5e557efef45 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1996-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.45 2007/09/20 17:56:30 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.46 2007/09/25 20:03:37 tgl Exp $
  */
 
 CREATE VIEW pg_roles AS 
@@ -382,7 +382,9 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req,
         pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint,
         pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean,
-        pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean;
+        pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean,
+        pg_stat_get_buf_written_backend() AS buffers_backend,
+        pg_stat_get_buf_alloc() AS buffers_alloc;
 
 -- Tsearch debug function. Defined here because it'd be pretty unwieldy
 -- to put it into pg_proc.h
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index a905b7f56e1..fc649df0d1b 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.43 2007/09/16 16:33:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.44 2007/09/25 20:03:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -98,8 +98,14 @@
  * requesting backends since the last checkpoint start.  The flags are
  * chosen so that OR'ing is the correct way to combine multiple requests.
  *
+ * num_backend_writes is used to count the number of buffer writes performed
+ * by non-bgwriter processes.  This counter should be wide enough that it
+ * can't overflow during a single bgwriter cycle.
+ *
  * The requests array holds fsync requests sent by backends and not yet
- * absorbed by the bgwriter.  Unlike the checkpoint fields, the requests
+ * absorbed by the bgwriter.
+ *
+ * Unlike the checkpoint fields, num_backend_writes and the requests
  * fields are protected by BgWriterCommLock.
  *----------
  */
@@ -122,6 +128,8 @@ typedef struct
 
 	int			ckpt_flags;		/* checkpoint flags, as defined in xlog.h */
 
+	uint32		num_backend_writes;	/* counts non-bgwriter buffer writes */
+
 	int			num_requests;	/* current # of requests */
 	int			max_requests;	/* allocated array size */
 	BgWriterRequest requests[1];	/* VARIABLE LENGTH ARRAY */
@@ -566,8 +574,7 @@ BgWriterNap(void)
 	 *
 	 * We absorb pending requests after each short sleep.
 	 */
-	if ((bgwriter_lru_percent > 0.0 && bgwriter_lru_maxpages > 0) ||
-		ckpt_active)
+	if (bgwriter_lru_maxpages > 0 || ckpt_active)
 		udelay = BgWriterDelay * 1000L;
 	else if (XLogArchiveTimeout > 0)
 		udelay = 1000000L;	/* One second */
@@ -648,12 +655,13 @@ CheckpointWriteDelay(int flags, double progress)
 			got_SIGHUP = false;
 			ProcessConfigFile(PGC_SIGHUP);
 		}
-		BgBufferSync();
-		CheckArchiveTimeout();
-		BgWriterNap();
 
 		AbsorbFsyncRequests();
 		absorb_counter = WRITES_PER_ABSORB;
+
+		BgBufferSync();
+		CheckArchiveTimeout();
+		BgWriterNap();
 	}
 	else if (--absorb_counter <= 0)
 	{
@@ -963,7 +971,8 @@ RequestCheckpoint(int flags)
  * Whenever a backend is compelled to write directly to a relation
  * (which should be seldom, if the bgwriter is getting its job done),
  * the backend calls this routine to pass over knowledge that the relation
- * is dirty and must be fsync'd before next checkpoint.
+ * is dirty and must be fsync'd before next checkpoint.  We also use this
+ * opportunity to count such writes for statistical purposes.
  *
  * segno specifies which segment (not block!) of the relation needs to be
  * fsync'd.  (Since the valid range is much less than BlockNumber, we can
@@ -987,7 +996,13 @@ ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno)
 	if (!IsUnderPostmaster)
 		return false;			/* probably shouldn't even get here */
 
+	Assert(!am_bg_writer);
+
 	LWLockAcquire(BgWriterCommLock, LW_EXCLUSIVE);
+
+	/* we count non-bgwriter writes even when the request queue overflows */
+	BgWriterShmem->num_backend_writes++;
+
 	if (BgWriterShmem->bgwriter_pid == 0 ||
 		BgWriterShmem->num_requests >= BgWriterShmem->max_requests)
 	{
@@ -1035,6 +1050,10 @@ AbsorbFsyncRequests(void)
 	 */
 	LWLockAcquire(BgWriterCommLock, LW_EXCLUSIVE);
 
+	/* Transfer write count into pending pgstats message */
+	BgWriterStats.m_buf_written_backend += BgWriterShmem->num_backend_writes;
+	BgWriterShmem->num_backend_writes = 0;
+
 	n = BgWriterShmem->num_requests;
 	if (n > 0)
 	{
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 20ae5be6c99..8623dbd0055 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -13,7 +13,7 @@
  *
  *	Copyright (c) 2001-2007, PostgreSQL Global Development Group
  *
- *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.165 2007/09/24 03:12:23 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.166 2007/09/25 20:03:37 tgl Exp $
  * ----------
  */
 #include "postgres.h"
@@ -3165,4 +3165,6 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 	globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
 	globalStats.buf_written_clean += msg->m_buf_written_clean;
 	globalStats.maxwritten_clean += msg->m_maxwritten_clean;
+	globalStats.buf_written_backend += msg->m_buf_written_backend;
+	globalStats.buf_alloc += msg->m_buf_alloc;
 }
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 9c0ef67f6bb..6ba935a09af 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.224 2007/09/20 17:56:31 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.225 2007/09/25 20:03:37 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,11 +52,15 @@
 #define LocalBufHdrGetBlock(bufHdr) \
 	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
+/* Bits in SyncOneBuffer's return value */
+#define BUF_WRITTEN				0x01
+#define BUF_REUSABLE			0x02
+
 
 /* GUC variables */
 bool		zero_damaged_pages = false;
-double		bgwriter_lru_percent = 1.0;
-int			bgwriter_lru_maxpages = 5;
+int			bgwriter_lru_maxpages = 100;
+double		bgwriter_lru_multiplier = 2.0;
 
 
 long		NDirectFileRead;	/* some I/O's are direct file access. bypass
@@ -79,7 +83,7 @@ static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
 static void BufferSync(int flags);
-static bool SyncOneBuffer(int buf_id, bool skip_pinned);
+static int	SyncOneBuffer(int buf_id, bool skip_recently_used);
 static void WaitIO(volatile BufferDesc *buf);
 static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
 static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
@@ -1043,8 +1047,11 @@ BufferSync(int flags)
 	 * Loop over all buffers again, and write the ones (still) marked with
 	 * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep
 	 * point since we might as well dump soon-to-be-recycled buffers first.
+	 *
+	 * Note that we don't read the buffer alloc count here --- that should
+	 * be left untouched till the next BgBufferSync() call.
 	 */
-	buf_id = StrategySyncStart();
+	buf_id = StrategySyncStart(NULL, NULL);
 	num_to_scan = NBuffers;
 	num_written = 0;
 	while (num_to_scan-- > 0)
@@ -1065,7 +1072,7 @@ BufferSync(int flags)
 		 */
 		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
 		{
-			if (SyncOneBuffer(buf_id, false))
+			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
 			{
 				BgWriterStats.m_buf_written_checkpoints++;
 				num_written++;
@@ -1112,61 +1119,289 @@ BufferSync(int flags)
 void
 BgBufferSync(void)
 {
-	int			buf_id;
+	/* info obtained from freelist.c */
+	int			strategy_buf_id;
+	uint32		strategy_passes;
+	uint32		recent_alloc;
+
+	/*
+	 * Information saved between calls so we can determine the strategy
+	 * point's advance rate and avoid scanning already-cleaned buffers.
+	 */
+	static bool	saved_info_valid = false;
+	static int	prev_strategy_buf_id;
+	static uint32 prev_strategy_passes;
+	static int	next_to_clean;
+	static uint32 next_passes;
+
+	/* Moving averages of allocation rate and clean-buffer density */
+	static float smoothed_alloc = 0;
+	static float smoothed_density = 10.0;
+
+	/* Potentially these could be tunables, but for now, not */
+	float		smoothing_samples = 16;
+	float		scan_whole_pool_milliseconds = 120000.0;
+
+	/* Used to compute how far we scan ahead */
+	long		strategy_delta;
+	int			bufs_to_lap;
+	int			bufs_ahead;
+	float		scans_per_alloc;
+	int			reusable_buffers_est;
+	int			upcoming_alloc_est;
+	int			min_scan_buffers;
+
+	/* Variables for the scanning loop proper */
 	int			num_to_scan;
 	int			num_written;
+	int			reusable_buffers;
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+	/*
+	 * Find out where the freelist clock sweep currently is, and how
+	 * many buffer allocations have happened since our last call.
+	 */
+	strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
+
+	/* Report buffer alloc counts to pgstat */
+	BgWriterStats.m_buf_alloc += recent_alloc;
+
+	/*
+	 * If we're not running the LRU scan, just stop after doing the
+	 * stats stuff.  We mark the saved state invalid so that we can recover
+	 * sanely if LRU scan is turned back on later.
+	 */
+	if (bgwriter_lru_maxpages <= 0)
+	{
+		saved_info_valid = false;
+		return;
+	}
+
+	/*
+	 * Compute strategy_delta = how many buffers have been scanned by the
+	 * clock sweep since last time.  If first time through, assume none.
+	 * Then see if we are still ahead of the clock sweep, and if so, how many
+	 * buffers we could scan before we'd catch up with it and "lap" it.
+	 * Note: weird-looking coding of xxx_passes comparisons are to avoid
+	 * bogus behavior when the passes counts wrap around.
+	 */
+	if (saved_info_valid)
+	{
+		int32	passes_delta = strategy_passes - prev_strategy_passes;
+
+		strategy_delta = strategy_buf_id - prev_strategy_buf_id;
+		strategy_delta += (long) passes_delta * NBuffers;
+		Assert(strategy_delta >= 0);
+
+		if ((int32) (next_passes - strategy_passes) > 0)
+		{
+			/* we're one pass ahead of the strategy point */
+			bufs_to_lap = strategy_buf_id - next_to_clean;
+#ifdef BGW_DEBUG
+			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
+				 next_passes, next_to_clean,
+				 strategy_passes, strategy_buf_id,
+				 strategy_delta, bufs_to_lap);
+#endif
+		}
+		else if (next_passes == strategy_passes &&
+				 next_to_clean >= strategy_buf_id)
+		{
+			/* on same pass, but ahead or at least not behind */
+			bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
+#ifdef BGW_DEBUG
+			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
+				 next_passes, next_to_clean,
+				 strategy_passes, strategy_buf_id,
+				 strategy_delta, bufs_to_lap);
+#endif
+		}
+		else
+		{
+			/*
+			 * We're behind, so skip forward to the strategy point
+			 * and start cleaning from there.
+			 */
+#ifdef BGW_DEBUG
+			elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
+				 next_passes, next_to_clean,
+				 strategy_passes, strategy_buf_id,
+				 strategy_delta);
+#endif
+			next_to_clean = strategy_buf_id;
+			next_passes = strategy_passes;
+			bufs_to_lap = NBuffers;
+		}
+	}
+	else
+	{
+		/*
+		 * Initializing at startup or after LRU scanning had been off.
+		 * Always start at the strategy point.
+		 */
+#ifdef BGW_DEBUG
+		elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
+			 strategy_passes, strategy_buf_id);
+#endif
+		strategy_delta = 0;
+		next_to_clean = strategy_buf_id;
+		next_passes = strategy_passes;
+		bufs_to_lap = NBuffers;
+	}
+
+	/* Update saved info for next time */
+	prev_strategy_buf_id = strategy_buf_id;
+	prev_strategy_passes = strategy_passes;
+	saved_info_valid = true;
+
+	/*
+	 * Compute how many buffers had to be scanned for each new allocation,
+	 * ie, 1/density of reusable buffers, and track a moving average of that.
+	 *
+	 * If the strategy point didn't move, we don't update the density estimate
+	 */
+	if (strategy_delta > 0 && recent_alloc > 0)
+	{
+		scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
+		smoothed_density += (scans_per_alloc - smoothed_density) /
+			smoothing_samples;
+	}
+
+	/*
+	 * Estimate how many reusable buffers there are between the current
+	 * strategy point and where we've scanned ahead to, based on the
+	 * smoothed density estimate.
+	 */
+	bufs_ahead = NBuffers - bufs_to_lap;
+	reusable_buffers_est = (float) bufs_ahead / smoothed_density;
+
+	/*
+	 * Track a moving average of recent buffer allocations.  Here, rather
+	 * than a true average we want a fast-attack, slow-decline behavior:
+	 * we immediately follow any increase.
+	 */
+	if (smoothed_alloc <= (float) recent_alloc)
+		smoothed_alloc = recent_alloc;
+	else
+		smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
+			smoothing_samples;
+
+	/* Scale the estimate by a GUC to allow more aggressive tuning. */
+	upcoming_alloc_est = smoothed_alloc * bgwriter_lru_multiplier;
 
 	/*
-	 * The purpose of this sweep is to ensure that buffers that
-	 * will be recycled soon are clean when needed; these buffers are the ones
-	 * just ahead of the StrategySyncStart point. 
+	 * Even in cases where there's been little or no buffer allocation
+	 * activity, we want to make a small amount of progress through the buffer
+	 * cache so that as many reusable buffers as possible are clean
+	 * after an idle period.
 	 *
-	 * This loop considers only unpinned buffers close to the clock sweep
-	 * point.
+	 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many
+	 * times the BGW will be called during the scan_whole_pool time;
+	 * slice the buffer pool into that many sections.
 	 */
-	if (bgwriter_lru_percent > 0.0 && bgwriter_lru_maxpages > 0)
+	min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
+
+	if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
 	{
-		num_to_scan = (int) ((NBuffers * bgwriter_lru_percent + 99) / 100);
-		num_written = 0;
+#ifdef BGW_DEBUG
+		elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
+			 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
+#endif
+		upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
+	}
+
+	/*
+	 * Now write out dirty reusable buffers, working forward from the
+	 * next_to_clean point, until we have lapped the strategy scan, or
+	 * cleaned enough buffers to match our estimate of the next cycle's
+	 * allocation requirements, or hit the bgwriter_lru_maxpages limit.
+	 */
+
+	/* Make sure we can handle the pin inside SyncOneBuffer */
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
+	num_to_scan = bufs_to_lap;
+	num_written = 0;
+	reusable_buffers = reusable_buffers_est;
 
-		buf_id = StrategySyncStart();
+	/* Execute the LRU scan */
+	while (num_to_scan-- > 0 && reusable_buffers < upcoming_alloc_est)
+	{
+		int		buffer_state = SyncOneBuffer(next_to_clean, true);
 
-		while (num_to_scan-- > 0)
+		if (buffer_state & BUF_WRITTEN)
 		{
-			if (SyncOneBuffer(buf_id, true))
+			reusable_buffers++;
+			if (++num_written >= bgwriter_lru_maxpages)
 			{
-				if (++num_written >= bgwriter_lru_maxpages)
-				{
-					BgWriterStats.m_maxwritten_clean++;
-					break;
-				}
+				BgWriterStats.m_maxwritten_clean++;
+				break;
 			}
-			if (++buf_id >= NBuffers)
-				buf_id = 0;
 		}
-		BgWriterStats.m_buf_written_clean += num_written;
+		else if (buffer_state & BUF_REUSABLE)
+			reusable_buffers++;
+
+		if (++next_to_clean >= NBuffers)
+		{
+			next_to_clean = 0;
+			next_passes++;
+		}
+	}
+
+	BgWriterStats.m_buf_written_clean += num_written;
+
+#ifdef BGW_DEBUG
+	elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
+		 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
+		 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
+		 bufs_to_lap - num_to_scan - 1,
+		 num_written,
+		 reusable_buffers - reusable_buffers_est);
+#endif
+
+	/*
+	 * Consider the above scan as being like a new allocation scan.
+	 * Characterize its density and update the smoothed one based on it.
+	 * This effectively halves the moving average period in cases where
+	 * both the strategy and the background writer are doing some useful
+	 * scanning, which is helpful because a long memory isn't as desirable
+	 * on the density estimates.
+	 */
+	strategy_delta = bufs_to_lap - num_to_scan - 1;
+	recent_alloc = reusable_buffers - reusable_buffers_est;
+	if (strategy_delta > 0 && recent_alloc > 0)
+	{
+		scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
+		smoothed_density += (scans_per_alloc - smoothed_density) /
+			smoothing_samples;
+
+#ifdef BGW_DEBUG
+		elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
+			 recent_alloc, strategy_delta, scans_per_alloc, smoothed_density);
+#endif
 	}
 }
 
 /*
  * SyncOneBuffer -- process a single buffer during syncing.
  *
- * If skip_pinned is true, we don't write currently-pinned buffers, nor
+ * If skip_recently_used is true, we don't write currently-pinned buffers, nor
  * buffers marked recently used, as these are not replacement candidates.
  *
- * Returns true if buffer was written, else false.	(This could be in error
- * if FlushBuffers finds the buffer clean after locking it, but we don't
- * care all that much.)
+ * Returns a bitmask containing the following flag bits:
+ *	BUF_WRITTEN: we wrote the buffer.
+ *	BUF_REUSABLE: buffer is available for replacement, ie, it has
+ *		pin count 0 and usage count 0.
+ *
+ * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
+ * after locking it, but we don't care all that much.)
  *
  * Note: caller must have done ResourceOwnerEnlargeBuffers.
  */
-static bool
-SyncOneBuffer(int buf_id, bool skip_pinned)
+static int
+SyncOneBuffer(int buf_id, bool skip_recently_used)
 {
 	volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+	int		result = 0;
 
 	/*
 	 * Check whether buffer needs writing.
@@ -1178,16 +1413,21 @@ SyncOneBuffer(int buf_id, bool skip_pinned)
 	 * upcoming changes and so we are not required to write such dirty buffer.
 	 */
 	LockBufHdr(bufHdr);
-	if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
+
+	if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
+		result |= BUF_REUSABLE;
+	else if (skip_recently_used)
 	{
+		/* Caller told us not to write recently-used buffers */
 		UnlockBufHdr(bufHdr);
-		return false;
+		return result;
 	}
-	if (skip_pinned &&
-		(bufHdr->refcount != 0 || bufHdr->usage_count != 0))
+
+	if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
 	{
+		/* It's clean, so nothing to do */
 		UnlockBufHdr(bufHdr);
-		return false;
+		return result;
 	}
 
 	/*
@@ -1202,7 +1442,7 @@ SyncOneBuffer(int buf_id, bool skip_pinned)
 	LWLockRelease(bufHdr->content_lock);
 	UnpinBuffer(bufHdr, true);
 
-	return true;
+	return result | BUF_WRITTEN;
 }
 
 
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index a3e2c7c4422..781b754cfce 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.60 2007/06/08 18:23:52 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.61 2007/09/25 20:03:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,6 +34,13 @@ typedef struct
 	 * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
 	 * when the list is empty)
 	 */
+
+	/*
+	 * Statistics.  These counters should be wide enough that they can't
+	 * overflow during a single bgwriter cycle.
+	 */
+	uint32		completePasses;		/* Complete cycles of the clock sweep */
+	uint32		numBufferAllocs;	/* Buffers allocated since last reset */
 } BufferStrategyControl;
 
 /* Pointers to shared state */
@@ -118,6 +125,13 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 	*lock_held = true;
 	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
+	/*
+	 * We count buffer allocation requests so that the bgwriter can estimate
+	 * the rate of buffer consumption.  Note that buffers recycled by a
+	 * strategy object are intentionally not counted here.
+	 */
+	StrategyControl->numBufferAllocs++;
+
 	/*
 	 * Try to get a buffer from the freelist.  Note that the freeNext fields
 	 * are considered to be protected by the BufFreelistLock not the
@@ -157,7 +171,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 		buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];
 
 		if (++StrategyControl->nextVictimBuffer >= NBuffers)
+		{
 			StrategyControl->nextVictimBuffer = 0;
+			StrategyControl->completePasses++;
+		}
 
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -226,18 +243,26 @@ StrategyFreeBuffer(volatile BufferDesc *buf)
  *
  * The result is the buffer index of the best buffer to sync first.
  * BufferSync() will proceed circularly around the buffer array from there.
+ *
+ * In addition, we return the completed-pass count (which is effectively
+ * the higher-order bits of nextVictimBuffer) and the count of recent buffer
+ * allocs if non-NULL pointers are passed.  The alloc count is reset after
+ * being read.
  */
 int
-StrategySyncStart(void)
+StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
 {
 	int			result;
 
-	/*
-	 * We could probably dispense with the locking here, but just to be safe
-	 * ...
-	 */
 	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 	result = StrategyControl->nextVictimBuffer;
+	if (complete_passes)
+		*complete_passes = StrategyControl->completePasses;
+	if (num_buf_alloc)
+	{
+		*num_buf_alloc = StrategyControl->numBufferAllocs;
+		StrategyControl->numBufferAllocs = 0;
+	}
 	LWLockRelease(BufFreelistLock);
 	return result;
 }
@@ -313,6 +338,10 @@ StrategyInitialize(bool init)
 
 		/* Initialize the clock sweep pointer */
 		StrategyControl->nextVictimBuffer = 0;
+
+		/* Clear statistics */
+		StrategyControl->completePasses = 0;
+		StrategyControl->numBufferAllocs = 0;
 	}
 	else
 		Assert(!init);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 954e174bb71..f162381745c 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/pgstatfuncs.c,v 1.45 2007/09/20 17:56:31 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/pgstatfuncs.c,v 1.46 2007/09/25 20:03:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,6 +67,8 @@ extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_clear_snapshot(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset(PG_FUNCTION_ARGS);
@@ -813,6 +815,18 @@ pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_clean);
 }
 
+Datum
+pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend);
+}
+
+Datum
+pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
+}
+
 
 /* Discard the active statistics snapshot */
 Datum
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 027d1b25e5f..3e32b6e54a1 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.421 2007/09/24 03:12:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.422 2007/09/25 20:03:38 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1574,7 +1574,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&bgwriter_lru_maxpages,
-		5, 0, 1000, NULL, NULL
+		100, 0, 1000, NULL, NULL
 	},
 
 	{
@@ -1821,12 +1821,12 @@ static struct config_real ConfigureNamesReal[] =
 	},
 
 	{
-		{"bgwriter_lru_percent", PGC_SIGHUP, RESOURCES,
-			gettext_noop("Background writer percentage of LRU buffers to flush per round."),
+		{"bgwriter_lru_multiplier", PGC_SIGHUP, RESOURCES,
+			gettext_noop("Background writer multiplier on average buffers to scan per round."),
 			NULL
 		},
-		&bgwriter_lru_percent,
-		1.0, 0.0, 100.0, NULL, NULL
+		&bgwriter_lru_multiplier,
+		2.0, 0.0, 10.0, NULL, NULL
 	},
 
 	{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f63ba382017..8463261e5bc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -138,8 +138,8 @@
 # - Background writer -
 
 #bgwriter_delay = 200ms			# 10-10000ms between rounds
-#bgwriter_lru_percent = 1.0		# 0-100% of LRU buffers scanned/round
-#bgwriter_lru_maxpages = 5		# 0-1000 buffers max written/round
+#bgwriter_lru_maxpages = 100		# 0-1000 max buffers written/round
+#bgwriter_lru_multiplier = 2.0		# 0-10.0 multiplier on buffers scanned/round
 
 
 #---------------------------------------------------------------------------
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 4c1b84bd62b..d00770acc21 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.427 2007/09/21 21:25:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.428 2007/09/25 20:03:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200709211
+#define CATALOG_VERSION_NO	200709241
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 6dba4dbc39d..9578ff1e735 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.472 2007/09/24 01:29:29 adunstan Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.473 2007/09/25 20:03:38 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2942,6 +2942,11 @@ DATA(insert OID = 2772 ( pg_stat_get_bgwriter_buf_written_clean PGNSP PGUID 12 1
 DESCR("statistics: number of buffers written by the bgwriter for cleaning dirty buffers");
 DATA(insert OID = 2773 ( pg_stat_get_bgwriter_maxwritten_clean PGNSP PGUID 12 1 0 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_maxwritten_clean - _null_ _null_ ));
 DESCR("statistics: number of times the bgwriter stopped processing when it had written too many buffers while cleaning");
+DATA(insert OID = 2775 ( pg_stat_get_buf_written_backend PGNSP PGUID 12 1 0 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_buf_written_backend - _null_ _null_ ));
+DESCR("statistics: number of buffers written by backends");
+DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_buf_alloc - _null_ _null_ ));
+DESCR("statistics: number of buffer allocations");
+
 DATA(insert OID = 2230 (  pg_stat_clear_snapshot		PGNSP PGUID 12 1 0 f f f f v 0 2278  "" _null_ _null_ _null_	pg_stat_clear_snapshot - _null_ _null_ ));
 DESCR("statistics: discard current transaction's statistics snapshot");
 DATA(insert OID = 2274 (  pg_stat_reset					PGNSP PGUID 12 1 0 f f f f v 0 2278  "" _null_ _null_ _null_	pg_stat_reset - _null_ _null_ ));
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 2f53fdcd7c8..2d3c698953a 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -5,7 +5,7 @@
  *
  *	Copyright (c) 2001-2007, PostgreSQL Global Development Group
  *
- *	$PostgreSQL: pgsql/src/include/pgstat.h,v 1.67 2007/09/24 03:12:23 tgl Exp $
+ *	$PostgreSQL: pgsql/src/include/pgstat.h,v 1.68 2007/09/25 20:03:38 tgl Exp $
  * ----------
  */
 #ifndef PGSTAT_H
@@ -294,6 +294,8 @@ typedef struct PgStat_MsgBgWriter
 	PgStat_Counter	m_buf_written_checkpoints;
 	PgStat_Counter	m_buf_written_clean;
 	PgStat_Counter	m_maxwritten_clean;
+	PgStat_Counter  m_buf_written_backend;
+	PgStat_Counter  m_buf_alloc;
 } PgStat_MsgBgWriter;
 
 
@@ -394,6 +396,8 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter  buf_written_checkpoints;
 	PgStat_Counter  buf_written_clean;
 	PgStat_Counter  maxwritten_clean;
+	PgStat_Counter  buf_written_backend;
+	PgStat_Counter  buf_alloc;
 } PgStat_GlobalStats;
 
 
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 4afd677b9a6..34ad286ec24 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.92 2007/07/25 12:22:53 mha Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.93 2007/09/25 20:03:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -188,7 +188,7 @@ extern void StrategyFreeBuffer(volatile BufferDesc *buf);
 extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
 								 volatile BufferDesc *buf);
 
-extern int	StrategySyncStart(void);
+extern int	StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
 
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 1324befa1e2..d40f39ccab4 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.107 2007/09/20 17:56:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.108 2007/09/25 20:03:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,8 +32,8 @@ extern PGDLLIMPORT int NBuffers;
 
 /* in bufmgr.c */
 extern bool zero_damaged_pages;
-extern double bgwriter_lru_percent;
 extern int	bgwriter_lru_maxpages;
+extern double bgwriter_lru_multiplier;
 
 /* in buf_init.c */
 extern PGDLLIMPORT char *BufferBlocks;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 3fc65ea2350..3439642fe65 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1292,7 +1292,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  pg_stat_activity         | SELECT d.oid AS datid, d.datname, pg_stat_get_backend_pid(s.backendid) AS procpid, pg_stat_get_backend_userid(s.backendid) AS usesysid, u.rolname AS usename, pg_stat_get_backend_activity(s.backendid) AS current_query, pg_stat_get_backend_waiting(s.backendid) AS waiting, pg_stat_get_backend_xact_start(s.backendid) AS xact_start, pg_stat_get_backend_activity_start(s.backendid) AS query_start, pg_stat_get_backend_start(s.backendid) AS backend_start, pg_stat_get_backend_client_addr(s.backendid) AS client_addr, pg_stat_get_backend_client_port(s.backendid) AS client_port FROM pg_database d, (SELECT pg_stat_get_backend_idset() AS backendid) s, pg_authid u WHERE ((pg_stat_get_backend_dbid(s.backendid) = d.oid) AND (pg_stat_get_backend_userid(s.backendid) = u.oid));
  pg_stat_all_indexes      | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
  pg_stat_all_tables       | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
- pg_stat_bgwriter         | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean;
+ pg_stat_bgwriter         | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc;
  pg_stat_database         | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d;
  pg_stat_sys_indexes      | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));
  pg_stat_sys_tables       | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
-- 
GitLab