From 23e5b16c71f2706978c5f54aabd28ed23a54b6a5 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 17 Jul 2011 14:19:31 -0400
Subject: [PATCH] Add temp_file_limit GUC parameter to constrain temporary file
 space usage.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The limit is enforced against the total amount of temp file space used by
each session.

Mark Kirkwood, reviewed by Cédric Villemain and Tatsuo Ishii
---
 doc/src/sgml/config.sgml                      | 33 ++++++++-
 src/backend/storage/file/fd.c                 | 69 ++++++++++++++++---
 src/backend/utils/errcodes.txt                | 11 +--
 src/backend/utils/misc/guc.c                  | 15 ++++
 src/backend/utils/misc/postgresql.conf.sample |  5 ++
 src/include/utils/guc.h                       |  2 +
 src/include/utils/guc_tables.h                |  1 +
 7 files changed, 122 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 842558d6735..afa087a3465 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1025,10 +1025,41 @@ SET ENABLE_SEQSCAN TO OFF;
      </variablelist>
      </sect2>
 
+     <sect2 id="runtime-config-resource-disk">
+     <title>Disk</title>
+
+     <variablelist>
+     <varlistentry id="guc-temp-file-limit" xreflabel="temp_file_limit">
+      <term><varname>temp_file_limit</varname> (<type>integer</type>)</term>
+      <indexterm>
+       <primary><varname>temp_file_limit</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Specifies the maximum amount of disk space that a session can use
+        for temporary files, such as sort and hash temporary files, or the
+        storage file for a held cursor.
+        The value is specified in kilobytes, and <literal>-1</> (the
+        default) means no limit.
+        Only superusers can change this setting.
+       </para>
+       <para>
+        This setting constrains the total space used at any instant by all
+        temporary files used by a given <productname>PostgreSQL</> session.
+        Note that disk space used for explicit temporary
+        tables, as opposed to temporary files used behind the scenes in query
+        execution, does <emphasis>not</emphasis> count against this limit.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     </variablelist>
+     </sect2>
+
      <sect2 id="runtime-config-resource-kernel">
      <title>Kernel Resource Usage</title>
-     <variablelist>
 
+     <variablelist>
      <varlistentry id="guc-max-files-per-process" xreflabel="max_files_per_process">
       <term><varname>max_files_per_process</varname> (<type>integer</type>)</term>
       <indexterm>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 820e6dbfd97..884d9151278 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -128,9 +128,6 @@ static int	max_safe_fds = 32;	/* default if not changed */
 #define FD_XACT_TRANSIENT	(1 << 2)	/* T = close (not delete) at aoXact,
 										 * but keep VFD */
 
-/* Flag to tell whether there are files to close/delete at end of transaction */
-static bool have_pending_fd_cleanup = false;
-
 typedef struct vfd
 {
 	int			fd;				/* current FD, or VFD_CLOSED if none */
@@ -140,6 +137,7 @@ typedef struct vfd
 	File		lruMoreRecently;	/* doubly linked recency-of-use list */
 	File		lruLessRecently;
 	off_t		seekPos;		/* current logical file position */
+	off_t		fileSize;		/* current size of file (0 if not temporary) */
 	char	   *fileName;		/* name of file, or NULL for unused VFD */
 	/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
 	int			fileFlags;		/* open(2) flags for (re)opening the file */
@@ -159,6 +157,17 @@ static Size SizeVfdCache = 0;
  */
 static int	nfile = 0;
 
+/* True if there are files to close/delete at end of transaction */
+static bool have_pending_fd_cleanup = false;
+
+/*
+ * Tracks the total size in bytes of all temporary files.  Note: when
+ * temp_file_limit is being enforced, this cannot overflow since the limit
+ * cannot be more than INT_MAX kilobytes.  When not enforcing, it could
+ * theoretically overflow, but we don't care.
+ */
+static uint64 temporary_files_size = 0;
+
 /*
  * List of stdio FILEs and <dirent.h> DIRs opened with AllocateFile
  * and AllocateDir.
@@ -887,6 +896,7 @@ PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
 	vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
 	vfdP->fileMode = fileMode;
 	vfdP->seekPos = 0;
+	vfdP->fileSize = 0;
 	vfdP->fdstate = 0x0;
 	vfdP->resowner = NULL;
 
@@ -1123,6 +1133,10 @@ FileClose(File file)
 			if (unlink(vfdP->fileName))
 				elog(LOG, "could not unlink file \"%s\": %m", vfdP->fileName);
 		}
+
+		/* Subtract its size from current usage */
+		temporary_files_size -= vfdP->fileSize;
+		vfdP->fileSize = 0;
 	}
 
 	/* Unregister it from the resource owner */
@@ -1242,6 +1256,31 @@ FileWrite(File file, char *buffer, int amount)
 	if (returnCode < 0)
 		return returnCode;
 
+	/*
+	 * If enforcing temp_file_limit and it's a temp file, check to see if the
+	 * write would overrun temp_file_limit, and throw an error if so.  Note:
+	 * it's really a modularity violation to throw an error here; we should
+	 * set errno and return -1.  However, there's no way to report a suitable
+	 * error message if we do that.  All current callers would just throw an
+	 * error immediately anyway, so this is safe at present.
+	 */
+	if (temp_file_limit >= 0 && (VfdCache[file].fdstate & FD_TEMPORARY))
+	{
+		off_t	newPos = VfdCache[file].seekPos + amount;
+
+		if (newPos > VfdCache[file].fileSize)
+		{
+			uint64	newTotal = temporary_files_size;
+
+			newTotal += newPos - VfdCache[file].fileSize;
+			if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+						 errmsg("temporary file size exceeds temp_file_limit (%dkB)",
+								temp_file_limit)));
+		}
+	}
+
 retry:
 	errno = 0;
 	returnCode = write(VfdCache[file].fd, buffer, amount);
@@ -1251,7 +1290,21 @@ retry:
 		errno = ENOSPC;
 
 	if (returnCode >= 0)
+	{
 		VfdCache[file].seekPos += returnCode;
+
+		/* maintain fileSize and temporary_files_size if it's a temp file */
+		if (VfdCache[file].fdstate & FD_TEMPORARY)
+		{
+			off_t	newPos = VfdCache[file].seekPos;
+
+			if (newPos > VfdCache[file].fileSize)
+			{
+				temporary_files_size += newPos - VfdCache[file].fileSize;
+				VfdCache[file].fileSize = newPos;
+			}
+		}
+	}
 	else
 	{
 		/*
@@ -1854,11 +1907,11 @@ CleanupTempFiles(bool isProcExit)
 				if (fdstate & FD_TEMPORARY)
 				{
 					/*
-					 * If we're in the process of exiting a backend process, close
-					 * all temporary files. Otherwise, only close temporary files
-					 * local to the current transaction. They should be closed by
-					 * the ResourceOwner mechanism already, so this is just a
-					 * debugging cross-check.
+					 * If we're in the process of exiting a backend process,
+					 * close all temporary files. Otherwise, only close
+					 * temporary files local to the current transaction.
+					 * They should be closed by the ResourceOwner mechanism
+					 * already, so this is just a debugging cross-check.
 					 */
 					if (isProcExit)
 						FileClose(i);
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt
index 0315f6b6f0f..d27fe2c3d90 100644
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -4,7 +4,7 @@
 #
 # Copyright (c) 2003-2011, PostgreSQL Global Development Group
 #
-# This list serves a basis for generating source files containing error
+# This list serves as the basis for generating source files containing error
 # codes. It is kept in a common format to make sure all these source files have
 # the same contents.
 # The files generated from this one are:
@@ -24,14 +24,14 @@
 #      sqlstate    E/W/S    errcode_macro_name    spec_name
 #
 # where sqlstate is a five-character string following the SQLSTATE conventions,
-# the second field determines if the code means an error, a warning or success,
+# the second field indicates if the code means an error, a warning or success,
 # errcode_macro_name is the C macro name starting with ERRCODE that will be put
-# in errcodes.h and spec_name is a lowercase, underscore-separated name that
+# in errcodes.h, and spec_name is a lowercase, underscore-separated name that
 # will be used as the PL/pgSQL condition name and will also be included in the
 # SGML list. The last field is optional, if not present the PL/pgSQL condition
 # and the SGML entry will not be generated.
 #
-# Empty lines and ones starting with a hash are comments.
+# Empty lines and lines starting with a hash are comments.
 #
 # There are also special lines in the format of:
 #
@@ -368,6 +368,7 @@ Section: Class 53 - Insufficient Resources
 53100    E    ERRCODE_DISK_FULL                                              disk_full
 53200    E    ERRCODE_OUT_OF_MEMORY                                          out_of_memory
 53300    E    ERRCODE_TOO_MANY_CONNECTIONS                                   too_many_connections
+53400    E    ERRCODE_CONFIGURATION_LIMIT_EXCEEDED                           configuration_limit_exceeded
 
 Section: Class 54 - Program Limit Exceeded
 
@@ -393,7 +394,7 @@ Section: Class 57 - Operator Intervention
 57P01    E    ERRCODE_ADMIN_SHUTDOWN                                         admin_shutdown
 57P02    E    ERRCODE_CRASH_SHUTDOWN                                         crash_shutdown
 57P03    E    ERRCODE_CANNOT_CONNECT_NOW                                     cannot_connect_now
-57P04    E    ERRCODE_DATABASE_DROPPED										 database_dropped
+57P04    E    ERRCODE_DATABASE_DROPPED                                       database_dropped
 
 Section: Class 58 - System Error (errors external to PostgreSQL itself)
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 72e93107910..3b33549a571 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -425,6 +425,8 @@ int			log_min_duration_statement = -1;
 int			log_temp_files = -1;
 int			trace_recovery_messages = LOG;
 
+int			temp_file_limit = -1;
+
 int			num_temp_buffers = 1024;
 
 char	   *data_directory;
@@ -535,6 +537,8 @@ const char *const config_group_names[] =
 	gettext_noop("Resource Usage"),
 	/* RESOURCES_MEM */
 	gettext_noop("Resource Usage / Memory"),
+	/* RESOURCES_DISK */
+	gettext_noop("Resource Usage / Disk"),
 	/* RESOURCES_KERNEL */
 	gettext_noop("Resource Usage / Kernel Resources"),
 	/* RESOURCES_VACUUM_DELAY */
@@ -1693,6 +1697,17 @@ static struct config_int ConfigureNamesInt[] =
 		check_max_stack_depth, assign_max_stack_depth, NULL
 	},
 
+	{
+		{"temp_file_limit", PGC_SUSET, RESOURCES_DISK,
+			gettext_noop("Limits the total size of all temp files used by each session."),
+			gettext_noop("-1 means no limit."),
+			GUC_UNIT_KB
+		},
+		&temp_file_limit,
+		-1, -1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"vacuum_cost_page_hit", PGC_USERSET, RESOURCES_VACUUM_DELAY,
 			gettext_noop("Vacuum cost for a page found in the buffer cache."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 940329377a4..fa67e7a3d5c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -119,6 +119,11 @@
 #maintenance_work_mem = 16MB		# min 1MB
 #max_stack_depth = 2MB			# min 100kB
 
+# - Disk -
+
+#temp_file_limit = -1			# limits per-session temp file space
+					# in kB, or -1 for no limit
+
 # - Kernel Resource Usage -
 
 #max_files_per_process = 1000		# min 25
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 011f6b7f001..6b3b5c66949 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -209,6 +209,8 @@ extern int	client_min_messages;
 extern int	log_min_duration_statement;
 extern int	log_temp_files;
 
+extern int	temp_file_limit;
+
 extern int	num_temp_buffers;
 
 extern char *data_directory;
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 7f8c69fcf4d..d3b25812a02 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -59,6 +59,7 @@ enum config_group
 	CONN_AUTH_SECURITY,
 	RESOURCES,
 	RESOURCES_MEM,
+	RESOURCES_DISK,
 	RESOURCES_KERNEL,
 	RESOURCES_VACUUM_DELAY,
 	RESOURCES_BGWRITER,
-- 
GitLab