diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux index 6d6ff2aed9f9cbbacff27372d526686d5f6f008d..e1ea1e3da4bbf39deda6759817e95b9c7764a694 100644 --- a/contrib/start-scripts/linux +++ b/contrib/start-scripts/linux @@ -24,7 +24,7 @@ # Original author: Ryan Kirkpatrick <pgsql@rkirkpat.net> -# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.9 2009/08/27 16:59:38 tgl Exp $ +# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.10 2010/01/11 18:39:32 tgl Exp $ ## EDIT FROM HERE @@ -40,6 +40,14 @@ PGUSER=postgres # Where to keep a log file PGLOG="$PGDATA/serverlog" +# It's often a good idea to protect the postmaster from being killed by the +# OOM killer (which will tend to preferentially kill the postmaster because +# of the way it accounts for shared memory). Setting the OOM_ADJ value to +# -17 will disable OOM kill altogether. If you enable this, you probably want +# to compile PostgreSQL with "-DLINUX_OOM_ADJ=0", so that individual backends +# can still be killed by the OOM killer. +#OOM_ADJ=-17 + ## STOP EDITING HERE # The path that is to be used for the script @@ -62,6 +70,7 @@ test -x $DAEMON || exit 0 case $1 in start) echo -n "Starting PostgreSQL: " + test x"$OOM_ADJ" != x && echo "$OOM_ADJ" > /proc/self/oom_adj su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" ;; @@ -73,6 +82,7 @@ case $1 in restart) echo -n "Restarting PostgreSQL: " su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w" + test x"$OOM_ADJ" != x && echo "$OOM_ADJ" > /proc/self/oom_adj su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" ;; diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index a68ba64dac52ce68603c1d9d8b967a190535ac12..6213b9253070768df56652c596ec17034ff81e9c 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.429 2009/12/10 06:32:28 petere Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.430 2010/01/11 18:39:32 tgl Exp $ --> <chapter Id="runtime"> <title>Server Setup and Operation</title> @@ -1244,7 +1244,7 @@ default:\ this (consult your system documentation and configuration on where to look for such a message): <programlisting> -Out of Memory: Killed process 12345 (postgres). +Out of Memory: Killed process 12345 (postgres). </programlisting> This indicates that the <filename>postgres</filename> process has been terminated due to memory pressure. @@ -1258,13 +1258,13 @@ Out of Memory: Killed process 12345 (postgres). <productname>PostgreSQL</productname> on a machine where you can be sure that other processes will not run the machine out of memory. If memory is tight, increasing the swap space of the - operating system can help avoiding the problem, because the - out-of-memory (OOM) killer is invoked whenever physical memory and + operating system can help avoid the problem, because the + out-of-memory (OOM) killer is invoked only when physical memory and swap space are exhausted. </para> <para> - On Linux 2.6 and later, an additional measure is to modify the + On Linux 2.6 and later, it is possible to modify the kernel's behavior so that it will not <quote>overcommit</> memory. Although this setting will not prevent the <ulink url="http://lwn.net/Articles/104179/">OOM killer</> from being invoked @@ -1275,11 +1275,31 @@ Out of Memory: Killed process 12345 (postgres). sysctl -w vm.overcommit_memory=2 </programlisting> or placing an equivalent entry in <filename>/etc/sysctl.conf</>. - You might also wish to modify the related setting - <literal>vm.overcommit_ratio</>. For details see the kernel documentation + You might also wish to modify the related setting + <varname>vm.overcommit_ratio</>. For details see the kernel documentation file <filename>Documentation/vm/overcommit-accounting</>. </para> + <para> + Another approach, which can be used with or without altering + <varname>vm.overcommit_memory</>, is to set the process-specific + <varname>oom_adj</> value for the postmaster process to <literal>-17</>, + thereby guaranteeing it will not be targeted by the OOM killer. The + simplest way to do this is to execute +<programlisting> +echo -17 > /proc/self/oom_adj +</programlisting> + in the postmaster's startup script just before invoking the postmaster. + Note that this action must be done as root, or it will have no effect; + so a root-owned startup script is the easiest place to do it. If you + do this, you may also wish to build <productname>PostgreSQL</> + with <literal>-DLINUX_OOM_ADJ=0</> added to <varname>CFLAGS</>. + That will cause postmaster child processes to run with the normal + <varname>oom_adj</> value of zero, so that the OOM killer can still + target them at need. + </para> + + <note> <para> Some vendors' Linux 2.4 kernels are reported to have early versions of the 2.6 overcommit <command>sysctl</command> parameter. However, setting @@ -1294,6 +1314,7 @@ sysctl -w vm.overcommit_memory=2 feature is there. If in any doubt, consult a kernel expert or your kernel vendor. </para> + </note> </sect2> </sect1> diff --git a/src/backend/postmaster/fork_process.c b/src/backend/postmaster/fork_process.c index fea72d7e54cd6de8c822a10acf527134b8c55a63..91ef9de021443e5e9db494cc01f5e4498e51888b 100644 --- a/src/backend/postmaster/fork_process.c +++ b/src/backend/postmaster/fork_process.c @@ -7,12 +7,14 @@ * Copyright (c) 1996-2010, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/fork_process.c,v 1.10 2010/01/02 16:57:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/fork_process.c,v 1.11 2010/01/11 18:39:32 tgl Exp $ */ #include "postgres.h" #include "postmaster/fork_process.h" +#include <fcntl.h> #include <time.h> +#include <sys/stat.h> #include <sys/time.h> #include <unistd.h> @@ -60,6 +62,38 @@ fork_process(void) setitimer(ITIMER_PROF, &prof_itimer, NULL); #endif + /* + * By default, Linux tends to kill the postmaster in out-of-memory + * situations, because it blames the postmaster for the sum of child + * process sizes *including shared memory*. (This is unbelievably + * stupid, but the kernel hackers seem uninterested in improving it.) + * Therefore it's often a good idea to protect the postmaster by + * setting its oom_adj value negative (which has to be done in a + * root-owned startup script). If you just do that much, all child + * processes will also be protected against OOM kill, which might not + * be desirable. You can then choose to build with LINUX_OOM_ADJ + * #defined to 0, or some other value that you want child processes + * to adopt here. + */ +#ifdef LINUX_OOM_ADJ + { + /* + * Use open() not stdio, to ensure we control the open flags. + * Some Linux security environments reject anything but O_WRONLY. + */ + int fd = open("/proc/self/oom_adj", O_WRONLY, 0); + + /* We ignore all errors */ + if (fd >= 0) + { + char buf[16]; + + snprintf(buf, sizeof(buf), "%d\n", LINUX_OOM_ADJ); + (void) write(fd, buf, strlen(buf)); + close(fd); + } + } +#endif /* LINUX_OOM_ADJ */ } return result;