From c8196c87a76b86c514e1f245624a21ab068c9012 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 1 Oct 2004 18:30:25 +0000
Subject: [PATCH] Adjust postmaster to recognize that a lockfile containing its
 parent's PID must be stale.  Tweak example startup scripts to not use pg_ctl
 but launch the postmaster directly, thereby ensuring that only the
 postmaster's direct parent shell will be a postgres-owned process.  In
 combination these should fix the longstanding problem of the postmaster
 sometimes refusing to start during reboot because it thinks the old lockfile
 is not stale.

---
 contrib/start-scripts/PostgreSQL.darwin | 26 ++++++++++++--------
 contrib/start-scripts/freebsd           | 23 +++++++++++-------
 contrib/start-scripts/linux             | 28 +++++++++++++---------
 src/backend/utils/init/miscinit.c       | 32 ++++++++++++++++++-------
 4 files changed, 72 insertions(+), 37 deletions(-)

diff --git a/contrib/start-scripts/PostgreSQL.darwin b/contrib/start-scripts/PostgreSQL.darwin
index e953a3091b9..3e4b86a7f3b 100755
--- a/contrib/start-scripts/PostgreSQL.darwin
+++ b/contrib/start-scripts/PostgreSQL.darwin
@@ -48,7 +48,7 @@ prefix="/usr/local/pgsql"
 # Data directory
 PGDATA="/usr/local/pgsql/data"
 
-# Who to run pg_ctl as, should be "postgres".
+# Who to run the postmaster as, usually "postgres".  (NOT "root")
 PGUSER="postgres"
 
 # the logfile path and name (NEEDS to be writeable by PGUSER)
@@ -68,8 +68,13 @@ ROTATESEC="604800"
 # The path that is to be used for the script
 PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
 
-# What to use to start up the postmaster
-DAEMON="$prefix/bin/pg_ctl"
+# What to use to start up the postmaster (we do NOT use pg_ctl for this,
+# as it adds no value and can cause the postmaster to misrecognize a stale
+# lock file)
+DAEMON="$prefix/bin/postmaster"
+
+# What to use to shut down the postmaster
+PGCTL="$prefix/bin/pg_ctl"
 
 # The apache log rotation utility
 LOGUTIL="/usr/sbin/rotatelogs"
@@ -80,27 +85,28 @@ StartService () {
     if [ "${POSTGRESQLSERVER:=-NO-}" = "-YES-" ]; then
         ConsoleMessage "Starting PostgreSQL database server"
         if [ "${ROTATELOGS}" = "1" ]; then
-            sudo -u $PGUSER sh -c "${DAEMON} start -D ${PGDATA} -s | ${LOGUTIL} ${PGLOG} ${ROTATESEC} &"
+            sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' | ${LOGUTIL} '${PGLOG}' ${ROTATESEC} &"
         else
-            sudo -u $PGUSER $DAEMON start -D "$PGDATA" -s -l $PGLOG
+            sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' &" >>$PGLOG 2>&1
         fi
     fi
 }
 
 StopService () {
     ConsoleMessage "Stopping PostgreSQL database server"
-    sudo -u $PGUSER $DAEMON stop -D "$PGDATA" -s -m fast
+    sudo -u $PGUSER $PGCTL stop -D "$PGDATA" -s -m fast
 }
 
 RestartService () {
     if [ "${POSTGRESQLSERVER:=-NO-}" = "-YES-" ]; then
         ConsoleMessage "Restarting PostgreSQL database server"
+	# should match StopService:
+	sudo -u $PGUSER $PGCTL stop -D "$PGDATA" -s -m fast
+	# should match StartService:
         if [ "${ROTATELOGS}" = "1" ]; then
-#            StopService
-#            StartService
-            sudo -u $PGUSER sh -c "${DAEMON} restart -D ${PGDATA} -s -m fast | ${LOGUTIL} ${PGLOG} ${ROTATESEC} &"
+            sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' | ${LOGUTIL} '${PGLOG}' ${ROTATESEC} &"
         else
-            sudo -u $PGUSER $DAEMON restart -D "$PGDATA" -s -m fast
+            sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' &" >>$PGLOG 2>&1
         fi
     else
         StopService
diff --git a/contrib/start-scripts/freebsd b/contrib/start-scripts/freebsd
index cee56956ce7..528fc776c78 100644
--- a/contrib/start-scripts/freebsd
+++ b/contrib/start-scripts/freebsd
@@ -6,7 +6,7 @@
 # Created through merger of the Linux start script by Ryan Kirkpatrick
 # and the script in the FreeBSD ports collection.
 
-# $PostgreSQL: pgsql/contrib/start-scripts/freebsd,v 1.3 2003/11/29 19:51:35 pgsql Exp $
+# $PostgreSQL: pgsql/contrib/start-scripts/freebsd,v 1.4 2004/10/01 18:30:21 tgl Exp $
 
 ## EDIT FROM HERE
 
@@ -16,7 +16,7 @@ prefix=/usr/local/pgsql
 # Data directory
 PGDATA="/usr/local/pgsql/data"
 
-# Who to run pg_ctl as, should be "postgres".
+# Who to run the postmaster as, usually "postgres".  (NOT "root")
 PGUSER=postgres
 
 # Where to keep a log file
@@ -27,24 +27,31 @@ PGLOG="$PGDATA/serverlog"
 # The path that is to be used for the script
 PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
 
-# What to use to start up the postmaster
-DAEMON="$prefix/bin/pg_ctl"
+# What to use to start up the postmaster (we do NOT use pg_ctl for this,
+# as it adds no value and can cause the postmaster to misrecognize a stale
+# lock file)
+DAEMON="$prefix/bin/postmaster"
 
+# What to use to shut down the postmaster
+PGCTL="$prefix/bin/pg_ctl"
+
+# Only start if we can find the postmaster.
 test -x "$DAEMON" || exit 0
 
 case $1 in
     start)
-        su -l $PGUSER -c "$DAEMON start -D '$PGDATA' -s -l $PGLOG"
+	su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
 	echo -n ' postgresql'
 	;;
     stop)
-	su -l $PGUSER -c "$DAEMON stop -D '$PGDATA' -s -m fast"
+	su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast"
 	;;
     restart)
-	su -l $PGUSER -c "$DAEMON restart -D '$PGDATA' -s -m fast"
+	su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w"
+	su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
 	;;
     status)
-	su -l $PGUSER -c "$DAEMON status -D '$PGDATA'"
+	su -l $PGUSER -c "$PGCTL status -D '$PGDATA'"
 	;;
     *)
 	# Print help
diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux
index 19042d0e994..e0174de571d 100644
--- a/contrib/start-scripts/linux
+++ b/contrib/start-scripts/linux
@@ -24,7 +24,7 @@
 
 # Original author:  Ryan Kirkpatrick <pgsql@rkirkpat.net>
 
-# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.6 2003/11/29 19:51:36 pgsql Exp $
+# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.7 2004/10/01 18:30:21 tgl Exp $
 
 ## EDIT FROM HERE
 
@@ -34,7 +34,7 @@ prefix=/usr/local/pgsql
 # Data directory
 PGDATA="/usr/local/pgsql/data"
 
-# Who to run pg_ctl as, should be "postgres".
+# Who to run the postmaster as, usually "postgres".  (NOT "root")
 PGUSER=postgres
 
 # Where to keep a log file
@@ -54,38 +54,44 @@ fi
 # The path that is to be used for the script
 PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
 
-# What to use to start up the postmaster
-DAEMON="$prefix/bin/pg_ctl"
+# What to use to start up the postmaster (we do NOT use pg_ctl for this,
+# as it adds no value and can cause the postmaster to misrecognize a stale
+# lock file)
+DAEMON="$prefix/bin/postmaster"
+
+# What to use to shut down the postmaster
+PGCTL="$prefix/bin/pg_ctl"
 
 set -e
 
-# Only start if we can find pg_ctl.
-test -f $DAEMON || exit 0
+# Only start if we can find the postmaster.
+test -x $DAEMON || exit 0
 
 # Parse command line parameters.
 case $1 in
   start)
 	$ECHO_N "Starting PostgreSQL: "$ECHO_C
-	su - $PGUSER -c "$DAEMON start -D '$PGDATA' -s -l $PGLOG" 
+	su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
 	echo "ok"
 	;;
   stop)
 	echo -n "Stopping PostgreSQL: "
-	su - $PGUSER -c "$DAEMON stop -D '$PGDATA' -s -m fast"
+	su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast"
 	echo "ok"
 	;;
   restart)
 	echo -n "Restarting PostgreSQL: "
-	su - $PGUSER -c "$DAEMON restart -D '$PGDATA' -s -m fast -l $PGLOG"
+	su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w"
+	su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
 	echo "ok"
 	;;
   reload)
         echo -n "Reload PostgreSQL: "
-        su - $PGUSER -c "$DAEMON reload -D '$PGDATA' -s"
+        su - $PGUSER -c "$PGCTL reload -D '$PGDATA' -s"
         echo "ok"
         ;;
   status)
-	su - $PGUSER -c "$DAEMON status -D '$PGDATA'"
+	su - $PGUSER -c "$PGCTL status -D '$PGDATA'"
 	;;
   *)
 	# Print help
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 9bb72c44328..5136e39f44b 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.132 2004/08/29 05:06:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.133 2004/10/01 18:30:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -526,10 +526,22 @@ CreateLockFile(const char *filename, bool amPostmaster,
 		/*
 		 * Check to see if the other process still exists
 		 *
+		 * If the PID in the lockfile is our own PID or our parent's PID,
+		 * then the file must be stale (probably left over from a previous
+		 * system boot cycle).  We need this test because of the likelihood
+		 * that a reboot will assign exactly the same PID as we had in the
+		 * previous reboot.  Also, if there is just one more process launch
+		 * in this reboot than in the previous one, the lockfile might mention
+		 * our parent's PID.  We can reject that since we'd never be launched
+		 * directly by a competing postmaster.  We can't detect grandparent
+		 * processes unfortunately, but if the init script is written carefully
+		 * then all but the immediate parent shell will be root-owned processes
+		 * and so the kill test will fail with EPERM.
+		 *
 		 * Normally kill() will fail with ESRCH if the given PID doesn't
 		 * exist.  BeOS returns EINVAL for some silly reason, however.
 		 */
-		if (other_pid != my_pid)
+		if (other_pid != my_pid && other_pid != getppid())
 		{
 			if (kill(other_pid, 0) == 0 ||
 				(errno != ESRCH
@@ -544,12 +556,16 @@ CreateLockFile(const char *filename, bool amPostmaster,
 						 errmsg("lock file \"%s\" already exists",
 								filename),
 						 isDDLock ?
-						 errhint("Is another %s (PID %d) running in data directory \"%s\"?",
-						   (encoded_pid < 0 ? "postgres" : "postmaster"),
-								 (int) other_pid, refName) :
-						 errhint("Is another %s (PID %d) using socket file \"%s\"?",
-						   (encoded_pid < 0 ? "postgres" : "postmaster"),
-								 (int) other_pid, refName)));
+						 (encoded_pid < 0 ?
+						  errhint("Is another postgres (PID %d) running in data directory \"%s\"?",
+								  (int) other_pid, refName) :
+						  errhint("Is another postmaster (PID %d) running in data directory \"%s\"?",
+								  (int) other_pid, refName)) :
+						 (encoded_pid < 0 ?
+						  errhint("Is another postgres (PID %d) using socket file \"%s\"?",
+								  (int) other_pid, refName) :
+						  errhint("Is another postmaster (PID %d) using socket file \"%s\"?",
+								  (int) other_pid, refName))));
 			}
 		}
 
-- 
GitLab