From 1d57374114dadd83bb8428dfa841312568ef3cc4 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 6 Mar 2003 03:16:55 +0000
Subject: [PATCH] Use poll(2) in preference to select(2), if available.  This
 solves problems in applications that may have a large number of files open,
 such that libpq's socket number exceeds the range supported by fd_set. From
 Chris Brown.

---
 configure                      |   7 +-
 configure.in                   |   6 +-
 src/include/pg_config.h.in     |  17 ++-
 src/interfaces/libpq/fe-misc.c | 262 ++++++++++++++++++---------------
 4 files changed, 167 insertions(+), 125 deletions(-)

diff --git a/configure b/configure
index edc6b5ac87e..4c1b6b1782a 100755
--- a/configure
+++ b/configure
@@ -6888,7 +6888,9 @@ done
 
 
 
-for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h pwd.h sys/ipc.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h
+
+
+for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if eval "test \"\${$as_ac_Header+set}\" = set"; then
@@ -10175,7 +10177,8 @@ test $ac_cv_func_memcmp_working = no && LIBOBJS="$LIBOBJS memcmp.$ac_objext"
 
 
 
-for ac_func in cbrt fcvt getpeereid memmove pstat setproctitle setsid sigprocmask sysconf waitpid dlopen fdatasync utime utimes
+
+for ac_func in cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask sysconf utime utimes waitpid
 do
 as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
 echo "$as_me:$LINENO: checking for $ac_func" >&5
diff --git a/configure.in b/configure.in
index 77a63434aa4..a79e0107e3b 100644
--- a/configure.in
+++ b/configure.in
@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-dnl $Header: /cvsroot/pgsql/configure.in,v 1.237 2003/02/19 04:04:04 momjian Exp $
+dnl $Header: /cvsroot/pgsql/configure.in,v 1.238 2003/03/06 03:16:55 tgl Exp $
 dnl
 dnl Developers, please strive to achieve this order:
 dnl
@@ -675,7 +675,7 @@ fi
 ##
 
 dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
-AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h pwd.h sys/ipc.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h])
+AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h])
 
 # At least on IRIX, cpp test for netinet/tcp.h will fail unless
 # netinet/in.h is included first.
@@ -786,7 +786,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
 # SunOS doesn't handle negative byte comparisons properly with +/- return
 AC_FUNC_MEMCMP
 
-AC_CHECK_FUNCS([cbrt fcvt getpeereid memmove pstat setproctitle setsid sigprocmask sysconf waitpid dlopen fdatasync utime utimes])
+AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask sysconf utime utimes waitpid])
 
 AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 50960bdfd12..3c8e0b5d507 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -8,7 +8,7 @@
  * or in pg_config.h afterwards.  Of course, if you edit pg_config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: pg_config.h.in,v 1.40 2003/02/19 04:04:04 momjian Exp $
+ * $Id: pg_config.h.in,v 1.41 2003/03/06 03:16:55 tgl Exp $
  */
 
 #ifndef PG_CONFIG_H
@@ -341,6 +341,12 @@
 /* Set to 1 if you have <ieeefp.h> */
 #undef HAVE_IEEEFP_H
 
+/* Set to 1 if you have <poll.h> */
+#undef HAVE_POLL_H
+
+/* Set to 1 if you have <sys/poll.h> */
+#undef HAVE_SYS_POLL_H
+
 /* Set to 1 if you have <netinet/tcp.h> */
 #undef HAVE_NETINET_TCP_H
 
@@ -649,6 +655,12 @@
 /* Define if you have on_exit() */
 #undef HAVE_ON_EXIT
 
+/* Define if you have syslog() */
+#undef HAVE_SYSLOG
+
+/* Define if you have poll() */
+#undef HAVE_POLL
+
 /* Define if the corresponding types are defined in standard headers */
 #undef HAVE_INT8
 #undef HAVE_UINT8
@@ -656,9 +668,6 @@
 #undef HAVE_UINT64
 #undef HAVE_SIG_ATOMIC_T
 
-/* Define if you have syslog() */
-#undef HAVE_SYSLOG
-
 /* Define exactly one of these symbols to select semaphore implementation */
 #undef USE_NAMED_POSIX_SEMAPHORES
 #undef USE_UNNAMED_POSIX_SEMAPHORES
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 2ae1c2630b6..ad99e5de15a 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -25,7 +25,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-misc.c,v 1.86 2003/01/07 22:23:17 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-misc.c,v 1.87 2003/03/06 03:16:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,6 +43,12 @@
 #include <sys/time.h>
 #endif
 
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
+#ifdef HAVE_SYS_POLL_H
+#include <sys/poll.h>
+#endif
 #ifdef HAVE_SYS_SELECT_H
 #include <sys/select.h>
 #endif
@@ -55,6 +61,9 @@
 #define DONOTICE(conn,message) \
 	((*(conn)->noticeHook) ((conn)->noticeArg, (message)))
 
+static int	pqSocketCheck(PGconn *conn, int forRead, int forWrite,
+						  time_t end_time);
+static int	pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time);
 static int	pqPutBytes(const char *s, size_t nbytes, PGconn *conn);
 
 
@@ -358,40 +367,7 @@ pqPutInt(int value, size_t bytes, PGconn *conn)
 int
 pqReadReady(PGconn *conn)
 {
-	fd_set		input_mask;
-	struct timeval timeout;
-
-	if (!conn || conn->sock < 0)
-		return -1;
-
-/* JAB: Check for SSL library buffering read bytes */
-#ifdef USE_SSL
-	if (conn->ssl && SSL_pending(conn->ssl) > 0)
-	{
-		/* short-circuit the select */
-		return 1;
-	}
-#endif
-
-retry1:
-	FD_ZERO(&input_mask);
-	FD_SET(conn->sock, &input_mask);
-	timeout.tv_sec = 0;
-	timeout.tv_usec = 0;
-	if (select(conn->sock + 1, &input_mask, (fd_set *) NULL, (fd_set *) NULL,
-			   &timeout) < 0)
-	{
-		if (SOCK_ERRNO == EINTR)
-			/* Interrupted system call - we'll just try again */
-			goto retry1;
-
-		printfPQExpBuffer(&conn->errorMessage,
-						  libpq_gettext("select() failed: %s\n"),
-						  SOCK_STRERROR(SOCK_ERRNO));
-		return -1;
-	}
-
-	return FD_ISSET(conn->sock, &input_mask) ? 1 : 0;
+	return pqSocketCheck(conn, 1, 0, (time_t) 0);	/* read only; end_time 0 = never block */
 }
 
 /*
@@ -401,30 +377,7 @@ retry1:
 int
 pqWriteReady(PGconn *conn)
 {
-	fd_set		input_mask;
-	struct timeval timeout;
-
-	if (!conn || conn->sock < 0)
-		return -1;
-
-retry2:
-	FD_ZERO(&input_mask);
-	FD_SET(conn->sock, &input_mask);
-	timeout.tv_sec = 0;
-	timeout.tv_usec = 0;
-	if (select(conn->sock + 1, (fd_set *) NULL, &input_mask, (fd_set *) NULL,
-			   &timeout) < 0)
-	{
-		if (SOCK_ERRNO == EINTR)
-			/* Interrupted system call - we'll just try again */
-			goto retry2;
-
-		printfPQExpBuffer(&conn->errorMessage,
-						  libpq_gettext("select() failed: %s\n"),
-						  SOCK_STRERROR(SOCK_ERRNO));
-		return -1;
-	}
-	return FD_ISSET(conn->sock, &input_mask) ? 1 : 0;
+	return pqSocketCheck(conn, 0, 1, (time_t) 0);	/* write only; end_time 0 = never block */
 }
 
 /* ----------
@@ -785,90 +738,166 @@ pqWait(int forRead, int forWrite, PGconn *conn)
 /*
  * pqWaitTimed: wait, but not past finish_time.
  *
- * If finish_time is exceeded then we return failure (EOF).  This is different
- * from the response for a kernel exception (return 0) because we don't want
- * the caller to try to read/write in that case.
+ * Returns 0 once the socket is ready.  Returns EOF both when pqSocketCheck
+ * reports an error and when finish_time is exceeded ("timeout expired"),
+ * because we don't want the caller to try to read/write in either case.
  *
  * finish_time = ((time_t) -1) disables the wait limit.
  */
 int
 pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time)
 {
-	fd_set		input_mask;
-	fd_set		output_mask;
-	fd_set		except_mask;
-	struct timeval tmp_timeout;
-	struct timeval *ptmp_timeout = NULL;
-	int			selresult;
+	int result;
 
-	if (conn->sock < 0)
+	result = pqSocketCheck(conn, forRead, forWrite, finish_time);
+
+	if (result < 0)
+		return EOF;				/* pqSocketCheck set errorMessage (unless conn is NULL) */
+
+	if (result == 0)			/* timed out, or neither forRead nor forWrite was given */
 	{
 		printfPQExpBuffer(&conn->errorMessage,
-						  libpq_gettext("connection not open\n"));
+						  libpq_gettext("timeout expired\n"));
 		return EOF;
 	}
 
+	return 0;					/* result > 0: a requested condition is met */
+}
+
+/*
+ * pqSocketCheck: test conn->sock for readability and/or writability via
+ * pqSocketPoll, retrying on EINTR.  Returns >0 if a condition is met, 0 on
+ * timeout, -1 on error (errorMessage is set unless conn itself is NULL).
+ * end_time is passed through unchanged: -1 waits forever, 0 never blocks.
+ * With SSL, pending buffered read data counts as readable without polling.
+ */
+static int
+pqSocketCheck(PGconn *conn, int forRead, int forWrite, time_t end_time)
+{
+	int result;
+
+	if (!conn)
+		return -1;				/* no connection object to record a message in */
+	if (conn->sock < 0)
+	{
+		printfPQExpBuffer(&conn->errorMessage,
+		                  libpq_gettext("socket not open\n"));
+		return -1;
+	}
+
 /* JAB: Check for SSL library buffering read bytes */
 #ifdef USE_SSL
 	if (forRead && conn->ssl && SSL_pending(conn->ssl) > 0)
 	{
 		/* short-circuit the select */
-		return 0;
+		return 1;				/* report "ready": SSL already holds readable bytes */
 	}
 #endif
 
-	if (forRead || forWrite)
+	/* Retry on EINTR; pqSocketPoll recomputes the remaining wait on each call */
+	do
 	{
-retry5:
-		FD_ZERO(&input_mask);
-		FD_ZERO(&output_mask);
-		FD_ZERO(&except_mask);
-		if (forRead)
-			FD_SET(conn->sock, &input_mask);
-		if (forWrite)
-			FD_SET(conn->sock, &output_mask);
-		FD_SET(conn->sock, &except_mask);
-
-		if (finish_time != ((time_t) -1))
-		{
-			/*
-			 * Set up delay.  Assume caller incremented finish_time
-			 * so that we can error out as soon as time() passes it.
-			 * Note we will recalculate delay each time through the loop.
-			 */
-			time_t	now = time(NULL);
-
-			if (finish_time > now)
-				tmp_timeout.tv_sec = finish_time - now;
-			else
-				tmp_timeout.tv_sec = 0;
-			tmp_timeout.tv_usec = 0;
-			ptmp_timeout = &tmp_timeout;
-		}
+		result = pqSocketPoll(conn->sock, forRead, forWrite, end_time);
+	}
+	while (result < 0 && SOCK_ERRNO == EINTR);
 
-		selresult = select(conn->sock + 1, &input_mask, &output_mask,
-						   &except_mask, ptmp_timeout);
-		if (selresult < 0)
-		{
-			if (SOCK_ERRNO == EINTR)
-				goto retry5;
-			printfPQExpBuffer(&conn->errorMessage,
-							  libpq_gettext("select() failed: %s\n"),
-							  SOCK_STRERROR(SOCK_ERRNO));
-			return EOF;
-		}
-		if (selresult == 0)
-		{
-			printfPQExpBuffer(&conn->errorMessage,
-							  libpq_gettext("timeout expired\n"));
-			return EOF;
-		}
+	if (result < 0)				/* not EINTR; text says select() even if poll() ran */
+	{
+		printfPQExpBuffer(&conn->errorMessage,
+		                  libpq_gettext("select() failed: %s\n"),
+		                  SOCK_STRERROR(SOCK_ERRNO));
 	}
 
-	return 0;
+	return result;
 }
 
 
+/*
+ * pqSocketPoll: check one descriptor for read and/or write readiness,
+ * possibly waiting.  If neither forRead nor forWrite is set, return 0 (the
+ * timeout result) at once.  Returns the poll()/select() result as-is: >0 if
+ * a condition is met, 0 on timeout, -1 on error or interrupt (caller checks
+ * EINTR).  end_time is an absolute time(NULL)-style value: -1 waits forever,
+ * 0 (or indeed, any time before now) means do not block at all.
+ */
+static int
+pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time)
+{
+	/* Prefer poll(2): no fd_set limit on the socket number; otherwise select(2) */
+#ifdef HAVE_POLL
+	struct pollfd input_fd;
+	int           timeout_ms;	/* relative wait handed to poll(), in milliseconds */
+
+	input_fd.fd      = sock;
+	input_fd.events  = 0;
+	input_fd.revents = 0;
+
+	if (forRead)
+		input_fd.events |= POLLIN;
+	if (forWrite)
+		input_fd.events |= POLLOUT;
+	if (!input_fd.events)
+		return 0;				/* nothing requested: report a timeout */
+
+	/* Convert the absolute end_time into a relative timeout in milliseconds */
+	if (end_time == ((time_t) -1))
+	{
+		timeout_ms = -1;		/* negative timeout: poll() waits indefinitely */
+	}
+	else
+	{
+		time_t now = time(NULL);
+
+		if (end_time > now)
+			timeout_ms = (end_time - now) * 1000;	/* NOTE(review): exceeds 32-bit int past ~24 days */
+		else
+			timeout_ms = 0;		/* deadline already passed: do not block */
+	}
+
+	return poll(&input_fd, 1, timeout_ms);
+
+#else /* !HAVE_POLL */
+
+	fd_set          input_mask;
+	fd_set          output_mask;
+	fd_set          except_mask;
+	struct timeval  timeout;
+	struct timeval *ptr_timeout;
+
+	if (!forRead && !forWrite)
+		return 0;				/* nothing requested: report a timeout */
+
+	FD_ZERO(&input_mask);
+	FD_ZERO(&output_mask);
+	FD_ZERO(&except_mask);
+	if (forRead)
+		FD_SET(sock, &input_mask);
+	if (forWrite)
+		FD_SET(sock, &output_mask);
+	FD_SET(sock, &except_mask);	/* exceptional conditions are always watched */
+
+	/* Convert the absolute end_time into a relative timeout in whole seconds */
+	if (end_time == ((time_t) -1))
+	{
+		ptr_timeout = NULL;		/* NULL timeout: select() waits indefinitely */
+	}
+	else
+	{
+		time_t	now = time(NULL);
+
+		if (end_time > now)
+			timeout.tv_sec = end_time - now;
+		else
+			timeout.tv_sec = 0;	/* deadline already passed: do not block */
+		timeout.tv_usec = 0;
+		ptr_timeout = &timeout;
+	}
+
+	return select(sock + 1, &input_mask, &output_mask,
+				  &except_mask, ptr_timeout);
+#endif /* HAVE_POLL */
+}
+
 
 /*
  * A couple of "miscellaneous" multibyte related functions. They used
@@ -902,6 +931,7 @@ PQenv2encoding(void)
 
 
 #ifdef ENABLE_NLS
+
 char *
 libpq_gettext(const char *msgid)
 {
-- 
GitLab