From e0bd60171a26e6a324989a5b649dd08e8e77ed7e Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 16 Jan 2000 21:37:50 +0000
Subject: [PATCH] Rearrange coding in COPY so that expansible string buffer for
 data being read is reused for successive attributes, instead of being deleted
 and recreated from scratch for each value read in.  This reduces palloc/pfree
 overhead a lot.  COPY IN still seems to be noticeably slower than it was in
 6.5 --- we need to figure out why.  This change takes care of the only major
 performance loss I can see in copy.c itself, so the performance problem is at
 a lower level somewhere.

---
 src/backend/commands/copy.c | 168 ++++++++++++++++++++----------------
 1 file changed, 95 insertions(+), 73 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 7a8dd880079..328b2d644eb 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.95 2000/01/14 22:11:33 petere Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.96 2000/01/16 21:37:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,6 +66,21 @@ static int	CountTuples(Relation relation);
 static int	lineno;
 static bool	fe_eof;
 
+/*
+ * These static variables are used to avoid incurring overhead for each
+ * attribute processed.  attribute_buf is reused on each CopyReadAttribute
+ * call to hold the string being read in.  Under normal use it will soon
+ * grow to a suitable size, and then we will avoid palloc/pfree overhead
+ * for subsequent attributes.  Note that CopyReadAttribute returns a pointer
+ * to attribute_buf's data buffer, which the next call overwrites!
+ * encoding, if needed, can be set once at the start of the copy operation.
+ */
+static StringInfoData	attribute_buf;
+#ifdef MULTIBYTE
+static int				encoding;
+#endif
+
+
 /*
  * Internal communications functions
  */
@@ -276,78 +291,88 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
 			 "directly to or from a file.  Anyone can COPY to stdout or "
 			 "from stdin.  Psql's \\copy command also works for anyone.");
 
-		if (from)
-		{						/* copy from file to database */
-			if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
-				elog(ERROR, "You can't change sequence relation %s", relname);
-			if (pipe)
+	/*
+	 * Set up variables to avoid per-attribute overhead.
+	 */
+	initStringInfo(&attribute_buf);
+#ifdef MULTIBYTE
+	encoding = pg_get_client_encoding();
+#endif
+
+	if (from)
+	{							/* copy from file to database */
+		if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
+			elog(ERROR, "You can't change sequence relation %s", relname);
+		if (pipe)
+		{
+			if (IsUnderPostmaster)
 			{
-				if (IsUnderPostmaster)
-				{
-					ReceiveCopyBegin();
-					fp = NULL;
-				}
-				else
-					fp = stdin;
+				ReceiveCopyBegin();
+				fp = NULL;
 			}
 			else
-			{
+				fp = stdin;
+		}
+		else
+		{
 #ifndef __CYGWIN32__
-				fp = AllocateFile(filename, "r");
+			fp = AllocateFile(filename, "r");
 #else
-				fp = AllocateFile(filename, "rb");
+			fp = AllocateFile(filename, "rb");
 #endif
-				if (fp == NULL)
-					elog(ERROR, "COPY command, running in backend with "
-						 "effective uid %d, could not open file '%s' for "
-						 "reading.  Errno = %s (%d).",
-						 geteuid(), filename, strerror(errno), errno);
-			}
-			CopyFrom(rel, binary, oids, fp, delim, null_print);
+			if (fp == NULL)
+				elog(ERROR, "COPY command, running in backend with "
+					 "effective uid %d, could not open file '%s' for "
+					 "reading.  Errno = %s (%d).",
+					 geteuid(), filename, strerror(errno), errno);
 		}
-		else
-		{						/* copy from database to file */
-			if (pipe)
+		CopyFrom(rel, binary, oids, fp, delim, null_print);
+	}
+	else
+	{							/* copy from database to file */
+		if (pipe)
+		{
+			if (IsUnderPostmaster)
 			{
-				if (IsUnderPostmaster)
-				{
-					SendCopyBegin();
-					pq_startcopyout();
-					fp = NULL;
-				}
-				else
-					fp = stdout;
+				SendCopyBegin();
+				pq_startcopyout();
+				fp = NULL;
 			}
 			else
-			{
-				mode_t		oumask;		/* Pre-existing umask value */
+				fp = stdout;
+		}
+		else
+		{
+			mode_t		oumask;		/* Pre-existing umask value */
 
             oumask = umask((mode_t) 022);
 #ifndef __CYGWIN32__
-				fp = AllocateFile(filename, "w");
+			fp = AllocateFile(filename, "w");
 #else
-				fp = AllocateFile(filename, "wb");
+			fp = AllocateFile(filename, "wb");
 #endif
-				umask(oumask);
-				if (fp == NULL)
-					elog(ERROR, "COPY command, running in backend with "
-						 "effective uid %d, could not open file '%s' for "
-						 "writing.  Errno = %s (%d).",
-						 geteuid(), filename, strerror(errno), errno);
-			}
-			CopyTo(rel, binary, oids, fp, delim, null_print);
-		}
-		if (!pipe)
-		{
-			FreeFile(fp);
-		}
-		else if (!from)
-		{
-			if (!binary)
-				CopySendData("\\.\n", 3, fp);
-			if (IsUnderPostmaster)
-				pq_endcopyout(false);
+			umask(oumask);
+			if (fp == NULL)
+				elog(ERROR, "COPY command, running in backend with "
+					 "effective uid %d, could not open file '%s' for "
+					 "writing.  Errno = %s (%d).",
+					 geteuid(), filename, strerror(errno), errno);
 		}
+		CopyTo(rel, binary, oids, fp, delim, null_print);
+	}
+
+	if (!pipe)
+	{
+		FreeFile(fp);
+	}
+	else if (!from)
+	{
+		if (!binary)
+			CopySendData("\\.\n", 3, fp);
+		if (IsUnderPostmaster)
+			pq_endcopyout(false);
+	}
+	pfree(attribute_buf.data);
 
 	/*
 	 * Close the relation.  If reading, we can release the AccessShareLock
@@ -717,7 +742,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
 					loaded_oid = oidin(string);
 					if (loaded_oid < BootstrapObjectIdData)
 						elog(ERROR, "COPY TEXT: Invalid Oid. line: %d", lineno);
-					pfree(string);
 				}
 			}
 			for (i = 0; i < attr_count && !done; i++)
@@ -727,8 +751,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
 				{
 					values[i] = PointerGetDatum(NULL);
 					nulls[i] = 'n';
-					if (string)
-						pfree(string);
 				}
 				else if (string == NULL)
 					done = 1;
@@ -745,7 +767,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
 					if (!PointerIsValid(values[i]) &&
 						!(rel->rd_att->attrs[i]->attbyval))
 						elog(ERROR, "copy from line %d: Bad file format", lineno);
-					pfree(string);
 				}
 			}
 			if (!done)
@@ -1115,9 +1136,10 @@ CopyReadNewline(FILE *fp, int *newline)
 /*
  * Read the value of a single attribute.
  *
- * Result is either a palloc'd string, or NULL (if EOF or a null attribute).
- * *isnull is set true if a null attribute, else false.
+ * Result is either a string, or NULL (if EOF or a null attribute).
+ * The string is attribute_buf's data buffer; the caller must not pfree it!
  *
+ * *isnull is set true if a null attribute, else false.
  * delim is the string of acceptable delimiter characters(s).
  * *newline remembers whether we've seen a newline ending this tuple.
  * null_print says how NULL values are represented
@@ -1126,19 +1148,20 @@ CopyReadNewline(FILE *fp, int *newline)
 static char *
 CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print)
 {
-	StringInfoData	attribute_buf;
 	char		c;
 #ifdef MULTIBYTE
 	int			mblen;
-	int			encoding;
 	unsigned char s[2];
 	char	   *cvt;
 	int			j;
 
-	encoding = pg_get_client_encoding();
 	s[1] = 0;
 #endif
 
+	/* reset attribute_buf to empty */
+	attribute_buf.len = 0;
+	attribute_buf.data[0] = '\0';
+
 	/* if last delimiter was a newline return a NULL attribute */
 	if (*newline)
 	{
@@ -1148,8 +1171,6 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
 
 	*isnull = (bool) false;		/* set default */
 
-	initStringInfo(&attribute_buf);
-
 	if (CopyGetEof(fp))
 		goto endOfFile;
 
@@ -1265,17 +1286,20 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
 									   attribute_buf.len);
 	if (cvt != attribute_buf.data)
 	{
-		pfree(attribute_buf.data);
-		return cvt;
+		/* transfer converted data back to attribute_buf */
+		attribute_buf.len = 0;
+		attribute_buf.data[0] = '\0';
+		appendBinaryStringInfo(&attribute_buf, cvt, strlen(cvt));
+		pfree(cvt);
 	}
 #endif
+
     if (strcmp(attribute_buf.data, null_print)==0)
         *isnull = true;
 
 	return attribute_buf.data;
 
 endOfFile:
-	pfree(attribute_buf.data);
 	return NULL;
 }
 
@@ -1286,13 +1310,11 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
 	char		c;
 #ifdef MULTIBYTE
 	char	   *string_start;
-	int			encoding;
 	int			mblen;
 	int			i;
 #endif
 
 #ifdef MULTIBYTE
-	encoding = pg_get_client_encoding();
 	string = (char *) pg_server_to_client((unsigned char *) server_string,
 										  strlen(server_string));
 	string_start = string;
-- 
GitLab