From 558730ac6bbc668a75c7a7619beae5a1b34d965f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 2 Aug 2005 19:02:32 +0000
Subject: [PATCH] Clean up CREATE DATABASE processing to make it more robust
 and get rid of special case for Windows port.  Put a PG_TRY around most of
 createdb() to ensure that we remove copied subdirectories on failure, even if
 the failure happens while creating the pg_database row.  (I think this
 explains Oliver Siegmar's recent report.)  Having done that, there's no need
 for the fragile assumption that copydir() mustn't ereport(ERROR), so simplify
 its API.  Eliminate the old code that used system("cp ...") to copy
 subdirectories, in favor of using copydir() on all platforms.  This not only
 should allow much better error reporting, but allows us to fsync the created
 files before trusting that the copy has succeeded.

---
 configure                         |   8 -
 configure.in                      |   3 +-
 src/Makefile.global.in            |   8 +-
 src/backend/commands/dbcommands.c | 379 +++++++++++++-----------------
 src/include/port.h                |   6 +-
 src/port/copydir.c                | 146 ++++++++----
 6 files changed, 273 insertions(+), 277 deletions(-)

diff --git a/configure b/configure
index 84ce2bcc834..1f2f679c642 100755
--- a/configure
+++ b/configure
@@ -14915,14 +14915,6 @@ fi
 
 # Win32 support
 if test "$PORTNAME" = "win32"; then
-case $LIBOBJS in
-    "copydir.$ac_objext"   | \
-  *" copydir.$ac_objext"   | \
-    "copydir.$ac_objext "* | \
-  *" copydir.$ac_objext "* ) ;;
-  *) LIBOBJS="$LIBOBJS copydir.$ac_objext" ;;
-esac
-
 case $LIBOBJS in
     "gettimeofday.$ac_objext"   | \
   *" gettimeofday.$ac_objext"   | \
diff --git a/configure.in b/configure.in
index 3baed6c79f0..8eb5c91e2c7 100644
--- a/configure.in
+++ b/configure.in
@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-dnl $PostgreSQL: pgsql/configure.in,v 1.417 2005/07/06 21:04:13 momjian Exp $
+dnl $PostgreSQL: pgsql/configure.in,v 1.418 2005/08/02 19:02:30 tgl Exp $
 dnl
 dnl Developers, please strive to achieve this order:
 dnl
@@ -913,7 +913,6 @@ fi
 
 # Win32 support
 if test "$PORTNAME" = "win32"; then
-AC_LIBOBJ(copydir)
 AC_LIBOBJ(gettimeofday)
 AC_LIBOBJ(kill)
 AC_LIBOBJ(open)
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index e6c594d55eb..e59ef2bfb98 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -1,5 +1,5 @@
 # -*-makefile-*-
-# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.218 2005/07/28 03:15:52 tgl Exp $
+# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.219 2005/08/02 19:02:31 tgl Exp $
 
 #------------------------------------------------------------------------------
 # All PostgreSQL makefiles include this file and use the variables it sets,
@@ -388,11 +388,10 @@ endif
 
 ##########################################################################
 #
-# substitute implementations of C library routines
+# substitute implementations of C library routines (see src/port/)
 
-LIBOBJS = @LIBOBJS@ dirmod.o exec.o noblock.o path.o pipe.o pgsleep.o pgstrcasecmp.o sprompt.o thread.o
+LIBOBJS = @LIBOBJS@ copydir.o dirmod.o exec.o noblock.o path.o pipe.o pgsleep.o pgstrcasecmp.o sprompt.o thread.o
 
-ifneq (,$(LIBOBJS))
 LIBS := -lpgport $(LIBS)
 # add location of libpgport.a to LDFLAGS
 ifdef PGXS
@@ -400,7 +399,6 @@ override LDFLAGS := -L$(libdir) $(LDFLAGS)
 else
 override LDFLAGS := -L$(top_builddir)/src/port $(LDFLAGS)
 endif
-endif
 
 # to make ws2_32.lib the last library, and always link with shfolder,
 # so SHGetFolderName isn't picked up from shell32.dll
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 295ae955d1f..01b53e06933 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.168 2005/07/31 17:19:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.169 2005/08/02 19:02:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -79,14 +79,14 @@ createdb(const CreatedbStmt *stmt)
 	TransactionId src_vacuumxid;
 	TransactionId src_frozenxid;
 	Oid			src_deftablespace;
-	Oid			dst_deftablespace;
-	Relation	pg_database_rel;
+	volatile Oid dst_deftablespace;
+	volatile Relation pg_database_rel = NULL;
 	HeapTuple	tuple;
 	TupleDesc	pg_database_dsc;
 	Datum		new_record[Natts_pg_database];
 	char		new_record_nulls[Natts_pg_database];
 	Oid			dboid;
-	Oid			datdba;
+	volatile Oid datdba;
 	ListCell   *option;
 	DefElem    *dtablespacename = NULL;
 	DefElem    *downer = NULL;
@@ -96,12 +96,8 @@ createdb(const CreatedbStmt *stmt)
 	char	   *dbname = stmt->dbname;
 	char	   *dbowner = NULL;
 	const char *dbtemplate = NULL;
-	int			encoding = -1;
-	int			dbconnlimit = -1;
-
-#ifndef WIN32
-	char		buf[2 * MAXPGPATH + 100];
-#endif
+	volatile int encoding = -1;
+	volatile int dbconnlimit = -1;
 
 	/* don't call this in a transaction block */
 	PreventTransactionChain((void *) stmt, "CREATE DATABASE");
@@ -363,207 +359,186 @@ createdb(const CreatedbStmt *stmt)
 	BufferSync();
 
 	/*
-	 * Close virtual file descriptors so the kernel has more available for
-	 * the system() calls below.
+	 * Once we start copying subdirectories, we need to be able to clean
+	 * 'em up if we fail.  Establish a TRY block to make sure this happens.
+	 * (This is not a 100% solution, because of the possibility of failure
+	 * during transaction commit after we leave this routine, but it should
+	 * handle most scenarios.)
 	 */
-	closeAllVfds();
-
-	/*
-	 * Iterate through all tablespaces of the template database, and copy
-	 * each one to the new database.
-	 */
-	rel = heap_open(TableSpaceRelationId, AccessShareLock);
-	scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
-	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	PG_TRY();
 	{
-		Oid			srctablespace = HeapTupleGetOid(tuple);
-		Oid			dsttablespace;
-		char	   *srcpath;
-		char	   *dstpath;
-		struct stat st;
-
-		/* No need to copy global tablespace */
-		if (srctablespace == GLOBALTABLESPACE_OID)
-			continue;
-
-		srcpath = GetDatabasePath(src_dboid, srctablespace);
-
-		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
-			directory_is_empty(srcpath))
+		/*
+		 * Iterate through all tablespaces of the template database,
+		 * and copy each one to the new database.
+		 */
+		rel = heap_open(TableSpaceRelationId, AccessShareLock);
+		scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
+		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 		{
-			/* Assume we can ignore it */
-			pfree(srcpath);
-			continue;
-		}
-
-		if (srctablespace == src_deftablespace)
-			dsttablespace = dst_deftablespace;
-		else
-			dsttablespace = srctablespace;
-
-		dstpath = GetDatabasePath(dboid, dsttablespace);
+			Oid			srctablespace = HeapTupleGetOid(tuple);
+			Oid			dsttablespace;
+			char	   *srcpath;
+			char	   *dstpath;
+			struct stat st;
 
-		if (stat(dstpath, &st) == 0 || errno != ENOENT)
-		{
-			remove_dbtablespaces(dboid);
-			ereport(ERROR,
-					(errmsg("could not initialize database directory"),
-					 errdetail("Directory \"%s\" already exists.",
-							   dstpath)));
+			/* No need to copy global tablespace */
+			if (srctablespace == GLOBALTABLESPACE_OID)
+				continue;
+
+			srcpath = GetDatabasePath(src_dboid, srctablespace);
+
+			if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
+				directory_is_empty(srcpath))
+			{
+				/* Assume we can ignore it */
+				pfree(srcpath);
+				continue;
+			}
+
+			if (srctablespace == src_deftablespace)
+				dsttablespace = dst_deftablespace;
+			else
+				dsttablespace = srctablespace;
+
+			dstpath = GetDatabasePath(dboid, dsttablespace);
+
+			/*
+			 * Copy this subdirectory to the new location
+			 *
+			 * We don't need to copy subdirectories
+			 */
+			copydir(srcpath, dstpath, false);
+
+			/* Record the filesystem change in XLOG */
+			{
+				xl_dbase_create_rec xlrec;
+				XLogRecData rdata[1];
+
+				xlrec.db_id = dboid;
+				xlrec.tablespace_id = dsttablespace;
+				xlrec.src_db_id = src_dboid;
+				xlrec.src_tablespace_id = srctablespace;
+
+				rdata[0].data = (char *) &xlrec;
+				rdata[0].len = sizeof(xl_dbase_create_rec);
+				rdata[0].buffer = InvalidBuffer;
+				rdata[0].next = NULL;
+
+				(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+			}
 		}
-
-#ifndef WIN32
+		heap_endscan(scan);
+		heap_close(rel, AccessShareLock);
 
 		/*
-		 * Copy this subdirectory to the new location
-		 *
-		 * XXX use of cp really makes this code pretty grotty, particularly
-		 * with respect to lack of ability to report errors well.  Someday
-		 * rewrite to do it for ourselves.
+		 * Now OK to grab exclusive lock on pg_database.
 		 */
+		pg_database_rel = heap_open(DatabaseRelationId, ExclusiveLock);
 
-		/* We might need to use cp -R one day for portability */
-		snprintf(buf, sizeof(buf), "cp -r '%s' '%s'",
-				 srcpath, dstpath);
-		if (system(buf) != 0)
-		{
-			remove_dbtablespaces(dboid);
-			ereport(ERROR,
-					(errmsg("could not initialize database directory"),
-					 errdetail("Failing system command was: %s", buf),
-					 errhint("Look in the postmaster's stderr log for more information.")));
-		}
-#else							/* WIN32 */
-		if (copydir(srcpath, dstpath) != 0)
-		{
-			/* copydir should already have given details of its troubles */
-			remove_dbtablespaces(dboid);
+		/* Check to see if someone else created same DB name meanwhile. */
+		if (get_db_info(dbname, NULL, NULL, NULL,
+						NULL, NULL, NULL, NULL, NULL, NULL))
 			ereport(ERROR,
-					(errmsg("could not initialize database directory")));
-		}
-#endif   /* WIN32 */
-
-		/* Record the filesystem change in XLOG */
-		{
-			xl_dbase_create_rec xlrec;
-			XLogRecData rdata[1];
-
-			xlrec.db_id = dboid;
-			xlrec.tablespace_id = dsttablespace;
-			xlrec.src_db_id = src_dboid;
-			xlrec.src_tablespace_id = srctablespace;
+					(errcode(ERRCODE_DUPLICATE_DATABASE),
+					 errmsg("database \"%s\" already exists", dbname)));
 
-			rdata[0].data = (char *) &xlrec;
-			rdata[0].len = sizeof(xl_dbase_create_rec);
-			rdata[0].buffer = InvalidBuffer;
-			rdata[0].next = NULL;
-
-			(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
-		}
-	}
-	heap_endscan(scan);
-	heap_close(rel, AccessShareLock);
-
-	/*
-	 * Now OK to grab exclusive lock on pg_database.
-	 */
-	pg_database_rel = heap_open(DatabaseRelationId, ExclusiveLock);
+		/*
+		 * Insert a new tuple into pg_database
+		 */
+		pg_database_dsc = RelationGetDescr(pg_database_rel);
+
+		/* Form tuple */
+		MemSet(new_record, 0, sizeof(new_record));
+		MemSet(new_record_nulls, ' ', sizeof(new_record_nulls));
+
+		new_record[Anum_pg_database_datname - 1] =
+			DirectFunctionCall1(namein, CStringGetDatum(dbname));
+		new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
+		new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
+		new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(false);
+		new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(true);
+		new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
+		new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
+		new_record[Anum_pg_database_datvacuumxid - 1] = TransactionIdGetDatum(src_vacuumxid);
+		new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
+		new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
 
-	/* Check to see if someone else created same DB name meanwhile. */
-	if (get_db_info(dbname, NULL, NULL, NULL,
-					NULL, NULL, NULL, NULL, NULL, NULL))
-	{
-		/* Don't hold lock while doing recursive remove */
-		heap_close(pg_database_rel, ExclusiveLock);
-		remove_dbtablespaces(dboid);
-		ereport(ERROR,
-				(errcode(ERRCODE_DUPLICATE_DATABASE),
-				 errmsg("database \"%s\" already exists", dbname)));
-	}
+		/*
+		 * We deliberately set datconfig and datacl to defaults (NULL), rather
+		 * than copying them from the template database.  Copying datacl would
+		 * be a bad idea when the owner is not the same as the template's
+		 * owner. It's more debatable whether datconfig should be copied.
+		 */
+		new_record_nulls[Anum_pg_database_datconfig - 1] = 'n';
+		new_record_nulls[Anum_pg_database_datacl - 1] = 'n';
 
-	/*
-	 * Insert a new tuple into pg_database
-	 */
-	pg_database_dsc = RelationGetDescr(pg_database_rel);
+		tuple = heap_formtuple(pg_database_dsc, new_record, new_record_nulls);
 
-	/* Form tuple */
-	MemSet(new_record, 0, sizeof(new_record));
-	MemSet(new_record_nulls, ' ', sizeof(new_record_nulls));
+		HeapTupleSetOid(tuple, dboid);		/* override heap_insert's OID
+											 * selection */
 
-	new_record[Anum_pg_database_datname - 1] =
-		DirectFunctionCall1(namein, CStringGetDatum(dbname));
-	new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
-	new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
-	new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(false);
-	new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(true);
-	new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
-	new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
-	new_record[Anum_pg_database_datvacuumxid - 1] = TransactionIdGetDatum(src_vacuumxid);
-	new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
-	new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
+		simple_heap_insert(pg_database_rel, tuple);
 
-	/*
-	 * We deliberately set datconfig and datacl to defaults (NULL), rather
-	 * than copying them from the template database.  Copying datacl would
-	 * be a bad idea when the owner is not the same as the template's
-	 * owner. It's more debatable whether datconfig should be copied.
-	 */
-	new_record_nulls[Anum_pg_database_datconfig - 1] = 'n';
-	new_record_nulls[Anum_pg_database_datacl - 1] = 'n';
+		/* Update indexes */
+		CatalogUpdateIndexes(pg_database_rel, tuple);
 
-	tuple = heap_formtuple(pg_database_dsc, new_record, new_record_nulls);
+		/* Register owner dependency */
+		recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);
 
-	HeapTupleSetOid(tuple, dboid);		/* override heap_insert's OID
-										 * selection */
+		/* Create pg_shdepend entries for objects within database */
+		copyTemplateDependencies(src_dboid, dboid);
 
-	simple_heap_insert(pg_database_rel, tuple);
+		/*
+		 * We force a checkpoint before committing.  This effectively means
+		 * that committed XLOG_DBASE_CREATE operations will never need to be
+		 * replayed (at least not in ordinary crash recovery; we still have
+		 * to make the XLOG entry for the benefit of PITR operations).
+		 * This avoids two nasty scenarios:
+		 *
+		 * #1: When PITR is off, we don't XLOG the contents of newly created
+		 * indexes; therefore the drop-and-recreate-whole-directory behavior
+		 * of DBASE_CREATE replay would lose such indexes.
+		 *
+		 * #2: Since we have to recopy the source database during DBASE_CREATE
+		 * replay, we run the risk of copying changes in it that were committed
+		 * after the original CREATE DATABASE command but before the system
+		 * crash that led to the replay.  This is at least unexpected and at
+		 * worst could lead to inconsistencies, eg duplicate table names.
+		 *
+		 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
+		 *
+		 * In PITR replay, the first of these isn't an issue, and the second
+		 * is only a risk if the CREATE DATABASE and subsequent template
+		 * database change both occur while a base backup is being taken.
+		 * There doesn't seem to be much we can do about that except document
+		 * it as a limitation.
+		 *
+		 * Perhaps if we ever implement CREATE DATABASE in a less cheesy
+		 * way, we can avoid this.
+		 */
+		RequestCheckpoint(true, false);
 
-	/* Update indexes */
-	CatalogUpdateIndexes(pg_database_rel, tuple);
+		/*
+		 * Set flag to update flat database file at commit.
+		 */
+		database_file_update_needed();
+	}
+	PG_CATCH();
+	{
+		/* Don't hold pg_database lock while doing recursive remove */
+		if (pg_database_rel != NULL)
+			heap_close(pg_database_rel, ExclusiveLock);
 
-	/* Register owner dependency */
-	recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);
+		/* Throw away any successfully copied subdirectories */
+		remove_dbtablespaces(dboid);
 
-	/* Create pg_shdepend entries for objects within database */
-	copyTemplateDependencies(src_dboid, dboid);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
 
 	/* Close pg_database, but keep exclusive lock till commit */
+	/* This has to be outside the PG_TRY */
 	heap_close(pg_database_rel, NoLock);
-
-	/*
-	 * We force a checkpoint before committing.  This effectively means
-	 * that committed XLOG_DBASE_CREATE operations will never need to be
-	 * replayed (at least not in ordinary crash recovery; we still have
-	 * to make the XLOG entry for the benefit of PITR operations).
-	 * This avoids two nasty scenarios:
-	 *
-	 * #1: When PITR is off, we don't XLOG the contents of newly created
-	 * indexes; therefore the drop-and-recreate-whole-directory behavior
-	 * of DBASE_CREATE replay would lose such indexes.
-	 *
-	 * #2: Since we have to recopy the source database during DBASE_CREATE
-	 * replay, we run the risk of copying changes in it that were committed
-	 * after the original CREATE DATABASE command but before the system
-	 * crash that led to the replay.  This is at least unexpected and at
-	 * worst could lead to inconsistencies, eg duplicate table names.
-	 *
-	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
-	 *
-	 * In PITR replay, the first of these isn't an issue, and the second
-	 * is only a risk if the CREATE DATABASE and subsequent template
-	 * database change both occur while a base backup is being taken.
-	 * There doesn't seem to be much we can do about that except document
-	 * it as a limitation.
-	 *
-	 * Perhaps if we ever implement CREATE DATABASE in a less cheesy
-	 * way, we can avoid this.
-	 */
-	RequestCheckpoint(true, false);
-
-	/*
-	 * Set flag to update flat database file at commit.
-	 */
-	database_file_update_needed();
 }
 
 
@@ -1348,10 +1323,6 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
 		char	   *dst_path;
 		struct stat st;
 
-#ifndef WIN32
-		char		buf[2 * MAXPGPATH + 100];
-#endif
-
 		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
 		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
 
@@ -1365,8 +1336,8 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
 		{
 			if (!rmtree(dst_path, true))
 				ereport(WARNING,
-					(errmsg("could not remove database directory \"%s\"",
-							dst_path)));
+						(errmsg("could not remove database directory \"%s\"",
+								dst_path)));
 		}
 
 		/*
@@ -1376,32 +1347,12 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
 		 */
 		BufferSync();
 
-#ifndef WIN32
-
 		/*
 		 * Copy this subdirectory to the new location
 		 *
-		 * XXX use of cp really makes this code pretty grotty, particularly
-		 * with respect to lack of ability to report errors well.  Someday
-		 * rewrite to do it for ourselves.
+		 * We don't need to copy subdirectories
 		 */
-
-		/* We might need to use cp -R one day for portability */
-		snprintf(buf, sizeof(buf), "cp -r '%s' '%s'",
-				 src_path, dst_path);
-		if (system(buf) != 0)
-			ereport(ERROR,
-					(errmsg("could not initialize database directory"),
-					 errdetail("Failing system command was: %s", buf),
-					 errhint("Look in the postmaster's stderr log for more information.")));
-#else							/* WIN32 */
-		if (copydir(src_path, dst_path) != 0)
-		{
-			/* copydir should already have given details of its troubles */
-			ereport(ERROR,
-					(errmsg("could not initialize database directory")));
-		}
-#endif   /* WIN32 */
+		copydir(src_path, dst_path, false);
 	}
 	else if (info == XLOG_DBASE_DROP)
 	{
diff --git a/src/include/port.h b/src/include/port.h
index 76bec0a4ba6..95e5531c931 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/port.h,v 1.79 2005/07/06 21:40:09 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/port.h,v 1.80 2005/08/02 19:02:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -206,6 +206,8 @@ extern int	pgsymlink(const char *oldpath, const char *newpath);
 
 #endif /* defined(WIN32) || defined(__CYGWIN__) */
 
+extern void copydir(char *fromdir, char *todir, bool recurse);
+
 extern bool rmtree(char *path, bool rmtopdir);
 
 #if defined(WIN32) && !defined(__CYGWIN__)
@@ -223,8 +225,6 @@ extern int	win32_open(const char *, int,...);
 #define pclose(a) _pclose(a)
 #endif
 
-extern int	copydir(char *fromdir, char *todir);
-
 /* Missing rand functions */
 extern long lrand48(void);
 extern void srand48(long seed);
diff --git a/src/port/copydir.c b/src/port/copydir.c
index 6062da96a84..a9339e79f90 100644
--- a/src/port/copydir.c
+++ b/src/port/copydir.c
@@ -11,84 +11,140 @@
  *	as a service.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/port/copydir.c,v 1.11 2005/03/24 02:11:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/port/copydir.c,v 1.12 2005/08/02 19:02:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
 #include "storage/fd.h"
 
-#undef mkdir					/* no reason to use that macro because we
-								 * ignore the 2nd arg */
+
+static void copy_file(char *fromfile, char *tofile);
 
 
 /*
- * copydir: copy a directory (we only need to go one level deep)
- *
- * Return 0 on success, nonzero on failure.
+ * copydir: copy a directory
  *
- * NB: do not elog(ERROR) on failure.  Return to caller so it can try to
- * clean up.
+ * If recurse is false, subdirectories are ignored.  Anything that's not
+ * a directory or a regular file is ignored.
  */
-int
-copydir(char *fromdir, char *todir)
+void
+copydir(char *fromdir, char *todir, bool recurse)
 {
 	DIR		   *xldir;
 	struct dirent *xlde;
-	char		fromfl[MAXPGPATH];
-	char		tofl[MAXPGPATH];
+	char		fromfile[MAXPGPATH];
+	char		tofile[MAXPGPATH];
 
-	if (mkdir(todir) != 0)
-	{
-		ereport(WARNING,
+	if (mkdir(todir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
+		ereport(ERROR,
 				(errcode_for_file_access(),
 				 errmsg("could not create directory \"%s\": %m", todir)));
-		return -1;
-	}
+
 	xldir = AllocateDir(fromdir);
 	if (xldir == NULL)
-	{
-		ereport(WARNING,
+		ereport(ERROR,
 				(errcode_for_file_access(),
 				 errmsg("could not open directory \"%s\": %m", fromdir)));
-		return -1;
+
+	while ((xlde = ReadDir(xldir, fromdir)) != NULL)
+	{
+		struct stat fst;
+
+		if (strcmp(xlde->d_name, ".") == 0 ||
+			strcmp(xlde->d_name, "..") == 0)
+			continue;
+
+		snprintf(fromfile, MAXPGPATH, "%s/%s", fromdir, xlde->d_name);
+		snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name);
+
+		if (stat(fromfile, &fst) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not stat \"%s\": %m", fromfile)));
+		/* S_IFxxx are type codes under S_IFMT, not flag bits: use S_ISxxx() */
+		if (S_ISDIR(fst.st_mode))
+		{
+			/* descend only when asked to; otherwise skip the subdirectory */
+			if (recurse)
+				copydir(fromfile, tofile, true);
+		}
+		else if (S_ISREG(fst.st_mode))
+			copy_file(fromfile, tofile);
+	}
 
-	errno = 0;
-	while ((xlde = readdir(xldir)) != NULL)
+	FreeDir(xldir);
+}
+
+/*
+ * copy_file: copy fromfile into tofile, which must not already exist
+ */
+static void
+copy_file(char *fromfile, char *tofile)
+{
+	char		buffer[8 * BLCKSZ];
+	int			srcfd;
+	int			dstfd;
+	int			nbytes;
+
+	/* Open the source read-only; O_EXCL makes the create fail if tofile exists.
+	 * NOTE(review): nothing visible here closes these fds if a later
+	 * ereport(ERROR) exits this function -- confirm. */
+	srcfd = BasicOpenFile(fromfile, O_RDONLY | PG_BINARY, 0);
+	if (srcfd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", fromfile)));
+
+	dstfd = BasicOpenFile(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+						  S_IRUSR | S_IWUSR);
+	if (dstfd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", tofile)));
+
+	/*
+	 * Copy the data in chunks of up to sizeof(buffer) bytes.
+	 */
+	for (;;)
 	{
-		snprintf(fromfl, MAXPGPATH, "%s/%s", fromdir, xlde->d_name);
-		snprintf(tofl, MAXPGPATH, "%s/%s", todir, xlde->d_name);
-		if (CopyFile(fromfl, tofl, TRUE) < 0)
+		nbytes = read(srcfd, buffer, sizeof(buffer));
+		if (nbytes < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", fromfile)));
+		if (nbytes == 0)
+			break;
+		errno = 0;
+		if ((int) write(dstfd, buffer, nbytes) != nbytes)
 		{
-			ereport(WARNING,
+			/* if write didn't set errno, assume problem is no disk space */
+			if (errno == 0)
+				errno = ENOSPC;
+			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not copy file \"%s\": %m", fromfl)));
-			FreeDir(xldir);
-			return -1;
+					 errmsg("could not write to file \"%s\": %m", tofile)));
 		}
-		errno = 0;
 	}
-#ifdef WIN32
 
 	/*
-	 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
-	 * not in released version
+	 * Be paranoid here to ensure we catch problems.
 	 */
-	if (GetLastError() == ERROR_NO_MORE_FILES)
-		errno = 0;
-#endif
-	if (errno)
-	{
-		ereport(WARNING,
+	if (pg_fsync(dstfd) != 0)
+		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not read directory \"%s\": %m", fromdir)));
-		FreeDir(xldir);
-		return -1;
-	}
+				 errmsg("could not fsync file \"%s\": %m", tofile)));
 
-	FreeDir(xldir);
-	return 0;
+	if (close(dstfd))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", tofile)));
+
+	close(srcfd);
 }
-- 
GitLab