From 4513d9dedac634aeca41a436093bc02a1aa8efec Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 19 Jan 2006 04:45:38 +0000
Subject: [PATCH] It turns out that TablespaceCreateDbspace fails badly if a
 relcache flush occurs when it tries to heap_open pg_tablespace.  When control
 returns to smgrcreate, that routine will be holding a dangling pointer to a
 closed SMgrRelation, resulting in mayhem.  This is of course a consequence of
 the violation of proper module layering inherent in having smgr.c call a
 tablespace command routine, but the simplest fix seems to be to change the
 locking mechanism.  There's no real need for TablespaceCreateDbspace to touch
 pg_tablespace at all --- it's only opening it as a way of locking against a
 parallel DROP TABLESPACE command.  A much better answer is to create a
 special-purpose LWLock to interlock these two operations. This drops
 TablespaceCreateDbspace quite a few layers down the food chain and makes it
 something reasonably safe for smgr to call.

---
 src/backend/commands/tablespace.c | 54 ++++++++++++++-----------------
 src/include/storage/lwlock.h      |  3 +-
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index f83d1ab8843..19955dd81ed 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.28 2005/10/15 02:49:15 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.29 2006/01/19 04:45:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -83,12 +83,9 @@ static void set_short_version(const char *path);
  * If tablespaces are not supported, this is just a no-op; CREATE DATABASE
  * is expected to create the default subdirectory for the database.
  *
- * isRedo indicates that we are creating an object during WAL replay;
- * we can skip doing locking in that case (and should do so to avoid
- * any possible problems with pg_tablespace not being valid).
- *
- * Also, when isRedo is true, we will cope with the possibility of the
- * tablespace not being there either --- this could happen if we are
+ * isRedo indicates that we are creating an object during WAL replay.
+ * In this case we will cope with the possibility of the tablespace
+ * directory not being there either --- this could happen if we are
  * replaying an operation on a table in a subsequently-dropped tablespace.
  * We handle this by making a directory in the place where the tablespace
  * symlink would normally be.  This isn't an exact replay of course, but
@@ -118,16 +115,10 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
 		if (errno == ENOENT)
 		{
 			/*
-			 * Acquire ExclusiveLock on pg_tablespace to ensure that no DROP
-			 * TABLESPACE or TablespaceCreateDbspace is running concurrently.
-			 * Simple reads from pg_tablespace are OK.
+			 * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
+			 * or TablespaceCreateDbspace is running concurrently.
 			 */
-			Relation	rel;
-
-			if (!isRedo)
-				rel = heap_open(TableSpaceRelationId, ExclusiveLock);
-			else
-				rel = NULL;
+			LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
 
 			/*
 			 * Recheck to see if someone created the directory while we were
@@ -166,9 +157,7 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
 				}
 			}
 
-			/* OK to drop the exclusive lock */
-			if (!isRedo)
-				heap_close(rel, ExclusiveLock);
+			LWLockRelease(TablespaceCreateLock);
 		}
 		else
 		{
@@ -401,15 +390,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 	/* don't call this in a transaction block */
 	PreventTransactionChain((void *) stmt, "DROP TABLESPACE");
 
-	/*
-	 * Acquire ExclusiveLock on pg_tablespace to ensure that no one else is
-	 * trying to do DROP TABLESPACE or TablespaceCreateDbspace concurrently.
-	 */
-	rel = heap_open(TableSpaceRelationId, ExclusiveLock);
-
 	/*
 	 * Find the target tuple
 	 */
+	rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
+
 	ScanKeyInit(&entry[0],
 				Anum_pg_tablespace_spcname,
 				BTEqualStrategyNumber, F_NAMEEQ,
@@ -448,6 +433,12 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 	 */
 	deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid);
 
+	/*
+	 * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
+	 * is running concurrently.
+	 */
+	LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
+
 	/*
 	 * Try to remove the physical infrastructure
 	 */
@@ -471,6 +462,11 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
 	}
 
+	/*
+	 * Allow TablespaceCreateDbspace again.
+	 */
+	LWLockRelease(TablespaceCreateLock);
+
 	/* We keep the lock on pg_tablespace until commit */
 	heap_close(rel, NoLock);
 #else							/* !HAVE_SYMLINK */
@@ -507,10 +503,10 @@ remove_tablespace_directories(Oid tablespaceoid, bool redo)
 	 * use the tablespace from that database will simply recreate the
 	 * subdirectory via TablespaceCreateDbspace.)
 	 *
-	 * Since we hold exclusive lock, no one else should be creating any fresh
-	 * subdirectories in parallel.	It is possible that new files are being
-	 * created within subdirectories, though, so the rmdir call could fail.
-	 * Worst consequence is a less friendly error message.
+	 * Since we hold TablespaceCreateLock, no one else should be creating any
+	 * fresh subdirectories in parallel. It is possible that new files are
+	 * being created within subdirectories, though, so the rmdir call could
+	 * fail.  Worst consequence is a less friendly error message.
 	 */
 	dirdesc = AllocateDir(location);
 	if (dirdesc == NULL)
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index ca384218a50..fb05544db1f 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.25 2006/01/04 21:06:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.26 2006/01/19 04:45:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,6 +46,7 @@ typedef enum LWLockId
 	RelCacheInitLock,
 	BgWriterCommLock,
 	TwoPhaseStateLock,
+	TablespaceCreateLock,
 	FirstLockMgrLock,			/* must be last except for MaxDynamicLWLock */
 
 	MaxDynamicLWLock = 1000000000
-- 
GitLab