From 258cef12540fa1cb244881a0f019cefd698c809e Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 11 Apr 2017 09:08:36 -0400
Subject: [PATCH] Fix possibile deadlock when dropping partitions.

heap_drop_with_catalog and RangeVarCallbackForDropRelation should
lock the parent before locking the target relation.

Amit Langote

Discussion: http://postgr.es/m/29588799-a8ce-b0a2-3dae-f39ff6d35922@lab.ntt.co.jp
---
 src/backend/catalog/heap.c       | 30 +++++++++++++++++-------------
 src/backend/commands/tablecmds.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index a264f1b9eb9..4a5f545dc67 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1759,29 +1759,33 @@ void
 heap_drop_with_catalog(Oid relid)
 {
 	Relation	rel;
+	HeapTuple	tuple;
 	Oid			parentOid;
 	Relation	parent = NULL;
 
 	/*
-	 * Open and lock the relation.
+	 * To drop a partition safely, we must grab exclusive lock on its parent,
+	 * because another backend might be about to execute a query on the parent
+	 * table.  If it relies on previously cached partition descriptor, then
+	 * it could attempt to access the just-dropped relation as its partition.
+	 * We must therefore take a table lock strong enough to prevent all
+	 * queries on the table from proceeding until we commit and send out a
+	 * shared-cache-inval notice that will make them update their index lists.
 	 */
-	rel = relation_open(relid, AccessExclusiveLock);
-
-	/*
-	 * If the relation is a partition, we must grab exclusive lock on its
-	 * parent because we need to update its partition descriptor. We must take
-	 * a table lock strong enough to prevent all queries on the parent from
-	 * proceeding until we commit and send out a shared-cache-inval notice
-	 * that will make them update their partition descriptor. Sometimes, doing
-	 * this is cycles spent uselessly, especially if the parent will be
-	 * dropped as part of the same command anyway.
-	 */
-	if (rel->rd_rel->relispartition)
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (((Form_pg_class) GETSTRUCT(tuple))->relispartition)
 	{
 		parentOid = get_partition_parent(relid);
 		parent = heap_open(parentOid, AccessExclusiveLock);
 	}
 
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Open and lock the relation.
+	 */
+	rel = relation_open(relid, AccessExclusiveLock);
+
 	/*
 	 * There can no longer be anyone *else* touching the relation, but we
 	 * might still have open queries or cursors, or pending trigger events, in
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index abb262b851c..a6a9f54b13e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -271,6 +271,7 @@ struct DropRelationCallbackState
 {
 	char		relkind;
 	Oid			heapOid;
+	Oid			partParentOid;
 	bool		concurrent;
 };
 
@@ -1049,6 +1050,7 @@ RemoveRelations(DropStmt *drop)
 		/* Look up the appropriate relation using namespace search. */
 		state.relkind = relkind;
 		state.heapOid = InvalidOid;
+		state.partParentOid = InvalidOid;
 		state.concurrent = drop->concurrent;
 		relOid = RangeVarGetRelidExtended(rel, lockmode, true,
 										  false,
@@ -1078,6 +1080,8 @@ RemoveRelations(DropStmt *drop)
 /*
  * Before acquiring a table lock, check whether we have sufficient rights.
  * In the case of DROP INDEX, also try to lock the table before the index.
+ * Also, if the table to be dropped is a partition, we try to lock the parent
+ * first.
  */
 static void
 RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
@@ -1087,6 +1091,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 	struct DropRelationCallbackState *state;
 	char		relkind;
 	char		expected_relkind;
+	bool		is_partition;
 	Form_pg_class classform;
 	LOCKMODE	heap_lockmode;
 
@@ -1106,6 +1111,17 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 		state->heapOid = InvalidOid;
 	}
 
+	/*
+	 * Similarly, if we previously locked some other partition's heap, and
+	 * the name we're looking up no longer refers to that relation, release
+	 * the now-useless lock.
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->partParentOid))
+	{
+		UnlockRelationOid(state->partParentOid, AccessExclusiveLock);
+		state->partParentOid = InvalidOid;
+	}
+
 	/* Didn't find a relation, so no need for locking or permission checks. */
 	if (!OidIsValid(relOid))
 		return;
@@ -1114,6 +1130,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 	if (!HeapTupleIsValid(tuple))
 		return;					/* concurrently dropped, so nothing to do */
 	classform = (Form_pg_class) GETSTRUCT(tuple);
+	is_partition = classform->relispartition;
 
 	/*
 	 * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
@@ -1157,6 +1174,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 		if (OidIsValid(state->heapOid))
 			LockRelationOid(state->heapOid, heap_lockmode);
 	}
+
+	/*
+	 * Similarly, if the relation is a partition, we must acquire lock on its
+	 * parent before locking the partition.  That's because queries lock the
+	 * parent before its partitions, so we risk deadlock it we do it the other
+	 * way around.
+	 */
+	if (is_partition && relOid != oldRelOid)
+	{
+		state->partParentOid = get_partition_parent(relOid);
+		if (OidIsValid(state->partParentOid))
+			LockRelationOid(state->partParentOid, AccessExclusiveLock);
+	}
 }
 
 /*
-- 
GitLab