From 7c0ca2900f7cae490fd551096cb7dc581cfe45c8 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Thu, 17 Aug 2017 15:39:17 -0400
Subject: [PATCH] Don't lock tables in RelationGetPartitionDispatchInfo.

Instead, lock them in the caller using find_all_inheritors so that
they get locked in the standard order, minimizing deadlock risks.

Also in RelationGetPartitionDispatchInfo, avoid opening tables which
are not partitioned; there's no need.

Amit Langote, reviewed by Ashutosh Bapat and Amit Khandekar

Discussion: http://postgr.es/m/91b36fa1-c197-b72f-ca6e-56c593bae68c@lab.ntt.co.jp
---
 src/backend/catalog/partition.c | 55 +++++++++++++++++----------------
 src/backend/executor/execMain.c | 10 ++++--
 src/include/catalog/partition.h |  3 +-
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index 71bc4b3d105..21901380cb2 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -1000,12 +1000,16 @@ get_partition_qual_relid(Oid relid)
  * RelationGetPartitionDispatchInfo
  *		Returns information necessary to route tuples down a partition tree
  *
- * All the partitions will be locked with lockmode, unless it is NoLock.
- * A list of the OIDs of all the leaf partitions of rel is returned in
- * *leaf_part_oids.
+ * The number of elements in the returned array (that is, the number of
+ * PartitionDispatch objects for the partitioned tables in the partition tree)
+ * is returned in *num_parted and a list of the OIDs of all the leaf
+ * partitions of rel is returned in *leaf_part_oids.
+ *
+ * All the relations in the partition tree (including 'rel') must have been
+ * locked (using at least the AccessShareLock) by the caller.
  */
 PartitionDispatch *
-RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
+RelationGetPartitionDispatchInfo(Relation rel,
 								 int *num_parted, List **leaf_part_oids)
 {
 	PartitionDispatchData **pd;
@@ -1020,14 +1024,18 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 				offset;
 
 	/*
-	 * Lock partitions and make a list of the partitioned ones to prepare
-	 * their PartitionDispatch objects below.
+	 * We rely on the relcache to traverse the partition tree to build both
+	 * the leaf partition OIDs list and the array of PartitionDispatch objects
+	 * for the partitioned tables in the tree.  That means every partitioned
+	 * table in the tree must be locked, which is fine since we require the
+	 * caller to lock all the partitions anyway.
 	 *
-	 * Cannot use find_all_inheritors() here, because then the order of OIDs
-	 * in parted_rels list would be unknown, which does not help, because we
-	 * assign indexes within individual PartitionDispatch in an order that is
-	 * predetermined (determined by the order of OIDs in individual partition
-	 * descriptors).
+	 * For every partitioned table in the tree, starting with the root
+	 * partitioned table, add its relcache entry to parted_rels, while also
+	 * queuing its partitions (in the order in which they appear in the
+	 * partition descriptor) to be looked at later in the same loop.  This is
+	 * a bit tricky but works because the foreach() macro doesn't fetch the
+	 * next list element until the bottom of the loop.
 	 */
 	*num_parted = 1;
 	parted_rels = list_make1(rel);
@@ -1036,29 +1044,24 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 	APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
 	forboth(lc1, all_parts, lc2, all_parents)
 	{
-		Relation	partrel = heap_open(lfirst_oid(lc1), lockmode);
+		Oid			partrelid = lfirst_oid(lc1);
 		Relation	parent = lfirst(lc2);
-		PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
 
-		/*
-		 * If this partition is a partitioned table, add its children to the
-		 * end of the list, so that they are processed as well.
-		 */
-		if (partdesc)
+		if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE)
 		{
+			/*
+			 * Already locked by the caller.  Note that it is the
+			 * responsibility of the caller to close the below relcache entry,
+			 * once done using the information being collected here (for
+			 * example, in ExecEndModifyTable).
+			 */
+			Relation	partrel = heap_open(partrelid, NoLock);
+
 			(*num_parted)++;
 			parted_rels = lappend(parted_rels, partrel);
 			parted_rel_parents = lappend(parted_rel_parents, parent);
 			APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
 		}
-		else
-			heap_close(partrel, NoLock);
-
-		/*
-		 * We keep the partitioned ones open until we're done using the
-		 * information being collected here (for example, see
-		 * ExecEndModifyTable).
-		 */
 	}
 
 	/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 6671a25ffb3..74071eede6e 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -43,6 +43,7 @@
 #include "access/xact.h"
 #include "catalog/namespace.h"
 #include "catalog/partition.h"
+#include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_publication.h"
 #include "commands/matview.h"
 #include "commands/trigger.h"
@@ -3249,9 +3250,12 @@ ExecSetupPartitionTupleRouting(Relation rel,
 	int			i;
 	ResultRelInfo *leaf_part_rri;
 
-	/* Get the tuple-routing information and lock partitions */
-	*pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted,
-										   &leaf_parts);
+	/*
+	 * Get the information about the partition tree after locking all the
+	 * partitions.
+	 */
+	(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
+	*pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
 	*num_partitions = list_length(leaf_parts);
 	*partitions = (ResultRelInfo *) palloc(*num_partitions *
 										   sizeof(ResultRelInfo));
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index 434ded37d74..7e415fe8727 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -87,8 +87,7 @@ extern Expr *get_partition_qual_relid(Oid relid);
 
 /* For tuple routing */
 extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
-								 int lockmode, int *num_parted,
-								 List **leaf_part_oids);
+								 int *num_parted, List **leaf_part_oids);
 extern void FormPartitionKeyDatum(PartitionDispatch pd,
 					  TupleTableSlot *slot,
 					  EState *estate,
-- 
GitLab