diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 24426f391f221fb5435be3846fb7ff1ddda821fe..dd6218cbfe201434fdb30ef57732e9ce1773c66a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.96 2010/01/02 16:57:35 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.97 2010/02/04 00:09:13 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1185,10 +1185,25 @@ toast_save_datum(Relation rel, Datum value, int options)
 		toast_pointer.va_extsize = data_todo;
 	}
 
+	/*
+	 * Insert the correct table OID into the result TOAST pointer.
+	 *
+	 * Normally this is the actual OID of the target toast table, but during
+	 * table-rewriting operations such as CLUSTER, we have to insert the OID
+	 * of the table's real permanent toast table instead.  rd_toastoid is
+	 * set if we have to substitute such an OID.
+	 */
+	if (OidIsValid(rel->rd_toastoid))
+		toast_pointer.va_toastrelid = rel->rd_toastoid;
+	else
+		toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
+
+	/*
+	 * Choose an unused OID within the toast table for this toast value.
+	 */
 	toast_pointer.va_valueid = GetNewOidWithIndex(toastrel,
 												  RelationGetRelid(toastidx),
 												  (AttrNumber) 1);
-	toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid;
 
 	/*
 	 * Initialize constant parts of the tuple data
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 2d2ac3e89400420a16a55b7ec1fed028f74335de..cf2ac19d5336687357304fea0be51bec86d7a983 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.196 2010/02/02 19:12:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.197 2010/02/04 00:09:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,8 +63,9 @@ typedef struct
 
 static void rebuild_relation(Relation OldHeap, Oid indexOid,
 							 int freeze_min_age, int freeze_table_age);
-static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap,
-					Oid OIDOldIndex, int freeze_min_age, int freeze_table_age);
+static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+			   int freeze_min_age, int freeze_table_age,
+			   bool *pSwapToastByContent, TransactionId *pFreezeXid);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
 
 
@@ -584,10 +585,8 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
 	Oid			tableOid = RelationGetRelid(OldHeap);
 	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
 	Oid			OIDNewHeap;
-	char		NewHeapName[NAMEDATALEN];
+	bool		swap_toast_by_content;
 	TransactionId frozenXid;
-	ObjectAddress object;
-	Relation	newrel;
 
 	/* Mark the correct index as clustered */
 	if (OidIsValid(indexOid))
@@ -596,98 +595,39 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
 	/* Close relcache entry, but keep lock until transaction commit */
 	heap_close(OldHeap, NoLock);
 
-	/*
-	 * Create the new heap, using a temporary name in the same namespace as
-	 * the existing table.	NOTE: there is some risk of collision with user
-	 * relnames.  Working around this seems more trouble than it's worth; in
-	 * particular, we can't create the new heap in a different namespace from
-	 * the old, or we will have problems with the TEMP status of temp tables.
-	 */
-	snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", tableOid);
-
-	OIDNewHeap = make_new_heap(tableOid, NewHeapName, tableSpace);
-
-	/*
-	 * We don't need CommandCounterIncrement() because make_new_heap did it.
-	 */
-
-	/*
-	 * Copy the heap data into the new table in the desired order.
-	 */
-	frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid,
-							   freeze_min_age, freeze_table_age);
-
-	/* To make the new heap's data visible (probably not needed?). */
-	CommandCounterIncrement();
-
-	/* Swap the physical files of the old and new heaps. */
-	swap_relation_files(tableOid, OIDNewHeap, frozenXid);
-
-	CommandCounterIncrement();
-
-	/* Destroy new heap with old filenode */
-	object.classId = RelationRelationId;
-	object.objectId = OIDNewHeap;
-	object.objectSubId = 0;
-
-	/*
-	 * The new relation is local to our transaction and we know nothing
-	 * depends on it, so DROP_RESTRICT should be OK.
-	 */
-	performDeletion(&object, DROP_RESTRICT);
+	/* Create the transient table that will receive the re-ordered data */
+	OIDNewHeap = make_new_heap(tableOid, tableSpace);
 
-	/* performDeletion does CommandCounterIncrement at end */
+	/* Copy the heap data into the new table in the desired order */
+	copy_heap_data(OIDNewHeap, tableOid, indexOid,
+				   freeze_min_age, freeze_table_age,
+				   &swap_toast_by_content, &frozenXid);
 
-	/*
-	 * Rebuild each index on the relation (but not the toast table, which is
-	 * all-new at this point).	We do not need CommandCounterIncrement()
-	 * because reindex_relation does it.
-	 */
-	reindex_relation(tableOid, false);
+	/* Swap the physical files of the old and new heaps */
+	swap_relation_files(tableOid, OIDNewHeap,
+						swap_toast_by_content, frozenXid);
 
-	/*
-	 * At this point, everything is kosher except that the toast table's name
-	 * corresponds to the temporary table.	The name is irrelevant to the
-	 * backend because it's referenced by OID, but users looking at the
-	 * catalogs could be confused.	Rename it to prevent this problem.
-	 *
-	 * Note no lock required on the relation, because we already hold an
-	 * exclusive lock on it.
-	 */
-	newrel = heap_open(tableOid, NoLock);
-	if (OidIsValid(newrel->rd_rel->reltoastrelid))
-	{
-		Relation	toastrel;
-		Oid			toastidx;
-		Oid			toastnamespace;
-		char		NewToastName[NAMEDATALEN];
-
-		toastrel = relation_open(newrel->rd_rel->reltoastrelid, AccessShareLock);
-		toastidx = toastrel->rd_rel->reltoastidxid;
-		toastnamespace = toastrel->rd_rel->relnamespace;
-		relation_close(toastrel, AccessShareLock);
-
-		/* rename the toast table ... */
-		snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u", tableOid);
-		RenameRelationInternal(newrel->rd_rel->reltoastrelid, NewToastName,
-							   toastnamespace);
-
-		/* ... and its index too */
-		snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index", tableOid);
-		RenameRelationInternal(toastidx, NewToastName,
-							   toastnamespace);
-	}
-	relation_close(newrel, NoLock);
+	/* Destroy the new heap, removing the old data along with it */
+	cleanup_heap_swap(tableOid, OIDNewHeap, swap_toast_by_content);
 }
 
+
 /*
- * Create the new table that we will fill with correctly-ordered data.
+ * Create the transient table that will be filled with new data during
+ * CLUSTER, ALTER TABLE, and similar operations.  The transient table
+ * duplicates the logical structure of the OldHeap, but is placed in
+ * NewTableSpace which might be different from OldHeap's.
+ *
+ * After this, the caller should load the new heap with transferred/modified
+ * data, then call swap_relation_files, and finally call cleanup_heap_swap to
+ * remove the debris.
  */
 Oid
-make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
+make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
 {
 	TupleDesc	OldHeapDesc,
 				tupdesc;
+	char		NewHeapName[NAMEDATALEN];
 	Oid			OIDNewHeap;
 	Oid			toastid;
 	Relation	OldHeap;
@@ -708,7 +648,7 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
 	tupdesc = CreateTupleDescCopy(OldHeapDesc);
 
 	/*
-	 * Use options of the old heap for new heap.
+	 * But we do want to use reloptions of the old heap for the new heap.
 	 */
 	tuple = SearchSysCache(RELOID,
 						   ObjectIdGetDatum(OIDOldHeap),
@@ -720,7 +660,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
 	if (isNull)
 		reloptions = (Datum) 0;
 
-	OIDNewHeap = heap_create_with_catalog(NewName,
+	/*
+	 * Create the new heap, using a temporary name in the same namespace as
+	 * the existing table.  NOTE: there is some risk of collision with user
+	 * relnames.  Working around this seems more trouble than it's worth; in
+	 * particular, we can't create the new heap in a different namespace from
+	 * the old, or we will have problems with the TEMP status of temp tables.
+	 */
+	snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
+
+	OIDNewHeap = heap_create_with_catalog(NewHeapName,
 										  RelationGetNamespace(OldHeap),
 										  NewTableSpace,
 										  InvalidOid,
@@ -776,12 +725,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
 }
 
 /*
- * Do the physical copying of heap data.  Returns the TransactionId used as
- * freeze cutoff point for the tuples.
+ * Do the physical copying of heap data.
+ *
+ * There are two output parameters:
+ * *pSwapToastByContent is set true if toast tables must be swapped by content.
+ * *pFreezeXid receives the TransactionId used as freeze cutoff point.
  */
-static TransactionId
+static void
 copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
-			   int freeze_min_age, int freeze_table_age)
+			   int freeze_min_age, int freeze_table_age,
+			   bool *pSwapToastByContent, TransactionId *pFreezeXid)
 {
 	Relation	NewHeap,
 				OldHeap,
@@ -842,13 +795,41 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 	/* use_wal off requires rd_targblock be initially invalid */
 	Assert(NewHeap->rd_targblock == InvalidBlockNumber);
 
+	/*
+	 * If both tables have TOAST tables, perform toast swap by content.  It is
+	 * possible that the old table has a toast table but the new one doesn't,
+	 * if toastable columns have been dropped.  In that case we have to do
+	 * swap by links.  This is okay because swap by content is only essential
+	 * for system catalogs, and we don't support schema changes for them.
+	 */
+	if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
+	{
+		*pSwapToastByContent = true;
+
+		/*
+		 * When doing swap by content, any toast pointers written into NewHeap
+		 * must use the old toast table's OID, because that's where the toast
+		 * data will eventually be found.  Set this up by setting rd_toastoid.
+		 * Note that we must hold NewHeap open until we are done writing data,
+		 * since the relcache will not guarantee to remember this setting once
+		 * the relation is closed.  Also, this technique depends on the fact
+		 * that no one will try to read from the NewHeap until after we've
+		 * finished writing it and swapping the rels --- otherwise they could
+		 * follow the toast pointers to the wrong place.
+		 */
+		NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
+	}
+	else
+		*pSwapToastByContent = false;
+
 	/*
 	 * compute xids used to freeze and weed out dead tuples.  We use -1
 	 * freeze_min_age to avoid having CLUSTER freeze tuples earlier than a
 	 * plain VACUUM would.
 	 */
 	vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
-				OldHeap->rd_rel->relisshared, &OldestXmin, &FreezeXid, NULL);
+						  OldHeap->rd_rel->relisshared,
+						  &OldestXmin, &FreezeXid, NULL);
 
 	/*
 	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
@@ -857,20 +838,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 	if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
 		FreezeXid = OldHeap->rd_rel->relfrozenxid;
 
+	/* return selected value to caller */
+	*pFreezeXid = FreezeXid;
+
 	/* Initialize the rewrite operation */
 	rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
 
 	/*
-	 * Scan through the OldHeap in OldIndex order and copy each tuple into the
-	 * NewHeap.  To ensure we see recently-dead tuples that still need to be
-	 * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for
-	 * the visibility test.
+	 * Scan through the OldHeap, either in OldIndex order or sequentially,
+	 * and copy each tuple into the NewHeap.  To ensure we see recently-dead
+	 * tuples that still need to be copied, we scan with SnapshotAny and use
+	 * HeapTupleSatisfiesVacuum for the visibility test.
 	 */
 	if (OldIndex != NULL)
 	{
 		heapScan = NULL;
 		indexScan = index_beginscan(OldHeap, OldIndex,
-						   SnapshotAny, 0, (ScanKey) NULL);
+									SnapshotAny, 0, (ScanKey) NULL);
 	}
 	else
 	{
@@ -1005,6 +989,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 	/* Write out any remaining tuples, and fsync if needed */
 	end_heap_rewrite(rwstate);
 
+	/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
+	NewHeap->rd_toastoid = InvalidOid;
+
+	/* Clean up */
 	pfree(values);
 	pfree(isnull);
 
@@ -1012,8 +1000,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 		index_close(OldIndex, NoLock);
 	heap_close(OldHeap, NoLock);
 	heap_close(NewHeap, NoLock);
-
-	return FreezeXid;
 }
 
 /*
@@ -1022,18 +1008,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
  * We swap the physical identity (reltablespace and relfilenode) while
  * keeping the same logical identities of the two relations.
  *
- * Also swap any TOAST links, so that the toast data moves along with
- * the main-table data.
+ * We can swap associated TOAST data in either of two ways: recursively swap
+ * the physical content of the toast tables (and their indexes), or swap the
+ * TOAST links in the given relations' pg_class entries.  The former is needed
+ * to manage rewrites of shared catalogs (where we cannot change the pg_class
+ * links) while the latter is the only way to handle cases in which a toast
+ * table is added or removed altogether.
  *
  * Additionally, the first relation is marked with relfrozenxid set to
  * frozenXid.  It seems a bit ugly to have this here, but all callers would
- * have to do it anyway, so having it here saves a heap_update.  Note: the
- * TOAST table needs no special handling, because since we swapped the links,
- * the entry for the TOAST table will now contain RecentXmin in relfrozenxid,
- * which is the correct value.
+ * have to do it anyway, so having it here saves a heap_update.  Note: in
+ * the swap-toast-links case, we assume we don't need to change the toast
+ * table's relfrozenxid: the new version of the toast table should already
+ * have relfrozenxid set to RecentXmin, which is good enough.
  */
 void
-swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
+swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
+					TransactionId frozenXid)
 {
 	Relation	relRelation;
 	HeapTuple	reltup1,
@@ -1071,15 +1062,26 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
 	relform1->reltablespace = relform2->reltablespace;
 	relform2->reltablespace = swaptemp;
 
-	swaptemp = relform1->reltoastrelid;
-	relform1->reltoastrelid = relform2->reltoastrelid;
-	relform2->reltoastrelid = swaptemp;
+	if (!swap_toast_by_content)
+	{
+		swaptemp = relform1->reltoastrelid;
+		relform1->reltoastrelid = relform2->reltoastrelid;
+		relform2->reltoastrelid = swaptemp;
 
-	/* we should not swap reltoastidxid */
+		/* we should not swap reltoastidxid */
+	}
+
+	/*
+	 * In the case of a shared catalog, these next few steps only affect our
+	 * own database's pg_class row; but that's okay.
+	 */
 
 	/* set rel1's frozen Xid */
-	Assert(TransactionIdIsNormal(frozenXid));
-	relform1->relfrozenxid = frozenXid;
+	if (relform1->relkind != RELKIND_INDEX)
+	{
+		Assert(TransactionIdIsNormal(frozenXid));
+		relform1->relfrozenxid = frozenXid;
+	}
 
 	/* swap size statistics too, since new rel has freshly-updated stats */
 	{
@@ -1107,63 +1109,96 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
 
 	/*
 	 * If we have toast tables associated with the relations being swapped,
-	 * change their dependency links to re-associate them with their new
-	 * owning relations.  Otherwise the wrong one will get dropped ...
-	 *
-	 * NOTE: it is possible that only one table has a toast table; this can
-	 * happen in CLUSTER if there were dropped columns in the old table, and
-	 * in ALTER TABLE when adding or changing type of columns.
-	 *
-	 * NOTE: at present, a TOAST table's only dependency is the one on its
-	 * owning table.  If more are ever created, we'd need to use something
-	 * more selective than deleteDependencyRecordsFor() to get rid of only the
-	 * link we want.
+	 * deal with them too.
 	 */
 	if (relform1->reltoastrelid || relform2->reltoastrelid)
 	{
-		ObjectAddress baseobject,
-					toastobject;
-		long		count;
-
-		/* Delete old dependencies */
-		if (relform1->reltoastrelid)
+		if (swap_toast_by_content)
 		{
-			count = deleteDependencyRecordsFor(RelationRelationId,
-											   relform1->reltoastrelid);
-			if (count != 1)
-				elog(ERROR, "expected one dependency record for TOAST table, found %ld",
-					 count);
+			if (relform1->reltoastrelid && relform2->reltoastrelid)
+			{
+				/* Recursively swap the contents of the toast tables */
+				swap_relation_files(relform1->reltoastrelid,
+									relform2->reltoastrelid,
+									true,
+									frozenXid);
+			}
+			else
+			{
+				/* caller messed up */
+				elog(ERROR, "cannot swap toast files by content when there's only one");
+			}
 		}
-		if (relform2->reltoastrelid)
+		else
 		{
-			count = deleteDependencyRecordsFor(RelationRelationId,
-											   relform2->reltoastrelid);
-			if (count != 1)
-				elog(ERROR, "expected one dependency record for TOAST table, found %ld",
-					 count);
-		}
+			/*
+			 * We swapped the ownership links, so we need to change dependency
+			 * data to match.
+			 *
+			 * NOTE: it is possible that only one table has a toast table.
+			 *
+			 * NOTE: at present, a TOAST table's only dependency is the one on
+			 * its owning table.  If more are ever created, we'd need to use
+			 * something more selective than deleteDependencyRecordsFor() to
+			 * get rid of just the link we want.
+			 */
+			ObjectAddress baseobject,
+						toastobject;
+			long		count;
+
+			/* Delete old dependencies */
+			if (relform1->reltoastrelid)
+			{
+				count = deleteDependencyRecordsFor(RelationRelationId,
+												   relform1->reltoastrelid);
+				if (count != 1)
+					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
+						 count);
+			}
+			if (relform2->reltoastrelid)
+			{
+				count = deleteDependencyRecordsFor(RelationRelationId,
+												   relform2->reltoastrelid);
+				if (count != 1)
+					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
+						 count);
+			}
 
-		/* Register new dependencies */
-		baseobject.classId = RelationRelationId;
-		baseobject.objectSubId = 0;
-		toastobject.classId = RelationRelationId;
-		toastobject.objectSubId = 0;
+			/* Register new dependencies */
+			baseobject.classId = RelationRelationId;
+			baseobject.objectSubId = 0;
+			toastobject.classId = RelationRelationId;
+			toastobject.objectSubId = 0;
 
-		if (relform1->reltoastrelid)
-		{
-			baseobject.objectId = r1;
-			toastobject.objectId = relform1->reltoastrelid;
-			recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
-		}
+			if (relform1->reltoastrelid)
+			{
+				baseobject.objectId = r1;
+				toastobject.objectId = relform1->reltoastrelid;
+				recordDependencyOn(&toastobject, &baseobject,
+								   DEPENDENCY_INTERNAL);
+			}
 
-		if (relform2->reltoastrelid)
-		{
-			baseobject.objectId = r2;
-			toastobject.objectId = relform2->reltoastrelid;
-			recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+			if (relform2->reltoastrelid)
+			{
+				baseobject.objectId = r2;
+				toastobject.objectId = relform2->reltoastrelid;
+				recordDependencyOn(&toastobject, &baseobject,
+								   DEPENDENCY_INTERNAL);
+			}
 		}
 	}
 
+	/*
+	 * If we're swapping two toast tables by content, do the same for their
+	 * indexes.
+	 */
+	if (swap_toast_by_content &&
+		relform1->reltoastidxid && relform2->reltoastidxid)
+			swap_relation_files(relform1->reltoastidxid,
+								relform2->reltoastidxid,
+								true,
+								InvalidTransactionId);
+
 	/*
 	 * Blow away the old relcache entries now.	We need this kluge because
 	 * relcache.c keeps a link to the smgr relation for the physical file, and
@@ -1187,6 +1222,85 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
 	heap_close(relRelation, RowExclusiveLock);
 }
 
+/*
+ * Remove the transient table that was built by make_new_heap, and finish
+ * cleaning up (including rebuilding all indexes on the old heap).
+ */
+void
+cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content)
+{
+	ObjectAddress object;
+
+	/* Make swap_relation_files' changes visible in the catalogs. */
+	CommandCounterIncrement();
+
+	/* Destroy new heap with old filenode */
+	object.classId = RelationRelationId;
+	object.objectId = OIDNewHeap;
+	object.objectSubId = 0;
+
+	/*
+	 * The new relation is local to our transaction and we know nothing
+	 * depends on it, so DROP_RESTRICT should be OK.
+	 */
+	performDeletion(&object, DROP_RESTRICT);
+
+	/* performDeletion does CommandCounterIncrement at end */
+
+	/*
+	 * Rebuild each index on the relation (but not the toast table, which is
+	 * all-new at this point).  We do not need CommandCounterIncrement()
+	 * because reindex_relation does it.
+	 */
+	reindex_relation(OIDOldHeap, false);
+
+	/*
+	 * At this point, everything is kosher except that, if we did toast swap
+	 * by links, the toast table's name corresponds to the transient table.
+	 * The name is irrelevant to the backend because it's referenced by OID,
+	 * but users looking at the catalogs could be confused.  Rename it to
+	 * prevent this problem.
+	 *
+	 * Note that no lock is required on the relation, because we already hold
+	 * an exclusive lock on it.
+	 */
+	if (!swap_toast_by_content)
+	{
+		Relation	newrel;
+
+		newrel = heap_open(OIDOldHeap, NoLock);
+		if (OidIsValid(newrel->rd_rel->reltoastrelid))
+		{
+			Relation	toastrel;
+			Oid			toastidx;
+			Oid			toastnamespace;
+			char		NewToastName[NAMEDATALEN];
+
+			toastrel = relation_open(newrel->rd_rel->reltoastrelid,
+									 AccessShareLock);
+			toastidx = toastrel->rd_rel->reltoastidxid;
+			toastnamespace = toastrel->rd_rel->relnamespace;
+			relation_close(toastrel, AccessShareLock);
+
+			/* rename the toast table ... */
+			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
+					 OIDOldHeap);
+			RenameRelationInternal(newrel->rd_rel->reltoastrelid,
+								   NewToastName,
+								   toastnamespace);
+
+			/* ... and its index too */
+			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
+					 OIDOldHeap);
+			RenameRelationInternal(toastidx,
+								   NewToastName,
+								   toastnamespace);
+		}
+		relation_close(newrel, NoLock);
+	}
+}
+
+
 /*
  * Get a list of tables that the current user owns and
  * have indisclustered set.  Return the list in a List * of rvsToCluster
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index a729adb0550ec6fe0aba3f4104ee43b4ba8ac61c..dba5f29d661bced4cc720a9e00b54d1af4ef28d6 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.323 2010/02/03 10:01:29 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.324 2010/02/04 00:09:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2866,11 +2866,9 @@ ATRewriteTables(List **wqueue)
 		if (tab->newvals != NIL || tab->new_changeoids)
 		{
 			/* Build a temporary relation and copy data */
+			Relation	OldHeap;
 			Oid			OIDNewHeap;
-			char		NewHeapName[NAMEDATALEN];
 			Oid			NewTableSpace;
-			Relation	OldHeap;
-			ObjectAddress object;
 
 			OldHeap = heap_open(tab->relid, NoLock);
 
@@ -2905,18 +2903,8 @@ ATRewriteTables(List **wqueue)
 
 			heap_close(OldHeap, NoLock);
 
-			/*
-			 * Create the new heap, using a temporary name in the same
-			 * namespace as the existing table.  NOTE: there is some risk of
-			 * collision with user relnames.  Working around this seems more
-			 * trouble than it's worth; in particular, we can't create the new
-			 * heap in a different namespace from the old, or we will have
-			 * problems with the TEMP status of temp tables.
-			 */
-			snprintf(NewHeapName, sizeof(NewHeapName),
-					 "pg_temp_%u", tab->relid);
-
-			OIDNewHeap = make_new_heap(tab->relid, NewHeapName, NewTableSpace);
+			/* Create transient table that will receive the modified data */
+			OIDNewHeap = make_new_heap(tab->relid, NewTableSpace);
 
 			/*
 			 * Copy the heap data into the new table with the desired
@@ -2929,30 +2917,14 @@ ATRewriteTables(List **wqueue)
 			 * Swap the physical files of the old and new heaps.  Since we are
 			 * generating a new heap, we can use RecentXmin for the table's
 			 * new relfrozenxid because we rewrote all the tuples on
-			 * ATRewriteTable, so no older Xid remains on the table.
-			 */
-			swap_relation_files(tab->relid, OIDNewHeap, RecentXmin);
-
-			CommandCounterIncrement();
-
-			/* Destroy new heap with old filenode */
-			object.classId = RelationRelationId;
-			object.objectId = OIDNewHeap;
-			object.objectSubId = 0;
-
-			/*
-			 * The new relation is local to our transaction and we know
-			 * nothing depends on it, so DROP_RESTRICT should be OK.
+			 * ATRewriteTable, so no older Xid remains in the table.  Also,
+			 * we never try to swap toast tables by content, since we have
+			 * no interest in letting this code work on system catalogs.
 			 */
-			performDeletion(&object, DROP_RESTRICT);
-			/* performDeletion does CommandCounterIncrement at end */
+			swap_relation_files(tab->relid, OIDNewHeap, false, RecentXmin);
 
-			/*
-			 * Rebuild each index on the relation (but not the toast table,
-			 * which is all-new anyway).  We do not need
-			 * CommandCounterIncrement() because reindex_relation does it.
-			 */
-			reindex_relation(tab->relid, false);
+			/* Destroy the new heap, removing the old data along with it. */
+			cleanup_heap_swap(tab->relid, OIDNewHeap, false);
 		}
 		else
 		{
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index ba09331aba9c0d135b0c2d2b8a049565f7f6ead8..e71416c0f70996cb7c40b221e2e2db4c3f8ee945 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.301 2010/02/03 01:14:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.302 2010/02/04 00:09:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1925,13 +1925,13 @@ RelationClearRelation(Relation relation, bool rebuild)
 		 * new entry, and this shouldn't happen often enough for that to be
 		 * a big problem.
 		 *
-		 * When rebuilding an open relcache entry, we must preserve ref count
-		 * and rd_createSubid/rd_newRelfilenodeSubid state.  Also attempt to
-		 * preserve the pg_class entry (rd_rel), tupledesc, and rewrite-rule
-		 * substructures in place, because various places assume that these
-		 * structures won't move while they are working with an open relcache
-		 * entry.  (Note: the refcount mechanism for tupledescs might someday
-		 * allow us to remove this hack for the tupledesc.)
+		 * When rebuilding an open relcache entry, we must preserve ref count,
+		 * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state.  Also
+		 * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
+		 * rewrite-rule substructures in place, because various places assume
+		 * that these structures won't move while they are working with an
+		 * open relcache entry.  (Note: the refcount mechanism for tupledescs
+		 * might someday allow us to remove this hack for the tupledesc.)
 		 *
 		 * Note that this process does not touch CurrentResourceOwner; which
 		 * is good because whatever ref counts the entry may have do not
@@ -2005,6 +2005,8 @@ RelationClearRelation(Relation relation, bool rebuild)
 			SWAPFIELD(RuleLock *, rd_rules);
 			SWAPFIELD(MemoryContext, rd_rulescxt);
 		}
+		/* toast OID override must be preserved */
+		SWAPFIELD(Oid, rd_toastoid);
 		/* pgstat_info must be preserved */
 		SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
 
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index c58013c640344ef86e2d0945113b8abb792b84de..f53578143677a00f0d46db8d8f800c6dacfc014e 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.38 2010/01/06 05:31:14 itagaki Exp $
+ * $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.39 2010/02/04 00:09:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,8 +23,11 @@ extern void cluster_rel(Oid tableOid, Oid indexOid, bool recheck,
 extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
 						   bool recheck);
 extern void mark_index_clustered(Relation rel, Oid indexOid);
-extern Oid make_new_heap(Oid OIDOldHeap, const char *NewName,
-			  Oid NewTableSpace);
-extern void swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid);
+
+extern Oid	make_new_heap(Oid OIDOldHeap, Oid NewTableSpace);
+extern void swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
+					TransactionId frozenXid);
+extern void cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
+							  bool swap_toast_by_content);
 
 #endif   /* CLUSTER_H */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 344f37a68729f0277e2b8209e52d93fee52641e4..3f5795d0eaec6a62d3c4d05feeeec0c65444facd 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.120 2010/01/17 22:56:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.121 2010/02/04 00:09:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -202,6 +202,16 @@ typedef struct RelationData
 	uint16	   *rd_exclstrats;	/* exclusion ops' strategy numbers, if any */
 	void	   *rd_amcache;		/* available for use by index AM */
 
+	/*
+	 * Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
+	 * version of a table, we need to make any toast pointers inserted into it
+	 * have the existing toast table's OID, not the OID of the transient toast
+	 * table.  If rd_toastoid isn't InvalidOid, it is the OID to place in
+	 * toast pointers inserted into this rel.  (Note it's set on the new
+	 * version of the main heap, not the toast table itself.)
+	 */
+	Oid			rd_toastoid;	/* Real TOAST table's OID, or InvalidOid */
+
 	/*
 	 * sizes of the free space and visibility map forks, or InvalidBlockNumber
 	 * if not known yet