From 69995c3b3fd64361bb4d3938315f3e88ccc01e53 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 27 Jul 2016 17:44:34 -0400
Subject: [PATCH] Fix cost_rescan() to account for multi-batch hashing
 correctly.

cost_rescan assumed that we don't need to rebuild the hash table when
rescanning a hash join.  However, that's currently only true for
single-batch joins; for a multi-batch join we must charge full freight.

This probably has escaped notice because we'd be unlikely to put a hash
join on the inside of a nestloop anyway.  Nonetheless, it's wrong.
Fix in HEAD, but don't backpatch for fear of destabilizing plans in
stable releases.
---
 src/backend/optimizer/path/costsize.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1c20edcdfeb..2a49639f125 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3114,11 +3114,21 @@ cost_rescan(PlannerInfo *root, Path *path,
 		case T_HashJoin:
 
 			/*
-			 * Assume that all of the startup cost represents hash table
-			 * building, which we won't have to do over.
+			 * If it's a single-batch join, we don't need to rebuild the hash
+			 * table during a rescan.
 			 */
-			*rescan_startup_cost = 0;
-			*rescan_total_cost = path->total_cost - path->startup_cost;
+			if (((HashPath *) path)->num_batches == 1)
+			{
+				/* Startup cost is exactly the cost of hash table building */
+				*rescan_startup_cost = 0;
+				*rescan_total_cost = path->total_cost - path->startup_cost;
+			}
+			else
+			{
+				/* Otherwise, no special treatment */
+				*rescan_startup_cost = path->startup_cost;
+				*rescan_total_cost = path->total_cost;
+			}
 			break;
 		case T_CteScan:
 		case T_WorkTableScan:
-- 
GitLab