diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 7c1e3d6bbfd29ae9c2f8bc916262d7c7c5776381..9377550ad672f2e06f3ebfadf58f72ad2f9fb7fc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3262,6 +3262,8 @@ create_grouping_paths(PlannerInfo *root,
 	RelOptInfo *grouped_rel;
 	PathTarget *partial_grouping_target = NULL;
 	AggClauseCosts agg_costs;
+	AggClauseCosts agg_partial_costs;	/* parallel only */
+	AggClauseCosts agg_final_costs;		/* parallel only */
 	Size		hashaggtablesize;
 	double		dNumGroups;
 	double		dNumPartialGroups = 0;
@@ -3346,8 +3348,10 @@ create_grouping_paths(PlannerInfo *root,
 	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
 	if (parse->hasAggs)
 	{
-		count_agg_clauses(root, (Node *) target->exprs, &agg_costs);
-		count_agg_clauses(root, parse->havingQual, &agg_costs);
+		count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true,
+						  false, false);
+		count_agg_clauses(root, parse->havingQual, &agg_costs, true, false,
+						  false);
 	}
 
 	/*
@@ -3422,6 +3426,25 @@ create_grouping_paths(PlannerInfo *root,
 												 NIL,
 												 NIL);
 
+		/*
+		 * Collect statistics about aggregates for estimating costs of
+		 * performing aggregation in parallel.
+		 */
+		MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
+		MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
+		if (parse->hasAggs)
+		{
+			/* partial phase */
+			count_agg_clauses(root, (Node *) partial_grouping_target->exprs,
+							  &agg_partial_costs, false, false, true);
+
+			/* final phase */
+			count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs,
+							  true, true, true);
+			count_agg_clauses(root, parse->havingQual, &agg_final_costs, true,
+							  true, true);
+		}
+
 		if (can_sort)
 		{
 			/* Checked in set_grouped_rel_consider_parallel() */
@@ -3457,7 +3480,7 @@ create_grouping_paths(PlannerInfo *root,
 								parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 													parse->groupClause,
 													NIL,
-													&agg_costs,
+													&agg_partial_costs,
 													dNumPartialGroups,
 													false,
 													false,
@@ -3482,7 +3505,7 @@ create_grouping_paths(PlannerInfo *root,
 
 			hashaggtablesize =
 				estimate_hashagg_tablesize(cheapest_partial_path,
-										   &agg_costs,
+										   &agg_partial_costs,
 										   dNumPartialGroups);
 
 			/*
@@ -3499,7 +3522,7 @@ create_grouping_paths(PlannerInfo *root,
 											AGG_HASHED,
 											parse->groupClause,
 											NIL,
-											&agg_costs,
+											&agg_partial_costs,
 											dNumPartialGroups,
 											false,
 											false,
@@ -3631,7 +3654,7 @@ create_grouping_paths(PlannerInfo *root,
 								parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											parse->groupClause,
 											(List *) parse->havingQual,
-											&agg_costs,
+											&agg_final_costs,
 											dNumGroups,
 											true,
 											true,
@@ -3691,7 +3714,7 @@ create_grouping_paths(PlannerInfo *root,
 			Path   *path = (Path *) linitial(grouped_rel->partial_pathlist);
 
 			hashaggtablesize = estimate_hashagg_tablesize(path,
-														  &agg_costs,
+														  &agg_final_costs,
 														  dNumGroups);
 
 			if (hashaggtablesize < work_mem * 1024L)
@@ -3713,7 +3736,7 @@ create_grouping_paths(PlannerInfo *root,
 											AGG_HASHED,
 											parse->groupClause,
 											(List *) parse->havingQual,
-											&agg_costs,
+											&agg_final_costs,
 											dNumGroups,
 											true,
 											true,
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 5674a73dfe0f834e4ab73ac2c06052dd8c7e495c..759566ad4616fff050e393d65b54bc2c7b9a0a5c 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -61,6 +61,9 @@ typedef struct
 {
 	PlannerInfo *root;
 	AggClauseCosts *costs;
+	bool		finalizeAggs;
+	bool		combineStates;
+	bool		serialStates;
 } count_agg_clauses_context;
 
 typedef struct
@@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context)
  * are no subqueries.  There mustn't be outer-aggregate references either.
  */
 void
-count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs)
+count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs,
+				  bool finalizeAggs, bool combineStates, bool serialStates)
 {
 	count_agg_clauses_context context;
 
 	context.root = root;
 	context.costs = costs;
+	context.finalizeAggs = finalizeAggs;
+	context.combineStates = combineStates;
+	context.serialStates = serialStates;
 	(void) count_agg_clauses_walker(clause, &context);
 }
 
@@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		Form_pg_aggregate aggform;
 		Oid			aggtransfn;
 		Oid			aggfinalfn;
+		Oid			aggcombinefn;
+		Oid			aggserialfn;
+		Oid			aggdeserialfn;
 		Oid			aggtranstype;
 		int32		aggtransspace;
 		QualCost	argcosts;
@@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
 		aggtransfn = aggform->aggtransfn;
 		aggfinalfn = aggform->aggfinalfn;
+		aggcombinefn = aggform->aggcombinefn;
+		aggserialfn = aggform->aggserialfn;
+		aggdeserialfn = aggform->aggdeserialfn;
 		aggtranstype = aggform->aggtranstype;
 		aggtransspace = aggform->aggtransspace;
 		ReleaseSysCache(aggTuple);
@@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
 			costs->numOrderedAggs++;
 
-		/* add component function execution costs to appropriate totals */
-		costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
-		if (OidIsValid(aggfinalfn))
-			costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+		/*
+		 * Add the appropriate component function execution costs to
+		 * appropriate totals.
+		 */
+		if (context->combineStates)
+		{
+			/* charge for combining previously aggregated states */
+			costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
 
-		/* also add the input expressions' cost to per-input-row costs */
-		cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
-		costs->transCost.startup += argcosts.startup;
-		costs->transCost.per_tuple += argcosts.per_tuple;
+			/* charge for deserialization, when appropriate */
+			if (context->serialStates && OidIsValid(aggdeserialfn))
+				costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+		}
+		else
+			costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+
+		if (context->finalizeAggs)
+		{
+			if (OidIsValid(aggfinalfn))
+				costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+		}
+		else if (context->serialStates)
+		{
+			if (OidIsValid(aggserialfn))
+				costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+		}
 
 		/*
-		 * Add any filter's cost to per-input-row costs.
-		 *
-		 * XXX Ideally we should reduce input expression costs according to
-		 * filter selectivity, but it's not clear it's worth the trouble.
+		 * Some costs will already have been incurred by the initial aggregate
+		 * node, so we mustn't include these again.
 		 */
-		if (aggref->aggfilter)
+		if (!context->combineStates)
 		{
-			cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
-								context->root);
+			/* add the input expressions' cost to per-input-row costs */
+			cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
 			costs->transCost.startup += argcosts.startup;
 			costs->transCost.per_tuple += argcosts.per_tuple;
+
+			/*
+			 * Add any filter's cost to per-input-row costs.
+			 *
+			 * XXX Ideally we should reduce input expression costs according
+			 * to filter selectivity, but it's not clear it's worth the
+			 * trouble.
+			 */
+			if (aggref->aggfilter)
+			{
+				cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
+									context->root);
+				costs->transCost.startup += argcosts.startup;
+				costs->transCost.per_tuple += argcosts.per_tuple;
+			}
 		}
 
 		/*
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 3ab57f155d23736683497551c0b1a5997c3e5acc..1eb1eb4a543950fb8cbcf997cbd2e37165c5eb46 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause);
 extern PartialAggType aggregates_allow_partial(Node *clause);
 extern bool contain_agg_clause(Node *clause);
 extern void count_agg_clauses(PlannerInfo *root, Node *clause,
-				  AggClauseCosts *costs);
+				  AggClauseCosts *costs, bool finalizeAggs,
+				  bool combineStates, bool serialStates);
 
 extern bool contain_window_function(Node *clause);
 extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);