diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7c1e3d6bbfd29ae9c2f8bc916262d7c7c5776381..9377550ad672f2e06f3ebfadf58f72ad2f9fb7fc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3262,6 +3262,8 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel; PathTarget *partial_grouping_target = NULL; AggClauseCosts agg_costs; + AggClauseCosts agg_partial_costs; /* parallel only */ + AggClauseCosts agg_final_costs; /* parallel only */ Size hashaggtablesize; double dNumGroups; double dNumPartialGroups = 0; @@ -3346,8 +3348,10 @@ create_grouping_paths(PlannerInfo *root, MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); if (parse->hasAggs) { - count_agg_clauses(root, (Node *) target->exprs, &agg_costs); - count_agg_clauses(root, parse->havingQual, &agg_costs); + count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true, + false, false); + count_agg_clauses(root, parse->havingQual, &agg_costs, true, false, + false); } /* @@ -3422,6 +3426,25 @@ create_grouping_paths(PlannerInfo *root, NIL, NIL); + /* + * Collect statistics about aggregates for estimating costs of + * performing aggregation in parallel. + */ + MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + /* partial phase */ + count_agg_clauses(root, (Node *) partial_grouping_target->exprs, + &agg_partial_costs, false, false, true); + + /* final phase */ + count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs, + true, true, true); + count_agg_clauses(root, parse->havingQual, &agg_final_costs, true, + true, true); + } + if (can_sort) { /* Checked in set_grouped_rel_consider_parallel() */ @@ -3457,7 +3480,7 @@ create_grouping_paths(PlannerInfo *root, parse->groupClause ? AGG_SORTED : AGG_PLAIN, parse->groupClause, NIL, - &agg_costs, + &agg_partial_costs, dNumPartialGroups, false, false, @@ -3482,7 +3505,7 @@ create_grouping_paths(PlannerInfo *root, hashaggtablesize = estimate_hashagg_tablesize(cheapest_partial_path, - &agg_costs, + &agg_partial_costs, dNumPartialGroups); /* @@ -3499,7 +3522,7 @@ create_grouping_paths(PlannerInfo *root, AGG_HASHED, parse->groupClause, NIL, - &agg_costs, + &agg_partial_costs, dNumPartialGroups, false, false, @@ -3631,7 +3654,7 @@ create_grouping_paths(PlannerInfo *root, parse->groupClause ? AGG_SORTED : AGG_PLAIN, parse->groupClause, (List *) parse->havingQual, - &agg_costs, + &agg_final_costs, dNumGroups, true, true, @@ -3691,7 +3714,7 @@ create_grouping_paths(PlannerInfo *root, Path *path = (Path *) linitial(grouped_rel->partial_pathlist); hashaggtablesize = estimate_hashagg_tablesize(path, - &agg_costs, + &agg_final_costs, dNumGroups); if (hashaggtablesize < work_mem * 1024L) @@ -3713,7 +3736,7 @@ create_grouping_paths(PlannerInfo *root, AGG_HASHED, parse->groupClause, (List *) parse->havingQual, - &agg_costs, + &agg_final_costs, dNumGroups, true, true, diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 5674a73dfe0f834e4ab73ac2c06052dd8c7e495c..759566ad4616fff050e393d65b54bc2c7b9a0a5c 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -61,6 +61,9 @@ typedef struct { PlannerInfo *root; AggClauseCosts *costs; + bool finalizeAggs; + bool combineStates; + bool serialStates; } count_agg_clauses_context; typedef struct @@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context) * are no subqueries. There mustn't be outer-aggregate references either. */ void -count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs) +count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs, + bool finalizeAggs, bool combineStates, bool serialStates) { count_agg_clauses_context context; context.root = root; context.costs = costs; + context.finalizeAggs = finalizeAggs; + context.combineStates = combineStates; + context.serialStates = serialStates; (void) count_agg_clauses_walker(clause, &context); } @@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) Form_pg_aggregate aggform; Oid aggtransfn; Oid aggfinalfn; + Oid aggcombinefn; + Oid aggserialfn; + Oid aggdeserialfn; Oid aggtranstype; int32 aggtransspace; QualCost argcosts; @@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); aggtransfn = aggform->aggtransfn; aggfinalfn = aggform->aggfinalfn; + aggcombinefn = aggform->aggcombinefn; + aggserialfn = aggform->aggserialfn; + aggdeserialfn = aggform->aggdeserialfn; aggtranstype = aggform->aggtranstype; aggtransspace = aggform->aggtransspace; ReleaseSysCache(aggTuple); @@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) if (aggref->aggorder != NIL || aggref->aggdistinct != NIL) costs->numOrderedAggs++; - /* add component function execution costs to appropriate totals */ - costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost; - if (OidIsValid(aggfinalfn)) - costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost; + /* + * Add the appropriate component function execution costs to + * appropriate totals. + */ + if (context->combineStates) + { + /* charge for combining previously aggregated states */ + costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost; - /* also add the input expressions' cost to per-input-row costs */ - cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root); - costs->transCost.startup += argcosts.startup; - costs->transCost.per_tuple += argcosts.per_tuple; + /* charge for deserialization, when appropriate */ + if (context->serialStates && OidIsValid(aggdeserialfn)) + costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost; + } + else + costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost; + + if (context->finalizeAggs) + { + if (OidIsValid(aggfinalfn)) + costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost; + } + else if (context->serialStates) + { + if (OidIsValid(aggserialfn)) + costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost; + } /* - * Add any filter's cost to per-input-row costs. - * - * XXX Ideally we should reduce input expression costs according to - * filter selectivity, but it's not clear it's worth the trouble. + * Some costs will already have been incurred by the initial aggregate + * node, so we mustn't include these again. */ - if (aggref->aggfilter) + if (!context->combineStates) { - cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter, - context->root); + /* add the input expressions' cost to per-input-row costs */ + cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root); costs->transCost.startup += argcosts.startup; costs->transCost.per_tuple += argcosts.per_tuple; + + /* + * Add any filter's cost to per-input-row costs. + * + * XXX Ideally we should reduce input expression costs according + * to filter selectivity, but it's not clear it's worth the + * trouble. + */ + if (aggref->aggfilter) + { + cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter, + context->root); + costs->transCost.startup += argcosts.startup; + costs->transCost.per_tuple += argcosts.per_tuple; + } } /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 3ab57f155d23736683497551c0b1a5997c3e5acc..1eb1eb4a543950fb8cbcf997cbd2e37165c5eb46 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause); extern PartialAggType aggregates_allow_partial(Node *clause); extern bool contain_agg_clause(Node *clause); extern void count_agg_clauses(PlannerInfo *root, Node *clause, - AggClauseCosts *costs); + AggClauseCosts *costs, bool finalizeAggs, + bool combineStates, bool serialStates); extern bool contain_window_function(Node *clause); extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);