diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6985b59aeb477044d27f21a3b29285f4bb58ee4e..e91044bc4adb314b570b5c6d94f1e8fad4489b70 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1837,6 +1837,22 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from) return newnode; } +/* + * _copyMinMaxAggInfo + */ +static MinMaxAggInfo * +_copyMinMaxAggInfo(MinMaxAggInfo *from) +{ + MinMaxAggInfo *newnode = makeNode(MinMaxAggInfo); + + COPY_SCALAR_FIELD(aggfnoid); + COPY_SCALAR_FIELD(aggsortop); + COPY_NODE_FIELD(target); + COPY_NODE_FIELD(pathkeys); + + return newnode; +} + /* **************************************************************** * parsenodes.h copy functions * **************************************************************** @@ -3921,6 +3937,9 @@ copyObject(void *from) case T_PlaceHolderInfo: retval = _copyPlaceHolderInfo(from); break; + case T_MinMaxAggInfo: + retval = _copyMinMaxAggInfo(from); + break; /* * VALUE NODES diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 300b0b55a8f520401c0d9a58fad82c7a4c51658f..73b28f96c0e8f47d35d6ca6a4051fa2f10d3d71c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -844,6 +844,17 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b) return true; } +static bool +_equalMinMaxAggInfo(MinMaxAggInfo *a, MinMaxAggInfo *b) +{ + COMPARE_SCALAR_FIELD(aggfnoid); + COMPARE_SCALAR_FIELD(aggsortop); + COMPARE_NODE_FIELD(target); + COMPARE_NODE_FIELD(pathkeys); + + return true; +} + /* * Stuff from parsenodes.h @@ -2568,6 +2579,9 @@ equal(void *a, void *b) case T_PlaceHolderInfo: retval = _equalPlaceHolderInfo(a, b); break; + case T_MinMaxAggInfo: + retval = _equalMinMaxAggInfo(a, b); + break; case T_List: case T_IntList: diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e53befa2f4299efca4c889dadefd36320c84557b..699cf8df83197b45ae1852ea2d5dba341112e7eb 100644 --- 
a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1608,6 +1608,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node) WRITE_NODE_FIELD(window_pathkeys); WRITE_NODE_FIELD(distinct_pathkeys); WRITE_NODE_FIELD(sort_pathkeys); + WRITE_NODE_FIELD(minmax_aggs); WRITE_FLOAT_FIELD(total_table_pages, "%.0f"); WRITE_FLOAT_FIELD(tuple_fraction, "%.4f"); WRITE_BOOL_FIELD(hasInheritedTarget); @@ -1808,6 +1809,17 @@ _outPlaceHolderInfo(StringInfo str, PlaceHolderInfo *node) WRITE_INT_FIELD(ph_width); } +static void +_outMinMaxAggInfo(StringInfo str, MinMaxAggInfo *node) +{ + WRITE_NODE_TYPE("MINMAXAGGINFO"); + + WRITE_OID_FIELD(aggfnoid); + WRITE_OID_FIELD(aggsortop); + WRITE_NODE_FIELD(target); + WRITE_NODE_FIELD(pathkeys); +} + static void _outPlannerParamItem(StringInfo str, PlannerParamItem *node) { @@ -2845,6 +2857,9 @@ _outNode(StringInfo str, void *obj) case T_PlaceHolderInfo: _outPlaceHolderInfo(str, obj); break; + case T_MinMaxAggInfo: + _outMinMaxAggInfo(str, obj); + break; case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 20c6d73617d507b88fce427a681320a684b47cd0..8af0c6dc482372244b15a8a5f5a4a562e34ac91b 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -912,6 +912,39 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return pathkeys; } +/**************************************************************************** + * PATHKEYS AND AGGREGATES + ****************************************************************************/ + +/* + * make_pathkeys_for_aggregate + * Generate a pathkeys list (always a 1-item list) that represents + * the sort order needed by a MIN/MAX aggregate + * + * This is only called before EquivalenceClass merging, so we can assume + * we are not supposed to canonicalize. 
+ */ +List * +make_pathkeys_for_aggregate(PlannerInfo *root, + Expr *aggtarget, + Oid aggsortop) +{ + PathKey *pathkey; + + /* + * We arbitrarily set nulls_first to false. Actually, a MIN/MAX agg can + * use either nulls ordering option, but that is dealt with elsewhere. + */ + pathkey = make_pathkey_from_sortinfo(root, + aggtarget, + aggsortop, + false, /* nulls_first */ + 0, + true, + false); + return list_make1(pathkey); +} + /**************************************************************************** * PATHKEYS AND MERGECLAUSES ****************************************************************************/ @@ -1379,10 +1412,11 @@ make_inner_pathkeys_for_merge(PlannerInfo *root, * PATHKEY USEFULNESS CHECKS * * We only want to remember as many of the pathkeys of a path as have some - * potential use, either for subsequent mergejoins or for meeting the query's - * requested output ordering. This ensures that add_path() won't consider - * a path to have a usefully different ordering unless it really is useful. - * These routines check for usefulness of given pathkeys. + * potential use, which can include subsequent mergejoins, meeting the query's + * requested output ordering, or implementing MIN/MAX aggregates. This + * ensures that add_path() won't consider a path to have a usefully different + * ordering unless it really is useful. These routines check for usefulness + * of given pathkeys. ****************************************************************************/ /* @@ -1403,7 +1437,7 @@ make_inner_pathkeys_for_merge(PlannerInfo *root, * that direction should be preferred, in hopes of avoiding a final sort step. * right_merge_direction() implements this heuristic. */ -int +static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) { int useful = 0; @@ -1506,7 +1540,7 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey) * no good to order by just the first key(s) of the requested ordering. 
* So the result is always either 0 or list_length(root->query_pathkeys). */ -int +static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) { if (root->query_pathkeys == NIL) @@ -1524,6 +1558,50 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return 0; /* path ordering not useful */ } +/* + * pathkeys_useful_for_minmax + * Count the number of pathkeys that are useful for implementing + * some MIN/MAX aggregate. + * + * Like pathkeys_useful_for_ordering, this is a yes-or-no affair, but + * there could be several MIN/MAX aggregates and we can match to any one. + * + * We can't use pathkeys_contained_in() because we would like to match + * pathkeys regardless of the nulls_first setting. However, we know that + * MIN/MAX aggregates will have at most one item in their pathkeys, so it's + * not too complicated to match by brute force. + */ +static int +pathkeys_useful_for_minmax(PlannerInfo *root, List *pathkeys) +{ + PathKey *pathkey; + ListCell *lc; + + if (pathkeys == NIL) + return 0; /* unordered path */ + pathkey = (PathKey *) linitial(pathkeys); + + foreach(lc, root->minmax_aggs) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + PathKey *mmpathkey; + + /* Ignore minmax agg if its pathkey turned out to be redundant */ + if (mminfo->pathkeys == NIL) + continue; + + Assert(list_length(mminfo->pathkeys) == 1); + mmpathkey = (PathKey *) linitial(mminfo->pathkeys); + + if (mmpathkey->pk_eclass == pathkey->pk_eclass && + mmpathkey->pk_opfamily == pathkey->pk_opfamily && + mmpathkey->pk_strategy == pathkey->pk_strategy) + return 1; + } + + return 0; /* path ordering not useful */ +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. 
@@ -1535,11 +1613,15 @@ truncate_useless_pathkeys(PlannerInfo *root, { int nuseful; int nuseful2; + int nuseful3; nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; + nuseful3 = pathkeys_useful_for_minmax(root, pathkeys); + if (nuseful3 > nuseful) + nuseful = nuseful3; /* * Note: not safe to modify input list destructively, but we can avoid @@ -1565,8 +1647,8 @@ truncate_useless_pathkeys(PlannerInfo *root, * * We could make the test more complex, for example checking to see if any of * the joinclauses are really mergejoinable, but that likely wouldn't win - * often enough to repay the extra cycles. Queries with neither a join nor - * a sort are reasonably common, though, so this much work seems worthwhile. + * often enough to repay the extra cycles. Queries with no join, sort, or + * aggregate at all are reasonably common, so this much work seems worthwhile. */ bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) @@ -1575,5 +1657,7 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) return true; /* might be able to use pathkeys for merging */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ + if (root->minmax_aggs != NIL) + return true; /* might be able to use them for MIN/MAX */ return false; /* definitely useless */ } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 56647c93ab3688da50c531499a182c3d0572bd2c..7a84bd91239895e400862647bc25724275cab583 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -81,6 +81,7 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, List *indexquals); +static Node *fix_indexqual_operand(Node 
*node, IndexOptInfo *index); static List *get_switched_clauses(List *clauses, Relids outerrelids); static List *order_qual_clauses(PlannerInfo *root, List *clauses); static void copy_path_costsize(Plan *dest, Path *src); @@ -2396,10 +2397,8 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, /* * fix_indexqual_operand * Convert an indexqual expression to a Var referencing the index column. - * - * This is exported because planagg.c needs it. */ -Node * +static Node * fix_indexqual_operand(Node *node, IndexOptInfo *index) { /* diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index b2765613a4f7677f771b9e16b97ce1c43ec62e02..9a18be2046e28a8a569cd6eacc49cffb48925ab3 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -3,6 +3,17 @@ * planagg.c * Special planning for aggregate queries. * + * This module tries to replace MIN/MAX aggregate functions by subqueries + * of the form + * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1) + * Given a suitable index on tab.col, this can be much faster than the + * generic scan-all-the-rows aggregation plan. We can handle multiple + * MIN/MAX aggregates by generating multiple subqueries, and their + * orderings can be different. However, if the query contains any + * non-optimizable aggregates, there's no point since we'll have to + * scan all the rows anyway. 
+ * + * * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -24,71 +35,62 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" #include "optimizer/subselect.h" -#include "parser/parse_clause.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +/* Per-aggregate info during optimize_minmax_aggregates() */ typedef struct { - Oid aggfnoid; /* pg_proc Oid of the aggregate */ - Oid aggsortop; /* Oid of its sort operator */ - Expr *target; /* expression we are aggregating on */ - NullTest *notnulltest; /* expression for "target IS NOT NULL" */ - IndexPath *path; /* access path for index scan */ + MinMaxAggInfo *mminfo; /* info gathered by preprocessing */ + Path *path; /* access path for ordered scan */ Cost pathcost; /* estimated cost to fetch first row */ - bool nulls_first; /* null ordering direction matching index */ Param *param; /* param for subplan's output */ -} MinMaxAggInfo; +} PrivateMMAggInfo; static bool find_minmax_aggs_walker(Node *node, List **context); -static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel, - MinMaxAggInfo *info); -static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info, - IndexOptInfo *index, int indexcol); -static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info); -static void attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan); +static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel, + MinMaxAggInfo *mminfo); +static bool path_usable_for_agg(Path *path); +static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, + PrivateMMAggInfo *info); +static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, + PrivateMMAggInfo *info, Path *path); static Node *replace_aggs_with_params_mutator(Node 
*node, List **context); static Oid fetch_agg_sort_op(Oid aggfnoid); /* - * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes + * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates * - * This checks to see if we can replace MIN/MAX aggregate functions by - * subqueries of the form - * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1) - * Given a suitable index on tab.col, this can be much faster than the - * generic scan-all-the-rows plan. + * Check to see whether the query contains MIN/MAX aggregate functions that + * might be optimizable via indexscans. If it does, and all the aggregates + * are potentially optimizable, then set up root->minmax_aggs with a list of + * these aggregates. * - * We are passed the preprocessed tlist, and the best path - * devised for computing the input of a standard Agg node. If we are able - * to optimize all the aggregates, and the result is estimated to be cheaper - * than the generic aggregate method, then generate and return a Plan that - * does it that way. Otherwise, return NULL. + * Note: we are passed the preprocessed targetlist separately, because it's + * not necessarily equal to root->parse->targetList. 
*/ -Plan * -optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) +void +preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) { Query *parse = root->parse; FromExpr *jtnode; RangeTblRef *rtr; RangeTblEntry *rte; - RelOptInfo *rel; List *aggs_list; - ListCell *l; - Cost total_cost; - Path agg_p; - Plan *plan; - Node *hqual; - QualCost tlist_cost; + ListCell *lc; + + /* minmax_aggs list should be empty at this point */ + Assert(root->minmax_aggs == NIL); /* Nothing to do if query has no aggregates */ if (!parse->hasAggs) - return NULL; + return; Assert(!parse->setOperations); /* shouldn't get here if a setop */ Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ @@ -101,63 +103,126 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) * so there's not much point in optimizing MIN/MAX. */ if (parse->groupClause || parse->hasWindowFuncs) - return NULL; + return; /* * We also restrict the query to reference exactly one table, since join * conditions can't be handled reasonably. (We could perhaps handle a * query containing cartesian-product joins, but it hardly seems worth the * trouble.) However, the single real table could be buried in several - * levels of FromExpr. + * levels of FromExpr due to subqueries. Note the single table could be + * an inheritance parent, too. */ jtnode = parse->jointree; while (IsA(jtnode, FromExpr)) { if (list_length(jtnode->fromlist) != 1) - return NULL; + return; jtnode = linitial(jtnode->fromlist); } if (!IsA(jtnode, RangeTblRef)) - return NULL; + return; rtr = (RangeTblRef *) jtnode; rte = planner_rt_fetch(rtr->rtindex, root); - if (rte->rtekind != RTE_RELATION || rte->inh) - return NULL; - rel = find_base_rel(root, rtr->rtindex); + if (rte->rtekind != RTE_RELATION) + return; /* - * Since this optimization is not applicable all that often, we want to - * fall out before doing very much work if possible. Therefore we do the - * work in several passes. 
The first pass scans the tlist and HAVING qual - * to find all the aggregates and verify that each of them is a MIN/MAX - * aggregate. If that succeeds, the second pass looks at each aggregate - * to see if it is optimizable; if so we make an IndexPath describing how - * we would scan it. (We do not try to optimize if only some aggs are - * optimizable, since that means we'll have to scan all the rows anyway.) - * If that succeeds, we have enough info to compare costs against the - * generic implementation. Only if that test passes do we build a Plan. + * Scan the tlist and HAVING qual to find all the aggregates and verify + * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. */ - - /* Pass 1: find all the aggregates */ aggs_list = NIL; if (find_minmax_aggs_walker((Node *) tlist, &aggs_list)) - return NULL; + return; if (find_minmax_aggs_walker(parse->havingQual, &aggs_list)) + return; + + /* + * OK, there is at least the possibility of performing the optimization. + * Build pathkeys (and thereby EquivalenceClasses) for each aggregate. + * The existence of the EquivalenceClasses will prompt the path generation + * logic to try to build paths matching the desired sort ordering(s). + * + * Note: the pathkeys are non-canonical at this point. They'll be fixed + * later by canonicalize_all_pathkeys(). + */ + foreach(lc, aggs_list) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + mminfo->pathkeys = make_pathkeys_for_aggregate(root, + mminfo->target, + mminfo->aggsortop); + } + + /* + * We're done until path generation is complete. Save info for later. + */ + root->minmax_aggs = aggs_list; +} + +/* + * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes + * + * Check to see whether all the aggregates are in fact optimizable into + * indexscans. If so, and the result is estimated to be cheaper than the + * generic aggregate method, then generate and return a Plan that does it + * that way. Otherwise, return NULL. 
+ * + * We are passed the preprocessed tlist, as well as the best path devised for + * computing the input of a standard Agg node. + */ +Plan * +optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) +{ + Query *parse = root->parse; + FromExpr *jtnode; + RangeTblRef *rtr; + RelOptInfo *rel; + List *aggs_list; + ListCell *lc; + Cost total_cost; + Path agg_p; + Plan *plan; + Node *hqual; + QualCost tlist_cost; + + /* Nothing to do if preprocess_minmax_aggregates rejected the query */ + if (root->minmax_aggs == NIL) return NULL; - /* Pass 2: see if each one is optimizable */ + /* Re-locate the one real table identified by preprocess_minmax_aggregates */ + jtnode = parse->jointree; + while (IsA(jtnode, FromExpr)) + { + Assert(list_length(jtnode->fromlist) == 1); + jtnode = linitial(jtnode->fromlist); + } + Assert(IsA(jtnode, RangeTblRef)); + rtr = (RangeTblRef *) jtnode; + rel = find_base_rel(root, rtr->rtindex); + + /* + * Examine each agg to see if we can find a suitable ordered path for it. + * Give up if any agg isn't indexable. + */ + aggs_list = NIL; total_cost = 0; - foreach(l, aggs_list) + foreach(lc, root->minmax_aggs) { - MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l); + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + PrivateMMAggInfo *info; - if (!build_minmax_path(root, rel, info)) + info = find_minmax_path(root, rel, mminfo); + if (!info) return NULL; + aggs_list = lappend(aggs_list, info); total_cost += info->pathcost; } /* - * Make the cost comparison. + * Now we have enough info to compare costs against the generic aggregate + * implementation. * * Note that we don't include evaluation cost of the tlist here; this is * OK since it isn't included in best_path's cost either, and should be @@ -173,12 +238,12 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) /* * OK, we are going to generate an optimized plan. + * + * First, generate a subplan and output Param node for each agg. 
*/ - - /* Pass 3: generate subplans and output Param nodes */ - foreach(l, aggs_list) + foreach(lc, aggs_list) { - make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l)); + make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc)); } /* @@ -241,36 +306,43 @@ find_minmax_aggs_walker(Node *node, List **context) Aggref *aggref = (Aggref *) node; Oid aggsortop; TargetEntry *curTarget; - MinMaxAggInfo *info; + MinMaxAggInfo *mminfo; ListCell *l; Assert(aggref->agglevelsup == 0); if (list_length(aggref->args) != 1 || aggref->aggorder != NIL) return true; /* it couldn't be MIN/MAX */ /* note: we do not care if DISTINCT is mentioned ... */ + curTarget = (TargetEntry *) linitial(aggref->args); aggsortop = fetch_agg_sort_op(aggref->aggfnoid); if (!OidIsValid(aggsortop)) return true; /* not a MIN/MAX aggregate */ + if (contain_mutable_functions((Node *) curTarget->expr)) + return true; /* not potentially indexable */ + + if (type_is_rowtype(exprType((Node *) curTarget->expr))) + return true; /* IS NOT NULL would have weird semantics */ + /* * Check whether it's already in the list, and add it if not. 
*/ - curTarget = (TargetEntry *) linitial(aggref->args); foreach(l, *context) { - info = (MinMaxAggInfo *) lfirst(l); - if (info->aggfnoid == aggref->aggfnoid && - equal(info->target, curTarget->expr)) + mminfo = (MinMaxAggInfo *) lfirst(l); + if (mminfo->aggfnoid == aggref->aggfnoid && + equal(mminfo->target, curTarget->expr)) return false; } - info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo)); - info->aggfnoid = aggref->aggfnoid; - info->aggsortop = aggsortop; - info->target = curTarget->expr; + mminfo = makeNode(MinMaxAggInfo); + mminfo->aggfnoid = aggref->aggfnoid; + mminfo->aggsortop = aggsortop; + mminfo->target = curTarget->expr; + mminfo->pathkeys = NIL; /* don't compute pathkeys yet */ - *context = lappend(*context, info); + *context = lappend(*context, mminfo); /* * We need not recurse into the argument, since it can't contain any @@ -284,204 +356,151 @@ find_minmax_aggs_walker(Node *node, List **context) } /* - * build_minmax_path - * Given a MIN/MAX aggregate, try to find an index it can be optimized - * with. Build a Path describing the best such index path. - * - * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path - * is filled in. + * find_minmax_path + * Given a MIN/MAX aggregate, try to find an ordered Path it can be + * optimized with. * - * XXX look at sharing more code with indxpath.c. - * - * Note: check_partial_indexes() must have been run previously. + * If successful, build and return a PrivateMMAggInfo struct. Otherwise, + * return NULL. 
*/ -static bool -build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info) +static PrivateMMAggInfo * +find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo) { - IndexPath *best_path = NULL; + PrivateMMAggInfo *info; + Path *best_path = NULL; Cost best_cost = 0; - bool best_nulls_first = false; - NullTest *ntest; - List *allquals; - ListCell *l; - - /* Build "target IS NOT NULL" expression for use below */ - ntest = makeNode(NullTest); - ntest->nulltesttype = IS_NOT_NULL; - ntest->arg = copyObject(info->target); - ntest->argisrow = type_is_rowtype(exprType((Node *) ntest->arg)); - if (ntest->argisrow) - return false; /* punt on composites */ - info->notnulltest = ntest; + double path_fraction; + PathKey *mmpathkey; + ListCell *lc; /* - * Build list of existing restriction clauses plus the notnull test. We - * cheat a bit by not bothering with a RestrictInfo node for the notnull - * test --- predicate_implied_by() won't care. + * Punt if the aggregate's pathkey turned out to be redundant, ie its + * pathkeys list is now empty. This would happen with something like + * "SELECT max(x) ... WHERE x = constant". There's no need to try to + * optimize such a case, because if there is an index that would help, + * it should already have been used with the WHERE clause. */ - allquals = list_concat(list_make1(ntest), rel->baserestrictinfo); + if (mminfo->pathkeys == NIL) + return NULL; - foreach(l, rel->indexlist) - { - IndexOptInfo *index = (IndexOptInfo *) lfirst(l); - ScanDirection indexscandir = NoMovementScanDirection; - int indexcol; - int prevcol; - List *restrictclauses; - IndexPath *new_path; - Cost new_cost; - bool found_clause; + /* + * Search the paths that were generated for the rel to see if there are + * any with the desired ordering. There could be multiple such paths, + * in which case take the cheapest (as measured according to how fast it + * will be to fetch the first row). 
+ * + * We can't use pathkeys_contained_in() to check the ordering, because we + * would like to match pathkeys regardless of the nulls_first setting. + * However, we know that MIN/MAX aggregates will have at most one item in + * their pathkeys, so it's not too complicated to match by brute force. + * + * Note: this test ignores the possible costs associated with skipping + * NULL tuples. We assume that adding the not-null criterion to the + * indexqual doesn't really cost anything. + */ + if (rel->rows > 1.0) + path_fraction = 1.0 / rel->rows; + else + path_fraction = 1.0; - /* Ignore non-btree indexes */ - if (index->relam != BTREE_AM_OID) - continue; + Assert(list_length(mminfo->pathkeys) == 1); + mmpathkey = (PathKey *) linitial(mminfo->pathkeys); - /* - * Ignore partial indexes that do not match the query --- unless their - * predicates can be proven from the baserestrict list plus the IS NOT - * NULL test. In that case we can use them. - */ - if (index->indpred != NIL && !index->predOK && - !predicate_implied_by(index->indpred, allquals)) - continue; + foreach(lc, rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + PathKey *pathkey; + Cost path_cost; - /* - * Look for a match to one of the index columns. (In a stupidly - * designed index, there could be multiple matches, but we only care - * about the first one.) - */ - for (indexcol = 0; indexcol < index->ncolumns; indexcol++) - { - indexscandir = match_agg_to_index_col(info, index, indexcol); - if (!ScanDirectionIsNoMovement(indexscandir)) - break; - } - if (ScanDirectionIsNoMovement(indexscandir)) - continue; + if (path->pathkeys == NIL) + continue; /* unordered path */ + pathkey = (PathKey *) linitial(path->pathkeys); - /* - * If the match is not at the first index column, we have to verify - * that there are "x = something" restrictions on all the earlier - * index columns. 
Since we'll need the restrictclauses list anyway to - * build the path, it's convenient to extract that first and then look - * through it for the equality restrictions. - */ - restrictclauses = group_clauses_by_indexkey(index, - index->rel->baserestrictinfo, - NIL, - NULL, - SAOP_FORBID, - &found_clause); - - if (list_length(restrictclauses) < indexcol) - continue; /* definitely haven't got enough */ - for (prevcol = 0; prevcol < indexcol; prevcol++) + if (mmpathkey->pk_eclass == pathkey->pk_eclass && + mmpathkey->pk_opfamily == pathkey->pk_opfamily && + mmpathkey->pk_strategy == pathkey->pk_strategy) { - List *rinfos = (List *) list_nth(restrictclauses, prevcol); - ListCell *ll; - - foreach(ll, rinfos) + /* + * OK, it has the right ordering; is it acceptable otherwise? + * (We test in this order because the pathkey check is cheap.) + */ + if (path_usable_for_agg(path)) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll); - int strategy; - - /* Could be an IS_NULL test, if so ignore */ - if (!is_opclause(rinfo->clause)) - continue; - strategy = - get_op_opfamily_strategy(((OpExpr *) rinfo->clause)->opno, - index->opfamily[prevcol]); - if (strategy == BTEqualStrategyNumber) - break; + /* + * It'll work; but is it the cheapest? + * + * Note: cost calculation here should match + * compare_fractional_path_costs(). + */ + path_cost = path->startup_cost + + path_fraction * (path->total_cost - path->startup_cost); + + if (best_path == NULL || path_cost < best_cost) + { + best_path = path; + best_cost = path_cost; + } } - if (ll == NULL) - break; /* none are Equal for this index col */ - } - if (prevcol < indexcol) - continue; /* didn't find all Equal clauses */ - - /* - * Build the access path. We don't bother marking it with pathkeys. - */ - new_path = create_index_path(root, index, - restrictclauses, - NIL, - indexscandir, - NULL); - - /* - * Estimate actual cost of fetching just one row. 
- */ - if (new_path->rows > 1.0) - new_cost = new_path->path.startup_cost + - (new_path->path.total_cost - new_path->path.startup_cost) - * 1.0 / new_path->rows; - else - new_cost = new_path->path.total_cost; - - /* - * Keep if first or if cheaper than previous best. - */ - if (best_path == NULL || new_cost < best_cost) - { - best_path = new_path; - best_cost = new_cost; - if (ScanDirectionIsForward(indexscandir)) - best_nulls_first = index->nulls_first[indexcol]; - else - best_nulls_first = !index->nulls_first[indexcol]; } } + /* Fail if no suitable path */ + if (best_path == NULL) + return NULL; + + /* Construct private state for further processing */ + info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo)); + info->mminfo = mminfo; info->path = best_path; info->pathcost = best_cost; - info->nulls_first = best_nulls_first; - return (best_path != NULL); + info->param = NULL; /* will be set later */ + + return info; } /* - * match_agg_to_index_col - * Does an aggregate match an index column? - * - * It matches if its argument is equal to the index column's data and its - * sortop is either the forward or reverse sort operator for the column. - * - * We return ForwardScanDirection if match the forward sort operator, - * BackwardScanDirection if match the reverse sort operator, - * and NoMovementScanDirection if there's no match. + * To be usable, a Path needs to be an IndexPath on a btree index, or be a + * MergeAppendPath of such IndexPaths. This restriction is mainly because + * we need to be sure the index can handle an added NOT NULL constraint at + * minimal additional cost. If you wish to relax it, you'll need to improve + * add_notnull_qual() too. 
*/ -static ScanDirection -match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol) +static bool +path_usable_for_agg(Path *path) { - ScanDirection result; - - /* Check for operator match first (cheaper) */ - if (info->aggsortop == index->fwdsortop[indexcol]) - result = ForwardScanDirection; - else if (info->aggsortop == index->revsortop[indexcol]) - result = BackwardScanDirection; - else - return NoMovementScanDirection; + if (IsA(path, IndexPath)) + { + IndexPath *ipath = (IndexPath *) path; - /* Check for data match */ - if (!match_index_to_operand((Node *) info->target, indexcol, index)) - return NoMovementScanDirection; + /* OK if it's a btree index */ + if (ipath->indexinfo->relam == BTREE_AM_OID) + return true; + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + ListCell *lc; - return result; + foreach(lc, mpath->subpaths) + { + if (!path_usable_for_agg((Path *) lfirst(lc))) + return false; + } + return true; + } + return false; } /* * Construct a suitable plan for a converted aggregate query */ static void -make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) +make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info) { PlannerInfo subroot; Query *subparse; Plan *plan; - IndexScan *iplan; TargetEntry *tle; - SortGroupClause *sortcl; /* * Generate a suitably modified query. 
Much of the work here is probably @@ -500,58 +519,37 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) subparse->groupClause = NIL; subparse->havingQual = NULL; subparse->distinctClause = NIL; + subparse->sortClause = NIL; subroot.hasHavingQual = false; /* single tlist entry that is the aggregate target */ - tle = makeTargetEntry(copyObject(info->target), + tle = makeTargetEntry(copyObject(info->mminfo->target), 1, pstrdup("agg_target"), false); subparse->targetList = list_make1(tle); - /* set up the appropriate ORDER BY entry */ - sortcl = makeNode(SortGroupClause); - sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList); - sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL); - if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - info->aggsortop); - sortcl->sortop = info->aggsortop; - sortcl->nulls_first = info->nulls_first; - sortcl->hashable = false; /* no need to make this accurate */ - subparse->sortClause = list_make1(sortcl); - - /* set up LIMIT 1 */ + /* set up expressions for LIMIT 1 */ subparse->limitOffset = NULL; subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64), Int64GetDatum(1), false, FLOAT8PASSBYVAL); /* - * Generate the plan for the subquery. We already have a Path for the - * basic indexscan, but we have to convert it to a Plan and attach a LIMIT - * node above it. - * - * Also we must add a "WHERE target IS NOT NULL" restriction to the - * indexscan, to be sure we don't return a NULL, which'd be contrary to - * the standard behavior of MIN/MAX. - * - * The NOT NULL qual has to go on the actual indexscan; create_plan might - * have stuck a gating Result atop that, if there were any pseudoconstant - * quals. + * Modify the ordered Path to add an indexed "target IS NOT NULL" + * condition to each scan. We need this to ensure we don't return a NULL, + * which'd be contrary to the standard behavior of MIN/MAX. 
We insist on + * it being indexed, else the Path might not be as cheap as we thought. */ - plan = create_plan(&subroot, (Path *) info->path); - - plan->targetlist = copyObject(subparse->targetList); + add_notnull_qual(root, rel, info, info->path); - if (IsA(plan, Result)) - iplan = (IndexScan *) plan->lefttree; - else - iplan = (IndexScan *) plan; - if (!IsA(iplan, IndexScan)) - elog(ERROR, "result of create_plan(IndexPath) isn't an IndexScan"); + /* + * Generate the plan for the subquery. We already have a Path, but we have + * to convert it to a Plan and attach a LIMIT node above it. + */ + plan = create_plan(&subroot, info->path); - attach_notnull_index_qual(info, iplan); + plan->targetlist = subparse->targetList; plan = (Plan *) make_limit(plan, subparse->limitOffset, @@ -572,166 +570,118 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) } /* - * Add "target IS NOT NULL" to the quals of the given indexscan. - * - * This is trickier than it sounds because the new qual has to be added at an - * appropriate place in the qual list, to preserve the list's ordering by - * index column position. + * Attach a suitable NOT NULL qual to the IndexPath, or each of the member + * IndexPaths. Note we assume we can modify the paths in-place. */ static void -attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan) +add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info, + Path *path) { - NullTest *ntest; - List *newindexqual; - List *newindexqualorig; - bool done; - ListCell *lc1; - ListCell *lc2; - Expr *leftop; - AttrNumber targetattno; - - /* - * We can skip adding the NOT NULL qual if it duplicates either an - * already-given WHERE condition, or a clause of the index predicate. 
- */ - if (list_member(iplan->indexqualorig, info->notnulltest) || - list_member(info->path->indexinfo->indpred, info->notnulltest)) - return; - - /* Need a "fixed" copy as well as the original */ - ntest = copyObject(info->notnulltest); - ntest->arg = (Expr *) fix_indexqual_operand((Node *) ntest->arg, - info->path->indexinfo); - - /* Identify the target index column from the "fixed" copy */ - leftop = ntest->arg; - - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; - - Assert(leftop != NULL); - - if (!IsA(leftop, Var)) - elog(ERROR, "NullTest indexqual has wrong key"); - - targetattno = ((Var *) leftop)->varattno; - - /* - * list.c doesn't expose a primitive to insert a list cell at an arbitrary - * position, so our strategy is to copy the lists and insert the null test - * when we reach an appropriate spot. - */ - newindexqual = newindexqualorig = NIL; - done = false; - - forboth(lc1, iplan->indexqual, lc2, iplan->indexqualorig) + if (IsA(path, IndexPath)) { - Expr *qual = (Expr *) lfirst(lc1); - Expr *qualorig = (Expr *) lfirst(lc2); - AttrNumber varattno; + IndexPath *ipath = (IndexPath *) path; + Expr *target; + NullTest *ntest; + RestrictInfo *rinfo; + List *newquals; + bool found_clause; /* - * Identify which index column this qual is for. This code should - * match the qual disassembly code in ExecIndexBuildScanKeys. + * If we are looking at a child of the original rel, we have to adjust + * the agg target expression to match the child. 
*/ - if (IsA(qual, OpExpr)) + if (ipath->path.parent != rel) { - /* indexkey op expression */ - leftop = (Expr *) get_leftop(qual); - - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; + AppendRelInfo *appinfo = NULL; + ListCell *lc; - Assert(leftop != NULL); - - if (!IsA(leftop, Var)) - elog(ERROR, "indexqual doesn't have key on left side"); - - varattno = ((Var *) leftop)->varattno; + /* Search for the appropriate AppendRelInfo */ + foreach(lc, root->append_rel_list) + { + appinfo = (AppendRelInfo *) lfirst(lc); + if (appinfo->parent_relid == rel->relid && + appinfo->child_relid == ipath->path.parent->relid) + break; + appinfo = NULL; + } + if (!appinfo) + elog(ERROR, "failed to find AppendRelInfo for child rel"); + target = (Expr *) + adjust_appendrel_attrs((Node *) info->mminfo->target, + appinfo); } - else if (IsA(qual, RowCompareExpr)) + else { - /* (indexkey, indexkey, ...) op (expression, expression, ...) */ - RowCompareExpr *rc = (RowCompareExpr *) qual; - - /* - * Examine just the first column of the rowcompare, which is what - * determines its placement in the overall qual list. 
- */ - leftop = (Expr *) linitial(rc->largs); - - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; - - Assert(leftop != NULL); - - if (!IsA(leftop, Var)) - elog(ERROR, "indexqual doesn't have key on left side"); - - varattno = ((Var *) leftop)->varattno; + /* Otherwise, just make a copy (may not be necessary) */ + target = copyObject(info->mminfo->target); } - else if (IsA(qual, ScalarArrayOpExpr)) - { - /* indexkey op ANY (array-expression) */ - ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual; - leftop = (Expr *) linitial(saop->args); + /* Build "target IS NOT NULL" expression */ + ntest = makeNode(NullTest); + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = target; + /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ + ntest->argisrow = false; - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; - - Assert(leftop != NULL); - - if (!IsA(leftop, Var)) - elog(ERROR, "indexqual doesn't have key on left side"); + /* + * We can skip adding the NOT NULL qual if it duplicates either an + * already-given index condition, or a clause of the index predicate. + */ + if (list_member(get_actual_clauses(ipath->indexquals), ntest) || + list_member(ipath->indexinfo->indpred, ntest)) + return; - varattno = ((Var *) leftop)->varattno; - } - else if (IsA(qual, NullTest)) - { - /* indexkey IS NULL or indexkey IS NOT NULL */ - NullTest *ntest = (NullTest *) qual; + /* Wrap it in a RestrictInfo and prepend to existing indexquals */ + rinfo = make_restrictinfo((Expr *) ntest, + true, + false, + false, + NULL, + NULL); - leftop = ntest->arg; + newquals = list_concat(list_make1(rinfo), ipath->indexquals); - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; + /* + * We can't just stick the IS NOT NULL at the front of the list, + * though. It has to go in the right position corresponding to its + * index column, which might not be the first one. 
Easiest way to fix + * this is to run the quals through group_clauses_by_indexkey again. + */ + newquals = group_clauses_by_indexkey(ipath->indexinfo, + newquals, + NIL, + NULL, + SAOP_FORBID, + &found_clause); - Assert(leftop != NULL); + newquals = flatten_clausegroups_list(newquals); - if (!IsA(leftop, Var)) - elog(ERROR, "NullTest indexqual has wrong key"); + /* Trouble if we lost any quals */ + if (list_length(newquals) != list_length(ipath->indexquals) + 1) + elog(ERROR, "add_notnull_qual failed to add NOT NULL qual"); - varattno = ((Var *) leftop)->varattno; - } - else - { - elog(ERROR, "unsupported indexqual type: %d", - (int) nodeTag(qual)); - varattno = 0; /* keep compiler quiet */ - } + /* + * And update the path's indexquals. Note we don't bother adding + * to indexclauses, which is OK since this is like a generated + * index qual. + */ + ipath->indexquals = newquals; + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + ListCell *lc; - /* Insert the null test at the first place it can legally go */ - if (!done && targetattno <= varattno) + foreach(lc, mpath->subpaths) { - newindexqual = lappend(newindexqual, ntest); - newindexqualorig = lappend(newindexqualorig, info->notnulltest); - done = true; + add_notnull_qual(root, rel, info, (Path *) lfirst(lc)); } - - newindexqual = lappend(newindexqual, qual); - newindexqualorig = lappend(newindexqualorig, qualorig); } - - /* Add the null test at the end if it must follow all existing quals */ - if (!done) + else { - newindexqual = lappend(newindexqual, ntest); - newindexqualorig = lappend(newindexqualorig, info->notnulltest); + /* shouldn't get here, because of path_usable_for_agg checks */ + elog(ERROR, "add_notnull_qual failed"); } - - iplan->indexqual = newindexqual; - iplan->indexqualorig = newindexqualorig; } /* @@ -750,13 +700,13 @@ replace_aggs_with_params_mutator(Node *node, List **context) foreach(l, *context) { - MinMaxAggInfo *info = (MinMaxAggInfo *) 
lfirst(l); + PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l); - if (info->aggfnoid == aggref->aggfnoid && - equal(info->target, curTarget->expr)) + if (info->mminfo->aggfnoid == aggref->aggfnoid && + equal(info->mminfo->target, curTarget->expr)) return (Node *) info->param; } - elog(ERROR, "failed to re-find aggregate info record"); + elog(ERROR, "failed to re-find PrivateMMAggInfo record"); } Assert(!IsA(node, SubLink)); return expression_tree_mutator(node, replace_aggs_with_params_mutator, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index fd4c6f54d0f98600fdb3476849ff9cda2741b9b3..cab6e9e25ad24af8abcd20bd62f11c8df8319574 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -30,6 +30,10 @@ #include "utils/selfuncs.h" +/* Local functions */ +static void canonicalize_all_pathkeys(PlannerInfo *root); + + /* * query_planner * Generate a path (that is, a simplified plan) for a basic query, @@ -68,9 +72,9 @@ * PlannerInfo field and not a passed parameter is that the low-level routines * in indxpath.c need to see it.) * - * Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys, - * distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be - * canonicalized once the info is available. + * Note: the PlannerInfo node includes other pathkeys fields besides + * query_pathkeys, all of which need to be canonicalized once the info is + * available. See canonicalize_all_pathkeys. * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) @@ -118,16 +122,7 @@ query_planner(PlannerInfo *root, List *tlist, * something like "SELECT 2+2 ORDER BY 1". 
*/ root->canon_pathkeys = NIL; - root->query_pathkeys = canonicalize_pathkeys(root, - root->query_pathkeys); - root->group_pathkeys = canonicalize_pathkeys(root, - root->group_pathkeys); - root->window_pathkeys = canonicalize_pathkeys(root, - root->window_pathkeys); - root->distinct_pathkeys = canonicalize_pathkeys(root, - root->distinct_pathkeys); - root->sort_pathkeys = canonicalize_pathkeys(root, - root->sort_pathkeys); + canonicalize_all_pathkeys(root); return; } @@ -136,7 +131,7 @@ query_planner(PlannerInfo *root, List *tlist, * for "simple" rels. * * NOTE: append_rel_list was set up by subquery_planner, so do not touch - * here; eq_classes may contain data already, too. + * here; eq_classes and minmax_aggs may contain data already, too. */ root->simple_rel_array_size = list_length(parse->rtable) + 1; root->simple_rel_array = (RelOptInfo **) @@ -212,15 +207,10 @@ query_planner(PlannerInfo *root, List *tlist, /* * We have completed merging equivalence sets, so it's now possible to - * convert the requested query_pathkeys to canonical form. Also - * canonicalize the groupClause, windowClause, distinctClause and - * sortClause pathkeys for use later. + * convert previously generated pathkeys (in particular, the requested + * query_pathkeys) to canonical form. */ - root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); - root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); - root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys); - root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); - root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); + canonicalize_all_pathkeys(root); /* * Examine any "placeholder" expressions generated during subquery pullup. 
@@ -430,3 +420,28 @@ query_planner(PlannerInfo *root, List *tlist, *cheapest_path = cheapestpath; *sorted_path = sortedpath; } + + +/* + * canonicalize_all_pathkeys + * Canonicalize all pathkeys that were generated before entering + * query_planner and then stashed in PlannerInfo. + */ +static void +canonicalize_all_pathkeys(PlannerInfo *root) +{ + ListCell *lc; + + root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); + root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys); + root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); + root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); + + foreach(lc, root->minmax_aggs) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + mminfo->pathkeys = canonicalize_pathkeys(root, mminfo->pathkeys); + } +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 93daedc706126e80e1a2002085dd019345827b12..07301c77fbf4f2550367ea8ff58c685462835223 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1010,6 +1010,30 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval); + /* + * Do aggregate preprocessing, if the query has any aggs. + * + * Note: think not that we can turn off hasAggs if we find no aggs. It + * is possible for constant-expression simplification to remove all + * explicit references to aggs, but we still have to follow the + * aggregate semantics (eg, producing only one output row). + */ + if (parse->hasAggs) + { + /* + * Will need actual number of aggregates for estimating costs. + * Note: we do not attempt to detect duplicate aggregates here; a + * somewhat-overestimated count is okay for our present purposes. 
+ */ + count_agg_clauses((Node *) tlist, &agg_counts); + count_agg_clauses(parse->havingQual, &agg_counts); + + /* + * Preprocess MIN/MAX aggregates, if any. + */ + preprocess_minmax_aggregates(root, tlist); + } + /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize @@ -1056,23 +1080,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) tlist, false); - /* - * Will need actual number of aggregates for estimating costs. - * - * Note: we do not attempt to detect duplicate aggregates here; a - * somewhat-overestimated count is okay for our present purposes. - * - * Note: think not that we can turn off hasAggs if we find no aggs. It - * is possible for constant-expression simplification to remove all - * explicit references to aggs, but we still have to follow the - * aggregate semantics (eg, producing only one output row). - */ - if (parse->hasAggs) - { - count_agg_clauses((Node *) tlist, &agg_counts); - count_agg_clauses(parse->havingQual, &agg_counts); - } - /* * Figure out whether we want a sorted result from query_planner. 
* diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index cc8059c937a7b7f88f449d73d136cb8528d37bd3..e337751328bf0b05e3025a2ecc302f754b766855 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1870,6 +1870,7 @@ substitute_multiple_relids_walker(Node *node, Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, AppendRelInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); return expression_tree_walker(node, substitute_multiple_relids_walker, (void *) context); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 45ecde416f93bff38cbecd6776e30bc492795dcf..4686578e3bd3cccda4fe48fcced4c674871f0886 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -1641,6 +1641,7 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context) Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, AppendRelInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); /* * We have to process RestrictInfo nodes specially. 
(Note: although diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 399f3a95e87bcc6bff5b2d62d6e8c2a1a73a2d19..b26decc43731b8777935201830da38126d5285ac 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -838,6 +838,7 @@ flatten_join_alias_vars_mutator(Node *node, /* Shouldn't need to handle these planner auxiliary nodes here */ Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); return expression_tree_mutator(node, flatten_join_alias_vars_mutator, (void *) context); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 7440a41898f633bb82b7f86826016def0b110421..7333ce29aaea6587a4d07cb93c00497803f1f5ba 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -377,6 +377,7 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context) /* Shouldn't need to handle other planner auxiliary nodes here */ Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); if (IsA(node, Query)) { @@ -544,6 +545,7 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) /* Shouldn't need to handle other planner auxiliary nodes here */ Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); if (IsA(node, Query)) { @@ -811,6 +813,7 @@ rangeTableEntry_used_walker(Node *node, Assert(!IsA(node, SpecialJoinInfo)); Assert(!IsA(node, AppendRelInfo)); Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); if (IsA(node, Query)) { diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8e94d9803f702875cd47b85b48b1ebe29ac257b9..bc96ebf68e4d066d6901cb64d4f02e615e16a6c6 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -230,6 +230,7 @@ typedef enum NodeTag T_SpecialJoinInfo, T_AppendRelInfo, T_PlaceHolderInfo, + 
T_MinMaxAggInfo, T_PlannerParamItem, /* diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6e3d0f35181955bc9f4cd97414793bc8cc775a27..f885f5a0c46f6ceec15a06e8f0c1f3b30aab3d33 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -189,6 +189,8 @@ typedef struct PlannerInfo List *distinct_pathkeys; /* distinctClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */ + List *minmax_aggs; /* List of MinMaxAggInfos */ + List *initial_rels; /* RelOptInfos we are now trying to join */ MemoryContext planner_cxt; /* context holding PlannerInfo */ @@ -1357,6 +1359,23 @@ typedef struct PlaceHolderInfo int32 ph_width; /* estimated attribute width */ } PlaceHolderInfo; +/* + * For each potentially index-optimizable MIN/MAX aggregate function, + * root->minmax_aggs stores a MinMaxAggInfo describing it. + * + * Note: a MIN/MAX agg doesn't really care about the nulls_first property, + * so the pathkey's nulls_first flag should be ignored. + */ +typedef struct MinMaxAggInfo +{ + NodeTag type; + + Oid aggfnoid; /* pg_proc Oid of the aggregate */ + Oid aggsortop; /* Oid of its sort operator */ + Expr *target; /* expression we are aggregating on */ + List *pathkeys; /* pathkeys representing needed sort order */ +} MinMaxAggInfo; + /* * glob->paramlist keeps track of the PARAM_EXEC slots that we have decided * we need for the query. 
At runtime these slots are used to pass values diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 6bce53cce57d5c3d8551ad12c371e36100c009ff..c0ff0144fa10c0aefa9296b823aad057896a8c17 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -173,6 +173,9 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist, bool canonicalize); +extern List *make_pathkeys_for_aggregate(PlannerInfo *root, + Expr *aggtarget, + Oid aggsortop); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, @@ -187,10 +190,6 @@ extern List *select_outer_pathkeys_for_merge(PlannerInfo *root, extern List *make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys); -extern int pathkeys_useful_for_merging(PlannerInfo *root, - RelOptInfo *rel, - List *pathkeys); -extern int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys); extern List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 5bb0e094e5b03fd6911b320ada440a93bc3647c3..919449b4e3871464553ef69004f9d00214b1c99d 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -32,6 +32,7 @@ extern void query_planner(PlannerInfo *root, List *tlist, /* * prototypes for plan/planagg.c */ +extern void preprocess_minmax_aggregates(PlannerInfo *root, List *tlist); extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path); @@ -39,7 +40,6 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist, * prototypes for plan/createplan.c */ extern Plan *create_plan(PlannerInfo *root, Path *best_path); -extern Node *fix_indexqual_operand(Node *node, IndexOptInfo *index); extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index 
scanrelid, Plan *subplan, List *subrtable, List *subrowmark); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index b456d7e989ab1c5006d85730286f82069e6c3585..ed3b0c4b7583fba68a330e335afb4073cc3f6ce9 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -442,29 +442,90 @@ FROM bool_test; (1 row) -- --- Test several cases that should be optimized into indexscans instead of --- the generic aggregate implementation. We can't actually verify that they --- are done as indexscans, but we can check that the results are correct. +-- Test cases that should be optimized into indexscans instead of +-- the generic aggregate implementation. -- +analyze tenk1; -- ensure we get consistent plans here -- Basic cases +explain (costs off) + select min(unique1) from tenk1; + QUERY PLAN +------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +select min(unique1) from tenk1; + min +----- + 0 +(1 row) + +explain (costs off) + select max(unique1) from tenk1; + QUERY PLAN +---------------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + select max(unique1) from tenk1; max ------ 9999 (1 row) +explain (costs off) + select max(unique1) from tenk1 where unique1 < 42; + QUERY PLAN +------------------------------------------------------------------------ + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique1 on tenk1 + Index Cond: ((unique1 IS NOT NULL) AND (unique1 < 42)) +(5 rows) + select max(unique1) from tenk1 where unique1 < 42; max ----- 41 (1 row) +explain (costs off) + select max(unique1) from tenk1 where unique1 > 42; + QUERY PLAN 
+------------------------------------------------------------------------ + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique1 on tenk1 + Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42)) +(5 rows) + select max(unique1) from tenk1 where unique1 > 42; max ------ 9999 (1 row) +explain (costs off) + select max(unique1) from tenk1 where unique1 > 42000; + QUERY PLAN +--------------------------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique1 on tenk1 + Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42000)) +(5 rows) + select max(unique1) from tenk1 where unique1 > 42000; max ----- @@ -472,12 +533,34 @@ select max(unique1) from tenk1 where unique1 > 42000; (1 row) -- multi-column index (uses tenk1_thous_tenthous) +explain (costs off) + select max(tenthous) from tenk1 where thousand = 33; + QUERY PLAN +-------------------------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL)) +(5 rows) + select max(tenthous) from tenk1 where thousand = 33; max ------ 9033 (1 row) +explain (costs off) + select min(tenthous) from tenk1 where thousand = 33; + QUERY PLAN +-------------------------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL)) +(5 rows) + select min(tenthous) from tenk1 where thousand = 33; min ----- @@ -485,8 +568,22 @@ select min(tenthous) from tenk1 where thousand = 33; (1 row) -- check parameter propagation into an indexscan subquery +explain (costs off) + select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt + from int4_tbl; + QUERY PLAN 
+----------------------------------------------------------------------------------------- + Seq Scan on int4_tbl + SubPlan 2 + -> Result + InitPlan 1 (returns $1) + -> Limit + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: ((unique1 IS NOT NULL) AND (unique1 > int4_tbl.f1)) +(7 rows) + select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt -from int4_tbl; + from int4_tbl; f1 | gt -------------+---- 0 | 1 @@ -497,30 +594,94 @@ from int4_tbl; (5 rows) -- check some cases that were handled incorrectly in 8.3.0 +explain (costs off) + select distinct max(unique2) from tenk1; + QUERY PLAN +---------------------------------------------------------------- + HashAggregate + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique2 on tenk1 + Index Cond: (unique2 IS NOT NULL) + -> Result +(6 rows) + select distinct max(unique2) from tenk1; max ------ 9999 (1 row) +explain (costs off) + select max(unique2) from tenk1 order by 1; + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: ($0) + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique2 on tenk1 + Index Cond: (unique2 IS NOT NULL) + -> Result +(7 rows) + select max(unique2) from tenk1 order by 1; max ------ 9999 (1 row) +explain (costs off) + select max(unique2) from tenk1 order by max(unique2); + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: ($0) + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique2 on tenk1 + Index Cond: (unique2 IS NOT NULL) + -> Result +(7 rows) + select max(unique2) from tenk1 order by max(unique2); max ------ 9999 (1 row) +explain (costs off) + select max(unique2) from tenk1 order by max(unique2)+1; + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: (($0 + 1)) + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique2 on tenk1 + Index 
Cond: (unique2 IS NOT NULL) + -> Result +(7 rows) + select max(unique2) from tenk1 order by max(unique2)+1; max ------ 9999 (1 row) +explain (costs off) + select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: (generate_series(1, 3)) + InitPlan 1 (returns $0) + -> Limit + -> Index Scan Backward using tenk1_unique2 on tenk1 + Index Cond: (unique2 IS NOT NULL) + -> Result +(7 rows) + select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; max | g ------+--- @@ -529,6 +690,69 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; 9999 | 1 (3 rows) +-- this is an interesting special case as of 9.1 +explain (costs off) + select min(unique2) from tenk1 where unique2 = 42; + QUERY PLAN +----------------------------------------------- + Aggregate + -> Index Scan using tenk1_unique2 on tenk1 + Index Cond: (unique2 = 42) +(3 rows) + +select min(unique2) from tenk1 where unique2 = 42; + min +----- + 42 +(1 row) + +-- try it on an inheritance tree +create table minmaxtest(f1 int); +create table minmaxtest1() inherits (minmaxtest); +create table minmaxtest2() inherits (minmaxtest); +create index minmaxtesti on minmaxtest(f1); +create index minmaxtest1i on minmaxtest1(f1); +create index minmaxtest2i on minmaxtest2(f1 desc); +insert into minmaxtest values(11), (12); +insert into minmaxtest1 values(13), (14); +insert into minmaxtest2 values(15), (16); +explain (costs off) + select min(f1), max(f1) from minmaxtest; + QUERY PLAN +-------------------------------------------------------------------------------------- + Result + InitPlan 1 (returns $0) + -> Limit + -> Merge Append + Sort Key: public.minmaxtest.f1 + -> Index Scan using minmaxtesti on minmaxtest + Index Cond: (f1 IS NOT NULL) + -> Index Scan using minmaxtest1i on minmaxtest1 minmaxtest + Index Cond: (f1 IS NOT NULL) + -> Index Scan Backward using minmaxtest2i on 
minmaxtest2 minmaxtest + Index Cond: (f1 IS NOT NULL) + InitPlan 2 (returns $1) + -> Limit + -> Merge Append + Sort Key: public.minmaxtest.f1 + -> Index Scan Backward using minmaxtesti on minmaxtest + Index Cond: (f1 IS NOT NULL) + -> Index Scan Backward using minmaxtest1i on minmaxtest1 minmaxtest + Index Cond: (f1 IS NOT NULL) + -> Index Scan using minmaxtest2i on minmaxtest2 minmaxtest + Index Cond: (f1 IS NOT NULL) +(21 rows) + +select min(f1), max(f1) from minmaxtest; + min | max +-----+----- + 11 | 16 +(1 row) + +drop table minmaxtest cascade; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table minmaxtest1 +drop cascades to table minmaxtest2 -- -- Test combinations of DISTINCT and/or ORDER BY -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 8f81ba763a02e1d84e6748ee10f9bcdefc3c2d57..3825d7b302fb2efb6a6d186fc71777f94e657aaf 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -205,31 +205,81 @@ SELECT FROM bool_test; -- --- Test several cases that should be optimized into indexscans instead of --- the generic aggregate implementation. We can't actually verify that they --- are done as indexscans, but we can check that the results are correct. +-- Test cases that should be optimized into indexscans instead of +-- the generic aggregate implementation. 
-- +analyze tenk1; -- ensure we get consistent plans here -- Basic cases +explain (costs off) + select min(unique1) from tenk1; +select min(unique1) from tenk1; +explain (costs off) + select max(unique1) from tenk1; select max(unique1) from tenk1; +explain (costs off) + select max(unique1) from tenk1 where unique1 < 42; select max(unique1) from tenk1 where unique1 < 42; +explain (costs off) + select max(unique1) from tenk1 where unique1 > 42; select max(unique1) from tenk1 where unique1 > 42; +explain (costs off) + select max(unique1) from tenk1 where unique1 > 42000; select max(unique1) from tenk1 where unique1 > 42000; -- multi-column index (uses tenk1_thous_tenthous) +explain (costs off) + select max(tenthous) from tenk1 where thousand = 33; select max(tenthous) from tenk1 where thousand = 33; +explain (costs off) + select min(tenthous) from tenk1 where thousand = 33; select min(tenthous) from tenk1 where thousand = 33; -- check parameter propagation into an indexscan subquery +explain (costs off) + select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt + from int4_tbl; select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt -from int4_tbl; + from int4_tbl; -- check some cases that were handled incorrectly in 8.3.0 +explain (costs off) + select distinct max(unique2) from tenk1; select distinct max(unique2) from tenk1; +explain (costs off) + select max(unique2) from tenk1 order by 1; select max(unique2) from tenk1 order by 1; +explain (costs off) + select max(unique2) from tenk1 order by max(unique2); select max(unique2) from tenk1 order by max(unique2); +explain (costs off) + select max(unique2) from tenk1 order by max(unique2)+1; select max(unique2) from tenk1 order by max(unique2)+1; +explain (costs off) + select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; +-- this is an interesting special case as of 9.1 +explain (costs off) + select 
min(unique2) from tenk1 where unique2 = 42; +select min(unique2) from tenk1 where unique2 = 42; + +-- try it on an inheritance tree +create table minmaxtest(f1 int); +create table minmaxtest1() inherits (minmaxtest); +create table minmaxtest2() inherits (minmaxtest); +create index minmaxtesti on minmaxtest(f1); +create index minmaxtest1i on minmaxtest1(f1); +create index minmaxtest2i on minmaxtest2(f1 desc); + +insert into minmaxtest values(11), (12); +insert into minmaxtest1 values(13), (14); +insert into minmaxtest2 values(15), (16); + +explain (costs off) + select min(f1), max(f1) from minmaxtest; +select min(f1), max(f1) from minmaxtest; + +drop table minmaxtest cascade; -- -- Test combinations of DISTINCT and/or ORDER BY