diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 66630ae6612a8acaabbfe43c6f88845f323e2940..408b9b2a757e24e08b1ff44ba473c31f0e545217 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1334,6 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node) WRITE_NODE_FIELD(append_rel_list); WRITE_NODE_FIELD(query_pathkeys); WRITE_NODE_FIELD(group_pathkeys); + WRITE_NODE_FIELD(distinct_pathkeys); WRITE_NODE_FIELD(sort_pathkeys); WRITE_FLOAT_FIELD(total_table_pages, "%.0f"); WRITE_FLOAT_FIELD(tuple_fraction, "%.4f"); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 5e5da6cda4f1bd6eeff833a97ddd3373830375fa..081a7c9cebd1f37615cd42e2a49d3d29b706e42e 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,9 +66,9 @@ * PlannerInfo field and not a passed parameter is that the low-level routines * in indxpath.c need to see it.) * - * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys, - * which like query_pathkeys need to be canonicalized once the info is - * available. + * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys, + * and sort_pathkeys, which like query_pathkeys need to be canonicalized once + * the info is available. * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) @@ -120,6 +120,8 @@ query_planner(PlannerInfo *root, List *tlist, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->distinct_pathkeys = canonicalize_pathkeys(root, + root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); return; @@ -237,10 +239,12 @@ query_planner(PlannerInfo *root, List *tlist, /* * We have completed merging equivalence sets, so it's now possible to * convert the requested query_pathkeys to canonical form. Also - * canonicalize the groupClause and sortClause pathkeys for use later. + * canonicalize the groupClause, distinctClause and sortClause pathkeys + * for use later. */ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); /* @@ -286,9 +290,11 @@ query_planner(PlannerInfo *root, List *tlist, /* * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all the - * tuples --- unless ORDER BY is a subset of GROUP BY. + * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if + * we have both DISTINCT and GROUP BY. */ - if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys)) + if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) || + !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys)) tuple_fraction = 0.0; } else if (parse->hasAggs || root->hasHavingQual) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 40a9bffaf37e3db17aed317e80d69153fc207497..d8c6942e250aa49d426f51cae270fe38ee40859e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.238 2008/08/05 02:43:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -69,12 +69,17 @@ static double preprocess_limit(PlannerInfo *root, int64 *offset_est, int64 *count_est); static void preprocess_groupclause(PlannerInfo *root); static Oid *extract_grouping_ops(List *groupClause); +static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); static bool grouping_is_sortable(List *groupClause); static bool grouping_is_hashable(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, double dNumGroups, AggClauseCounts *agg_counts); +static bool choose_hashed_distinct(PlannerInfo *root, + Plan *input_plan, List *input_pathkeys, + double tuple_fraction, double limit_tuples, + double dNumDistinctRows); static List *make_subplanTargetList(PlannerInfo *root, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval); static void locate_grouping_columns(PlannerInfo *root, @@ -757,7 +762,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) double limit_tuples = -1.0; Plan *result_plan; List *current_pathkeys; - List *sort_pathkeys; double dNumGroups = 0; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ @@ -829,16 +833,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Calculate pathkeys that represent result ordering requirements */ Assert(parse->distinctClause == NIL); - sort_pathkeys = make_pathkeys_for_sortclauses(root, - parse->sortClause, - tlist, - true); + root->sort_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + true); } else { /* No set operations, do regular planning */ List *sub_tlist; - List *group_pathkeys; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; QualCost tlist_cost; @@ -870,14 +873,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize - * them after EquivalenceClasses have been formed. - * - * Note: for the moment, DISTINCT is always implemented via sort/uniq, - * and we set the sort_pathkeys to be the more rigorous of the - * DISTINCT and ORDER BY requirements. This should be changed - * someday, but DISTINCT ON is a bit of a problem ... + * them after EquivalenceClasses have been formed. The sortClause + * is certainly sort-able, but GROUP BY and DISTINCT might not be, + * in which case we just leave their pathkeys empty. */ - if (parse->groupClause && grouping_is_sortable(parse->groupClause)) + if (parse->groupClause && + grouping_is_sortable(parse->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, @@ -886,18 +887,21 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else root->group_pathkeys = NIL; - if (list_length(parse->distinctClause) > list_length(parse->sortClause)) - root->sort_pathkeys = + if (parse->distinctClause && + grouping_is_sortable(parse->distinctClause)) + root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false); else - root->sort_pathkeys = - make_pathkeys_for_sortclauses(root, - parse->sortClause, - tlist, - false); + root->distinct_pathkeys = NIL; + + root->sort_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + false); /* * Will need actual number of aggregates for estimating costs. @@ -917,17 +921,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Figure out whether we need a sorted result from query_planner. + * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted - * properly for grouping. Otherwise, if there is an ORDER BY clause, - * we want to sort by the ORDER BY clause. (Note: if we have both, and - * ORDER BY is a superset of GROUP BY, it would be tempting to request - * sort by ORDER BY --- but that might just leave us failing to - * exploit an available sort order at all. Needs more thought...) + * properly for grouping. Otherwise, if there's a sortable DISTINCT + * clause that's more rigorous than the ORDER BY clause, we try to + * produce output that's sufficiently well sorted for the DISTINCT. + * Otherwise, if there is an ORDER BY clause, we want to sort by the + * ORDER BY clause. + * + * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a + * superset of GROUP BY, it would be tempting to request sort by ORDER + * BY --- but that might just leave us failing to exploit an available + * sort order at all. Needs more thought. The choice for DISTINCT + * versus ORDER BY is much easier, since we know that the parser + * ensured that one is a superset of the other. */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; + else if (list_length(root->distinct_pathkeys) > + list_length(root->sort_pathkeys)) + root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else @@ -942,9 +956,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) query_planner(root, sub_tlist, tuple_fraction, limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); - group_pathkeys = root->group_pathkeys; - sort_pathkeys = root->sort_pathkeys; - /* * If grouping, decide whether to use sorted or hashed grouping. */ @@ -1024,7 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(group_pathkeys, current_pathkeys)) + !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; /* @@ -1135,7 +1146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) parse->groupClause, groupColIdx, result_plan); - current_pathkeys = group_pathkeys; + current_pathkeys = root->group_pathkeys; } aggstrategy = AGG_SORTED; @@ -1178,7 +1189,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) parse->groupClause, groupColIdx, result_plan); - current_pathkeys = group_pathkeys; + current_pathkeys = root->group_pathkeys; } result_plan = (Plan *) make_group(root, @@ -1214,35 +1225,129 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* end of if (setOperations) */ /* - * If we were not able to make the plan come out in the right order, add - * an explicit sort step. + * If there is a DISTINCT clause, add the necessary node(s). */ - if (sort_pathkeys) + if (parse->distinctClause) { - if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) + double dNumDistinctRows; + long numDistinctRows; + bool use_hashed_distinct; + bool can_sort; + bool can_hash; + + /* + * If there was grouping or aggregation, use the current number of + * rows as the estimated number of DISTINCT rows (ie, assume the + * result was already mostly unique). If not, use the number of + * distinct-groups calculated by query_planner. + */ + if (parse->groupClause || root->hasHavingQual || parse->hasAggs) + dNumDistinctRows = result_plan->plan_rows; + else + dNumDistinctRows = dNumGroups; + + /* Also convert to long int --- but 'ware overflow! */ + numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + + /* + * If we have a sortable DISTINCT ON clause, we always use sorting. + * This enforces the expected behavior of DISTINCT ON. + */ + can_sort = grouping_is_sortable(parse->distinctClause); + if (can_sort && parse->hasDistinctOn) + use_hashed_distinct = false; + else { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - sort_pathkeys, - limit_tuples); - current_pathkeys = sort_pathkeys; + can_hash = grouping_is_hashable(parse->distinctClause); + if (can_hash && can_sort) + { + /* we have a meaningful choice to make ... */ + use_hashed_distinct = + choose_hashed_distinct(root, + result_plan, current_pathkeys, + tuple_fraction, limit_tuples, + dNumDistinctRows); + } + else if (can_hash) + use_hashed_distinct = true; + else if (can_sort) + use_hashed_distinct = false; + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement DISTINCT"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + use_hashed_distinct = false; /* keep compiler quiet */ + } + } + + if (use_hashed_distinct) + { + /* Hashed aggregate plan --- no sort needed */ + result_plan = (Plan *) make_agg(root, + result_plan->targetlist, + NIL, + AGG_HASHED, + list_length(parse->distinctClause), + extract_grouping_cols(parse->distinctClause, + result_plan->targetlist), + extract_grouping_ops(parse->distinctClause), + numDistinctRows, + 0, + result_plan); + /* Hashed aggregation produces randomly-ordered results */ + current_pathkeys = NIL; + } + else + { + /* + * Use a Unique node to implement DISTINCT. Add an explicit sort + * if we couldn't make the path come out the way the Unique node + * needs it. If we do have to sort, sort by the more rigorous + * of DISTINCT and ORDER BY, to avoid a second sort below. + */ + if (!pathkeys_contained_in(root->distinct_pathkeys, + current_pathkeys)) + { + if (list_length(root->distinct_pathkeys) >= + list_length(root->sort_pathkeys)) + current_pathkeys = root->distinct_pathkeys; + else + { + current_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + current_pathkeys)); + } + + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + current_pathkeys, + -1.0); + } + + result_plan = (Plan *) make_unique(result_plan, + parse->distinctClause); + result_plan->plan_rows = dNumDistinctRows; + /* The Unique node won't change sort ordering */ } } /* - * If there is a DISTINCT clause, add the UNIQUE node. + * If ORDER BY was given and we were not able to make the plan come out in + * the right order, add an explicit sort step. */ - if (parse->distinctClause) + if (parse->sortClause) { - result_plan = (Plan *) make_unique(result_plan, parse->distinctClause); - - /* - * If there was grouping or aggregation, leave plan_rows as-is (ie, - * assume the result was already mostly unique). If not, use the - * number of distinct-groups calculated by query_planner. - */ - if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs) - result_plan->plan_rows = dNumGroups; + if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + { + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + root->sort_pathkeys, + limit_tuples); + current_pathkeys = root->sort_pathkeys; + } } /* @@ -1622,6 +1727,31 @@ extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_cols - make an array of the grouping column resnos + * for a SortGroupClause list + */ +static AttrNumber * +extract_grouping_cols(List *groupClause, List *tlist) +{ + AttrNumber *grpColIdx; + int numCols = list_length(groupClause); + int colno = 0; + ListCell *glitem; + + grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpColIdx[colno++] = tle->resno; + } + + return grpColIdx; +} + /* * grouping_is_sortable - is it possible to implement grouping list by sorting? * @@ -1680,6 +1810,7 @@ choose_hashed_grouping(PlannerInfo *root, double cheapest_path_rows; int cheapest_path_width; Size hashentrysize; + List *target_pathkeys; List *current_pathkeys; Path hashed_p; Path sorted_p; @@ -1716,6 +1847,20 @@ choose_hashed_grouping(PlannerInfo *root, if (hashentrysize * dNumGroups > work_mem * 1024L) return false; + /* + * When we have both GROUP BY and DISTINCT, use the more-rigorous of + * DISTINCT and ORDER BY as the assumed required output sort order. + * This is an oversimplification because the DISTINCT might get + * implemented via hashing, but it's not clear that the case is common + * enough (or that our estimates are good enough) to justify trying to + * solve it exactly. + */ + if (list_length(root->distinct_pathkeys) > + list_length(root->sort_pathkeys)) + target_pathkeys = root->distinct_pathkeys; + else + target_pathkeys = root->sort_pathkeys; + /* * See if the estimated cost is no more than doing it the other way. While * avoiding the need for sorted input is usually a win, the fact that the @@ -1737,8 +1882,8 @@ choose_hashed_grouping(PlannerInfo *root, cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path_rows); /* Result of hashed agg is always unsorted */ - if (root->sort_pathkeys) - cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, + if (target_pathkeys) + cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost, dNumGroups, cheapest_path_width, limit_tuples); if (sorted_path) @@ -1770,9 +1915,9 @@ choose_hashed_grouping(PlannerInfo *root, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); /* The Agg or Group node will preserve ordering */ - if (root->sort_pathkeys && - !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, + if (target_pathkeys && + !pathkeys_contained_in(target_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost, dNumGroups, cheapest_path_width, limit_tuples); /* @@ -1791,6 +1936,111 @@ choose_hashed_grouping(PlannerInfo *root, return false; } +/* + * choose_hashed_distinct - should we use hashing for DISTINCT? + * + * This is fairly similar to choose_hashed_grouping, but there are enough + * differences that it doesn't seem worth trying to unify the two functions. + * + * But note that making the two choices independently is a bit bogus in + * itself. If the two could be combined into a single choice operation + * it'd probably be better, but that seems far too unwieldy to be practical, + * especially considering that the combination of GROUP BY and DISTINCT + * isn't very common in real queries. By separating them, we are giving + * extra preference to using a sorting implementation when a common sort key + * is available ... and that's not necessarily wrong anyway. + * + * Note: this is only applied when both alternatives are actually feasible. + */ +static bool +choose_hashed_distinct(PlannerInfo *root, + Plan *input_plan, List *input_pathkeys, + double tuple_fraction, double limit_tuples, + double dNumDistinctRows) +{ + int numDistinctCols = list_length(root->parse->distinctClause); + Size hashentrysize; + List *current_pathkeys; + Path hashed_p; + Path sorted_p; + + /* Prefer sorting when enable_hashagg is off */ + if (!enable_hashagg) + return false; + + /* + * Don't do it if it doesn't look like the hashtable will fit into + * work_mem. + */ + hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData)); + + if (hashentrysize * dNumDistinctRows > work_mem * 1024L) + return false; + + /* + * See if the estimated cost is no more than doing it the other way. While + * avoiding the need for sorted input is usually a win, the fact that the + * output won't be sorted may be a loss; so we need to do an actual cost + * comparison. + * + * We need to consider input_plan + hashagg [+ final sort] versus + * input_plan [+ sort] + group [+ final sort] where brackets indicate + * a step that may not be needed. + * + * These path variables are dummies that just hold cost fields; we don't + * make actual Paths for these steps. + */ + cost_agg(&hashed_p, root, AGG_HASHED, 0, + numDistinctCols, dNumDistinctRows, + input_plan->startup_cost, input_plan->total_cost, + input_plan->plan_rows); + /* + * Result of hashed agg is always unsorted, so if ORDER BY is present + * we need to charge for the final sort. + */ + if (root->parse->sortClause) + cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, + dNumDistinctRows, input_plan->plan_width, limit_tuples); + + /* Now for the GROUP case ... */ + sorted_p.startup_cost = input_plan->startup_cost; + sorted_p.total_cost = input_plan->total_cost; + current_pathkeys = input_pathkeys; + if (!pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys)) + { + /* We don't want to sort twice */ + if (list_length(root->distinct_pathkeys) >= + list_length(root->sort_pathkeys)) + current_pathkeys = root->distinct_pathkeys; + else + current_pathkeys = root->sort_pathkeys; + cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost, + input_plan->plan_rows, input_plan->plan_width, -1.0); + } + cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, + sorted_p.startup_cost, sorted_p.total_cost, + input_plan->plan_rows); + if (root->parse->sortClause && + !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, + dNumDistinctRows, input_plan->plan_width, limit_tuples); + + /* + * Now make the decision using the top-level tuple fraction. First we + * have to convert an absolute count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumDistinctRows; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) < 0) + { + /* Hashed is cheaper, so use it */ + return true; + } + return false; +} + /*--------------- * make_subplanTargetList * Generate appropriate target list when grouping is required. @@ -1857,7 +2107,7 @@ make_subplanTargetList(PlannerInfo *root, /* * Otherwise, start with a "flattened" tlist (having just the vars - * mentioned in the targetlist and HAVING qual --- but not upper- level + * mentioned in the targetlist and HAVING qual --- but not upper-level * Vars; they will be replaced by Params later on). */ sub_tlist = flatten_tlist(tlist); @@ -1886,16 +2136,28 @@ make_subplanTargetList(PlannerInfo *root, SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; - ListCell *sl; - /* Find or make a matching sub_tlist entry */ - foreach(sl, sub_tlist) + /* + * Find or make a matching sub_tlist entry. If the groupexpr + * isn't a Var, no point in searching. (Note that the parser + * won't make multiple groupClause entries for the same TLE.) + */ + if (groupexpr && IsA(groupexpr, Var)) { - te = (TargetEntry *) lfirst(sl); - if (equal(groupexpr, te->expr)) - break; + ListCell *sl; + + foreach(sl, sub_tlist) + { + TargetEntry *lte = (TargetEntry *) lfirst(sl); + + if (equal(groupexpr, lte->expr)) + { + te = lte; + break; + } + } } - if (!sl) + if (!te) { te = makeTargetEntry((Expr *) groupexpr, list_length(sub_tlist) + 1, diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 76e59c82d6edee77066ce70064769b29999ae7bd..2b04ee5e33783301dc2450cd9b22c8798e01520e 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1447,9 +1447,6 @@ transformDistinctClause(ParseState *pstate, /* * Now add any remaining non-resjunk tlist items, using default * sort/group semantics for their data types. - * - * XXX for now, the planner requires distinctClause to be sortable, - * so we have to insist on that here. */ foreach(tlitem, *targetlist) { @@ -1459,8 +1456,7 @@ transformDistinctClause(ParseState *pstate, continue; /* ignore junk */ result = addTargetToGroupList(pstate, tle, result, *targetlist, - true, /* XXX for now */ - true); + false, true); } return result; @@ -1555,8 +1551,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist, errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); result = addTargetToGroupList(pstate, tle, result, *targetlist, - true, /* someday allow hash-only? */ - true); + false, true); } return result; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 8476d7e85c141bc0a06273cd3a5ccbad883f52d5..f8c23071661b50086aa26287915fc5082354a1ae 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -162,8 +162,9 @@ typedef struct PlannerInfo List *query_pathkeys; /* desired pathkeys for query_planner(), and * actual pathkeys afterwards */ - List *group_pathkeys; /* groupClause pathkeys, if any */ - List *sort_pathkeys; /* sortClause pathkeys, if any */ + List *group_pathkeys; /* groupClause pathkeys, if any */ + List *distinct_pathkeys; /* distinctClause pathkeys, if any */ + List *sort_pathkeys; /* sortClause pathkeys, if any */ List *initial_rels; /* RelOptInfos we are now trying to join */ diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out index c5ad36fdd32df016b14ccd5ee49823117619660d..d404d9db68121049455a14ab3a97abfbed467187 100644 --- a/src/test/regress/expected/numerology.out +++ b/src/test/regress/expected/numerology.out @@ -79,7 +79,7 @@ INSERT INTO TEMP_GROUP INSERT INTO TEMP_GROUP SELECT 2, i.f1, f.f1 FROM INT4_TBL i, FLOAT8_TBL f; -SELECT DISTINCT f1 AS two FROM TEMP_GROUP; +SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1; two ----- 1 diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 69f0efb8d42206d2a829cbba1e9eed96b6c15add..533bac3ab6705bc6ff5be990b68a3e84f7e4cf11 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -129,7 +129,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.prorettype < p2.prorettype); + (p1.prorettype < p2.prorettype) +ORDER BY 1, 2; prorettype | prorettype ------------+------------ 25 | 1043 @@ -142,7 +143,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[0] < p2.proargtypes[0]); + (p1.proargtypes[0] < p2.proargtypes[0]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- 25 | 1042 @@ -158,7 +160,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[1] < p2.proargtypes[1]); + (p1.proargtypes[1] < p2.proargtypes[1]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- 23 | 28 @@ -173,7 +176,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[2] < p2.proargtypes[2]); + (p1.proargtypes[2] < p2.proargtypes[2]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- 1114 | 1184 @@ -185,7 +189,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[3] < p2.proargtypes[3]); + (p1.proargtypes[3] < p2.proargtypes[3]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- 1114 | 1184 @@ -197,7 +202,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[4] < p2.proargtypes[4]); + (p1.proargtypes[4] < p2.proargtypes[4]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- (0 rows) @@ -208,7 +214,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[5] < p2.proargtypes[5]); + (p1.proargtypes[5] < p2.proargtypes[5]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- (0 rows) @@ -219,7 +226,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[6] < p2.proargtypes[6]); + (p1.proargtypes[6] < p2.proargtypes[6]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- (0 rows) @@ -230,7 +238,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[7] < p2.proargtypes[7]); + (p1.proargtypes[7] < p2.proargtypes[7]) +ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- (0 rows) diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index abe34ae7ae5966a54d21f216cfdade115667ae43..fe64fe0d9c175909a93ce388b3ffd121a5d46628 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -14,7 +14,7 @@ SELECT DISTINCT two FROM tmp; -- -- awk '{print $5;}' onek.data | sort -n | uniq -- -SELECT DISTINCT ten FROM tmp; +SELECT DISTINCT ten FROM tmp ORDER BY 1; ten ----- 0 @@ -32,7 +32,7 @@ SELECT DISTINCT ten FROM tmp; -- -- awk '{print $16;}' onek.data | sort -d | uniq -- -SELECT DISTINCT string4 FROM tmp; +SELECT DISTINCT string4 FROM tmp ORDER BY 1; string4 --------- AAAAxx diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source index c82b1405978628f002254f10abb0cba52f83fef2..c5813630625ec965e79d482626157d2942fc6fc6 100644 --- a/src/test/regress/input/misc.source +++ b/src/test/regress/input/misc.source @@ -183,7 +183,8 @@ SELECT p.name, name(p.hobbies) FROM person* p; -- the next two queries demonstrate how functions generate bogus duplicates. -- this is a "feature" .. -- -SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r; +SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r + ORDER BY 1,2; SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r; diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index e409c0a10019c2c99d1f91ba9a853e482616cafa..91e0d2b04d79dab77c3dab10c274e7a496c856f1 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -469,7 +469,8 @@ SELECT p.name, name(p.hobbies) FROM person* p; -- the next two queries demonstrate how functions generate bogus duplicates. -- this is a "feature" .. -- -SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r; +SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r + ORDER BY 1,2; name | name -------------+--------------- basketball | hightops diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql index 2220fdba38587ea1599124ac01b3a2cc8ddf0dc1..6626cf20ebc5d10a38b093b98bab812e2b9b4972 100644 --- a/src/test/regress/sql/numerology.sql +++ b/src/test/regress/sql/numerology.sql @@ -63,7 +63,7 @@ INSERT INTO TEMP_GROUP SELECT 2, i.f1, f.f1 FROM INT4_TBL i, FLOAT8_TBL f; -SELECT DISTINCT f1 AS two FROM TEMP_GROUP; +SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1; SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float FROM TEMP_GROUP diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index e2ab6e57d4a36790b8b5b73db41f7e5ac2a85206..5017849830aa612d14a1359eb8c2cd3f9e622f7f 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -121,7 +121,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.prorettype < p2.prorettype); + (p1.prorettype < p2.prorettype) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0] FROM pg_proc AS p1, pg_proc AS p2 @@ -129,7 +130,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[0] < p2.proargtypes[0]); + (p1.proargtypes[0] < p2.proargtypes[0]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1] FROM pg_proc AS p1, pg_proc AS p2 @@ -137,7 +139,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[1] < p2.proargtypes[1]); + (p1.proargtypes[1] < p2.proargtypes[1]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2] FROM pg_proc AS p1, pg_proc AS p2 @@ -145,7 +148,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[2] < p2.proargtypes[2]); + (p1.proargtypes[2] < p2.proargtypes[2]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[3], p2.proargtypes[3] FROM pg_proc AS p1, pg_proc AS p2 @@ -153,7 +157,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[3] < p2.proargtypes[3]); + (p1.proargtypes[3] < p2.proargtypes[3]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[4], p2.proargtypes[4] FROM pg_proc AS p1, pg_proc AS p2 @@ -161,7 +166,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[4] < p2.proargtypes[4]); + (p1.proargtypes[4] < p2.proargtypes[4]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[5], p2.proargtypes[5] FROM pg_proc AS p1, pg_proc AS p2 @@ -169,7 +175,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[5] < p2.proargtypes[5]); + (p1.proargtypes[5] < p2.proargtypes[5]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[6], p2.proargtypes[6] FROM pg_proc AS p1, pg_proc AS p2 @@ -177,7 +184,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[6] < p2.proargtypes[6]); + (p1.proargtypes[6] < p2.proargtypes[6]) +ORDER BY 1, 2; SELECT DISTINCT p1.proargtypes[7], p2.proargtypes[7] FROM pg_proc AS p1, pg_proc AS p2 @@ -185,7 +193,8 @@ WHERE p1.oid != p2.oid AND p1.prosrc = p2.prosrc AND p1.prolang = 12 AND p2.prolang = 12 AND NOT p1.proisagg AND NOT p2.proisagg AND - (p1.proargtypes[7] < p2.proargtypes[7]); + (p1.proargtypes[7] < p2.proargtypes[7]) +ORDER BY 1, 2; -- Look for functions that return type "internal" and do not have any -- "internal" argument. Such a function would be a security hole since diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index c4a63aaf16fd9788b10523cc4d173bf31520bf65..7416e0194e172090810e7ba70fb4cfc00b618220 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -10,12 +10,12 @@ SELECT DISTINCT two FROM tmp; -- -- awk '{print $5;}' onek.data | sort -n | uniq -- -SELECT DISTINCT ten FROM tmp; +SELECT DISTINCT ten FROM tmp ORDER BY 1; -- -- awk '{print $16;}' onek.data | sort -d | uniq -- -SELECT DISTINCT string4 FROM tmp; +SELECT DISTINCT string4 FROM tmp ORDER BY 1; -- -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |