From 1bd25c03487dd2f96035f88c77fc0d89bb438fac Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sun, 7 Jul 2013 22:37:32 -0400 Subject: [PATCH] Fix planning of parameterized appendrel paths with expensive join quals. The code in set_append_rel_pathlist() for building parameterized paths for append relations (inheritance and UNION ALL combinations) supposed that the cheapest regular path for a child relation would still be cheapest when reparameterized. Which might not be the case, particularly if the added join conditions are expensive to compute, as in a recent example from Jeff Janes. Fix it to compare child path costs *after* reparameterizing. We can short-circuit that if the cheapest pre-existing path is already parameterized correctly, which seems likely to be true often enough to be worth checking for. Back-patch to 9.2 where parameterized paths were introduced. --- src/backend/optimizer/path/allpaths.c | 108 +++++++++++++++++++++----- src/test/regress/expected/union.out | 34 ++++++++ src/test/regress/sql/union.sql | 21 +++++ 3 files changed, 142 insertions(+), 21 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 5c8e5c8d1f8..349040237c6 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, List *all_child_pathkeys); +static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, + RelOptInfo *rel, + Relids required_outer); static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -803,7 +806,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(l, all_child_outers) { Relids required_outer = (Relids) lfirst(l); - bool ok = true; + bool subpaths_valid = true; ListCell *lcr; /* Select the child paths for an Append with this parameterization */ @@ -811,31 +814,21 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(lcr, live_childrels) { RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_total; + Path *subpath; - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - NIL, - required_outer, - TOTAL_COST); - Assert(cheapest_total != NULL); - - /* Children must have exactly the desired parameterization */ - if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) + subpath = get_cheapest_parameterized_child_path(root, + childrel, + required_outer); + if (subpath == NULL) { - cheapest_total = reparameterize_path(root, cheapest_total, - required_outer, 1.0); - if (cheapest_total == NULL) - { - ok = false; - break; - } + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; } - - subpaths = accumulate_append_subpath(subpaths, cheapest_total); + subpaths = accumulate_append_subpath(subpaths, subpath); } - if (ok) + if (subpaths_valid) add_path(rel, (Path *) create_append_path(rel, subpaths, required_outer)); } @@ -941,6 +934,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, } } +/* + * get_cheapest_parameterized_child_path + * Get cheapest path for this relation that has exactly the requested + * parameterization. + * + * Returns NULL if unable to create such a path. + */ +static Path * +get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *cheapest; + ListCell *lc; + + /* + * Look up the cheapest existing path with no more than the needed + * parameterization. If it has exactly the needed parameterization, we're + * done. + */ + cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, + NIL, + required_outer, + TOTAL_COST); + Assert(cheapest != NULL); + if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) + return cheapest; + + /* + * Otherwise, we can "reparameterize" an existing path to match the given + * parameterization, which effectively means pushing down additional + * joinquals to be checked within the path's scan. However, some existing + * paths might check the available joinquals already while others don't; + * therefore, it's not clear which existing path will be cheapest after + * reparameterization. We have to go through them all and find out. + */ + cheapest = NULL; + foreach(lc, rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Can't use it if it needs more than requested parameterization */ + if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + continue; + + /* + * Reparameterization can only increase the path's cost, so if it's + * already more expensive than the current cheapest, forget it. + */ + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + + /* Reparameterize if needed, then recheck cost */ + if (!bms_equal(PATH_REQ_OUTER(path), required_outer)) + { + path = reparameterize_path(root, path, required_outer, 1.0); + if (path == NULL) + continue; /* failed to reparameterize this one */ + Assert(bms_equal(PATH_REQ_OUTER(path), required_outer)); + + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + } + + /* We have a new best path */ + cheapest = path; + } + + /* Return the best path, or NULL if we found no suitable candidate */ + return cheapest; +} + /* * accumulate_append_subpath * Add a subpath to the list being built for an Append or MergeAppend diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index aafa514e435..b7c4c7b3121 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -605,3 +605,37 @@ WHERE x > 3; 2 | 4 (1 row) +-- Test proper handling of parameterized appendrel paths when the +-- potential join qual is expensive +create function expensivefunc(int) returns int +language plpgsql immutable strict cost 10000 +as $$begin return $1; end$$; +create temp table t3 as select generate_series(-1000,1000) as x; +create index t3i on t3 (expensivefunc(x)); +analyze t3; +explain (costs off) +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + QUERY PLAN +------------------------------------------------------------ + Nested Loop + -> Seq Scan on int4_tbl + -> Append + -> Index Scan using t3i on t3 a + Index Cond: (expensivefunc(x) = int4_tbl.f1) + -> Index Scan using t3i on t3 b + Index Cond: (expensivefunc(x) = int4_tbl.f1) +(7 rows) + +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + x | f1 +---+---- + 0 | 0 + 0 | 0 +(2 rows) + +drop table t3; +drop function expensivefunc(int); diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index f3c9d113827..6194ce47511 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -249,3 +249,24 @@ SELECT * FROM UNION SELECT 2 AS t, 4 AS x) ss WHERE x > 3; + +-- Test proper handling of parameterized appendrel paths when the +-- potential join qual is expensive +create function expensivefunc(int) returns int +language plpgsql immutable strict cost 10000 +as $$begin return $1; end$$; + +create temp table t3 as select generate_series(-1000,1000) as x; +create index t3i on t3 (expensivefunc(x)); +analyze t3; + +explain (costs off) +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + +drop table t3; +drop function expensivefunc(int); -- GitLab