diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 88cdbc3294737499bff61f838b0283b1f886eb22..6cae9e8b48935a112975673fbfc36e669b663ab3 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -702,17 +702,37 @@ intermediate layers of joins, for example: -> Index Scan using C_Z_IDX on C Index Condition: C.Z = A.X -If all joins are plain inner joins then this is unnecessary, because -it's always possible to reorder the joins so that a parameter is used +If all joins are plain inner joins then this is usually unnecessary, +because it's possible to reorder the joins so that a parameter is used immediately below the nestloop node that provides it. But in the -presence of outer joins, join reordering may not be possible, and then -this option can be critical. Before version 9.2, Postgres used ad-hoc -methods for planning and executing such queries, and those methods could -not handle passing parameters down through multiple join levels. +presence of outer joins, such join reordering may not be possible. + +Also, the bottom-level scan might require parameters from more than one +other relation. In principle we could join the other relations first +so that all the parameters are supplied from a single nestloop level. +But if those other relations have no join clause in common (which is +common in star-schema queries for instance), the planner won't consider +joining them directly to each other. In such a case we need to be able +to create a plan like + + NestLoop + -> Seq Scan on SmallTable1 A + NestLoop + -> Seq Scan on SmallTable2 B + NestLoop + -> Index Scan using XYIndex on LargeTable C + Index Condition: C.X = A.AID and C.Y = B.BID + +so we should be willing to pass down A.AID through a join even though +there is no join order constraint forcing the plan to look like this. + +Before version 9.2, Postgres used ad-hoc methods for planning and +executing nestloop queries of this kind, and those methods could not +handle passing parameters down through multiple join levels. To plan such queries, we now use a notion of a "parameterized path", which is a path that makes use of a join clause to a relation that's not -scanned by the path. In the example just above, we would construct a +scanned by the path. In the example two above, we would construct a path representing the possibility of doing this: -> Index Scan using C_Z_IDX on C diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index e6aa21c45f5909fb74d3dcade29893d6144616cb..1da953f6d335d89729f48db3423473016333eb56 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -117,13 +117,14 @@ add_paths_to_joinrel(PlannerInfo *root, /* * Decide whether it's sensible to generate parameterized paths for this * joinrel, and if so, which relations such paths should require. There - * is no need to create a parameterized result path unless there is a join - * order restriction that prevents joining one of our input rels directly - * to the parameter source rel instead of joining to the other input rel. - * This restriction reduces the number of parameterized paths we have to - * deal with at higher join levels, without compromising the quality of - * the resulting plan. We express the restriction as a Relids set that - * must overlap the parameterization of any proposed join path. + * is usually no need to create a parameterized result path unless there + * is a join order restriction that prevents joining one of our input rels + * directly to the parameter source rel instead of joining to the other + * input rel. (But see exception in try_nestloop_path.) This restriction + * reduces the number of parameterized paths we have to deal with at + * higher join levels, without compromising the quality of the resulting + * plan. We express the restriction as a Relids set that must overlap the + * parameterization of any proposed join path. */ foreach(lc, root->join_info_list) { @@ -291,9 +292,29 @@ try_nestloop_path(PlannerInfo *root, if (required_outer && !bms_overlap(required_outer, param_source_rels)) { - /* Waste no memory when we reject a path here */ - bms_free(required_outer); - return; + /* + * We override the param_source_rels heuristic to accept nestloop + * paths in which the outer rel satisfies some but not all of the + * inner path's parameterization. This is necessary to get good plans + * for star-schema scenarios, in which a parameterized path for a + * large table may require parameters from multiple small tables that + * will not get joined directly to each other. We can handle that by + * stacking nestloops that have the small tables on the outside; but + * this breaks the rule the param_source_rels heuristic is based on, + * namely that parameters should not be passed down across joins + * unless there's a join-order-constraint-based reason to do so. So + * ignore the param_source_rels restriction when this case applies. + */ + Relids outerrelids = outer_path->parent->relids; + Relids innerparams = PATH_REQ_OUTER(inner_path); + + if (!(bms_overlap(innerparams, outerrelids) && + bms_nonempty_difference(innerparams, outerrelids))) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 25011847a043270b67b499d4dfdfbf1ad86a3269..ca3a17b283ed876798f256fff1b7da8ec59d72aa 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2780,6 +2780,25 @@ where thousand = (q1 + q2); -> Seq Scan on int4_tbl (12 rows) +-- +-- test ability to generate a suitable plan for a star-schema query +-- +explain (costs off) +select * from + tenk1, int8_tbl a, int8_tbl b +where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl b + Filter: (q2 = 2) + -> Nested Loop + -> Seq Scan on int8_tbl a + Filter: (q2 = 1) + -> Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = a.q1) AND (tenthous = b.q1)) +(8 rows) + -- -- test extraction of restriction OR clauses from join OR clause -- (we used to only do this for indexable clauses) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 718e1d9fc381ff11814bc5745f89d6a8d9487b04..6005476c16289a96a6e24b81c3763701bcea03da 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -763,6 +763,15 @@ select * from int4(sin(0)) q2 where thousand = (q1 + q2); +-- +-- test ability to generate a suitable plan for a star-schema query +-- + +explain (costs off) +select * from + tenk1, int8_tbl a, int8_tbl b +where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; + -- -- test extraction of restriction OR clauses from join OR clause -- (we used to only do this for indexable clauses)