diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index fdaa9648f25185e79803843fcd29447112e50d07..99629e602aac1b672d0bc05acca4f9574724c88e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -42,6 +42,14 @@ #include "utils/lsyscache.h" +/* results of subquery_is_pushdown_safe */ +typedef struct pushdown_safety_info +{ + bool *unsafeColumns; /* which output columns are unsafe to use */ + bool unsafeVolatile; /* don't push down volatile quals */ + bool unsafeLeaky; /* don't push down leaky quals */ +} pushdown_safety_info; + /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ int geqo_threshold; @@ -88,14 +96,15 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *unsafeColumns); + pushdown_safety_info *safetyInfo); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *unsafeColumns); -static void check_output_expressions(Query *subquery, bool *unsafeColumns); + pushdown_safety_info *safetyInfo); +static void check_output_expressions(Query *subquery, + pushdown_safety_info *safetyInfo); static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *unsafeColumns); + pushdown_safety_info *safetyInfo); static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *unsafeColumns); + pushdown_safety_info *safetyInfo); static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, @@ -1119,7 +1128,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Query *parse = root->parse; Query *subquery = rte->subquery; Relids required_outer; - bool *unsafeColumns; + pushdown_safety_info safetyInfo; double tuple_fraction; PlannerInfo *subroot; List *pathkeys; @@ -1139,13 +1148,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, required_outer = rel->lateral_relids; /* - * We need a workspace for keeping track of unsafe-to-reference columns. - * unsafeColumns[i] is set TRUE if we've found that output column i of the - * subquery is unsafe to use in a pushed-down qual. + * Zero out result area for subquery_is_pushdown_safe, so that it can set + * flags as needed while recursing. In particular, we need a workspace + * for keeping track of unsafe-to-reference columns. unsafeColumns[i] + * will be set TRUE if we find that output column i of the subquery is + * unsafe to use in a pushed-down qual. */ - unsafeColumns = (bool *) + memset(&safetyInfo, 0, sizeof(safetyInfo)); + safetyInfo.unsafeColumns = (bool *) palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); + /* + * If the subquery has the "security_barrier" flag, it means the subquery + * originated from a view that must enforce row-level security. Then we + * must not push down quals that contain leaky functions. (Ideally this + * would be checked inside subquery_is_pushdown_safe, but since we don't + * currently pass the RTE to that function, we must do it here.) + */ + safetyInfo.unsafeLeaky = rte->security_barrier; + /* * If there are any restriction clauses that have been attached to the * subquery relation, consider pushing them down to become WHERE or HAVING @@ -1160,10 +1181,6 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * pseudoconstant clauses; better to have the gating node above the * subquery. * - * Also, if the sub-query has the "security_barrier" flag, it means the - * sub-query originated from a view that must enforce row-level security. - * Then we must not push down quals that contain leaky functions. - * * Non-pushed-down clauses will get evaluated as qpquals of the * SubqueryScan node. * @@ -1171,7 +1188,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * push down a pushable qual, because it'd result in a worse plan? */ if (rel->baserestrictinfo != NIL && - subquery_is_pushdown_safe(subquery, subquery, unsafeColumns)) + subquery_is_pushdown_safe(subquery, subquery, &safetyInfo)) { /* OK to consider pushing down individual quals */ List *upperrestrictlist = NIL; @@ -1183,9 +1200,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Node *clause = (Node *) rinfo->clause; if (!rinfo->pseudoconstant && - (!rte->security_barrier || - !contain_leaky_functions(clause)) && - qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns)) + qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo)) { /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); @@ -1199,7 +1214,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, rel->baserestrictinfo = upperrestrictlist; } - pfree(unsafeColumns); + pfree(safetyInfo.unsafeColumns); /* * The upper query might not use all the subquery's output columns; if @@ -1679,19 +1694,39 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * quals into it, because that could change the results. * - * In addition, we make several checks on the subquery's output columns - * to see if it is safe to reference them in pushed-down quals. If output - * column k is found to be unsafe to reference, we set unsafeColumns[k] to - * TRUE, but we don't reject the subquery overall since column k might - * not be referenced by some/all quals. The unsafeColumns[] array will be - * consulted later by qual_is_pushdown_safe(). It's better to do it this - * way than to make the checks directly in qual_is_pushdown_safe(), because - * when the subquery involves set operations we have to check the output + * 4. If the subquery uses DISTINCT, we cannot push volatile quals into it. + * This is because upper-level quals should semantically be evaluated only + * once per distinct row, not once per original row, and if the qual is + * volatile then extra evaluations could change the results. (This issue + * does not apply to other forms of aggregation such as GROUP BY, because + * when those are present we push into HAVING not WHERE, so that the quals + * are still applied after aggregation.) + * + * In addition, we make several checks on the subquery's output columns to see + * if it is safe to reference them in pushed-down quals. If output column k + * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k] + * to TRUE, but we don't reject the subquery overall since column k might not + * be referenced by some/all quals. The unsafeColumns[] array will be + * consulted later by qual_is_pushdown_safe(). It's better to do it this way + * than to make the checks directly in qual_is_pushdown_safe(), because when + * the subquery involves set operations we have to check the output * expressions in each arm of the set op. + * + * Note: pushing quals into a DISTINCT subquery is theoretically dubious: + * we're effectively assuming that the quals cannot distinguish values that + * the DISTINCT's equality operator sees as equal, yet there are many + * counterexamples to that assumption. However use of such a qual with a + * DISTINCT subquery would be unsafe anyway, since there's no guarantee which + * "equal" value will be chosen as the output value by the DISTINCT operation. + * So we don't worry too much about that. Another objection is that if the + * qual is expensive to evaluate, running it for each original row might cost + * more than we save by eliminating rows before the DISTINCT step. But it + * would be very hard to estimate that at this stage, and in practice pushdown + * seldom seems to make things worse, so we ignore that problem too. */ static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *unsafeColumns) + pushdown_safety_info *safetyInfo) { SetOperationStmt *topop; @@ -1703,6 +1738,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->hasWindowFuncs) return false; + /* Check point 4 */ + if (subquery->distinctClause) + safetyInfo->unsafeVolatile = true; + /* * If we're at a leaf query, check for unsafe expressions in its target * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in @@ -1710,7 +1749,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, * them.) */ if (subquery->setOperations == NULL) - check_output_expressions(subquery, unsafeColumns); + check_output_expressions(subquery, safetyInfo); /* Are we at top level, or looking at a setop component? */ if (subquery == topquery) @@ -1718,7 +1757,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, /* Top level, so check any component queries */ if (subquery->setOperations != NULL) if (!recurse_pushdown_safe(subquery->setOperations, topquery, - unsafeColumns)) + safetyInfo)) return false; } else @@ -1731,7 +1770,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, Assert(topop && IsA(topop, SetOperationStmt)); compare_tlist_datatypes(subquery->targetList, topop->colTypes, - unsafeColumns); + safetyInfo); } return true; } @@ -1741,7 +1780,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, */ static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *unsafeColumns) + pushdown_safety_info *safetyInfo) { if (IsA(setOp, RangeTblRef)) { @@ -1750,7 +1789,7 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, Query *subquery = rte->subquery; Assert(subquery != NULL); - return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns); + return subquery_is_pushdown_safe(subquery, topquery, safetyInfo); } else if (IsA(setOp, SetOperationStmt)) { @@ -1760,9 +1799,9 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, if (op->op == SETOP_EXCEPT) return false; /* Else recurse */ - if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns)) + if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo)) return false; - if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns)) + if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo)) return false; } else @@ -1793,14 +1832,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, * 3. If the subquery uses DISTINCT ON, we must not push down any quals that * refer to non-DISTINCT output columns, because that could change the set * of rows returned. (This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, so we needn't check. But note - * we are assuming that the qual can't distinguish values that the DISTINCT - * operator sees as equal. This is a bit shaky but we have no way to test - * for the case, and it's unlikely enough that we shouldn't refuse the - * optimization just because it could theoretically happen.) + * there are no non-DISTINCT output columns, so we needn't check. Note that + * subquery_is_pushdown_safe already reported that we can't use volatile + * quals if there's DISTINCT or DISTINCT ON.) */ static void -check_output_expressions(Query *subquery, bool *unsafeColumns) +check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) { ListCell *lc; @@ -1812,20 +1849,20 @@ check_output_expressions(Query *subquery, bool *unsafeColumns) continue; /* ignore resjunk columns */ /* We need not check further if output col is already known unsafe */ - if (unsafeColumns[tle->resno]) + if (safetyInfo->unsafeColumns[tle->resno]) continue; /* Functions returning sets are unsafe (point 1) */ if (expression_returns_set((Node *) tle->expr)) { - unsafeColumns[tle->resno] = true; + safetyInfo->unsafeColumns[tle->resno] = true; continue; } /* Volatile functions are unsafe (point 2) */ if (contain_volatile_functions((Node *) tle->expr)) { - unsafeColumns[tle->resno] = true; + safetyInfo->unsafeColumns[tle->resno] = true; continue; } @@ -1834,7 +1871,7 @@ check_output_expressions(Query *subquery, bool *unsafeColumns) !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) { /* non-DISTINCT column, so mark it unsafe */ - unsafeColumns[tle->resno] = true; + safetyInfo->unsafeColumns[tle->resno] = true; continue; } } @@ -1855,11 +1892,11 @@ check_output_expressions(Query *subquery, bool *unsafeColumns) * * tlist is a subquery tlist. * colTypes is an OID list of the top-level setop's output column types. - * unsafeColumns[] is the result array. + * safetyInfo->unsafeColumns[] is the result array. */ static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *unsafeColumns) + pushdown_safety_info *safetyInfo) { ListCell *l; ListCell *colType = list_head(colTypes); @@ -1873,7 +1910,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes, if (colType == NULL) elog(ERROR, "wrong number of tlist entries"); if (exprType((Node *) tle->expr) != lfirst_oid(colType)) - unsafeColumns[tle->resno] = true; + safetyInfo->unsafeColumns[tle->resno] = true; colType = lnext(colType); } if (colType != NULL) @@ -1892,15 +1929,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * it will work correctly: sublinks will already have been transformed into * subplans in the qual, but not in the subquery). * - * 2. The qual must not refer to the whole-row output of the subquery + * 2. If unsafeVolatile is set, the qual must not contain any volatile + * functions. + * + * 3. If unsafeLeaky is set, the qual must not contain any leaky functions. + * + * 4. The qual must not refer to the whole-row output of the subquery * (since there is no easy way to name that within the subquery itself). * - * 3. The qual must not refer to any subquery output columns that were + * 5. The qual must not refer to any subquery output columns that were * found to be unsafe to reference by subquery_is_pushdown_safe(). */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *unsafeColumns) + pushdown_safety_info *safetyInfo) { bool safe = true; List *vars; @@ -1910,6 +1952,16 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, if (contain_subplans(qual)) return false; + /* Refuse volatile quals if we found they'd be unsafe (point 2) */ + if (safetyInfo->unsafeVolatile && + contain_volatile_functions(qual)) + return false; + + /* Refuse leaky quals if told to (point 3) */ + if (safetyInfo->unsafeLeaky && + contain_leaky_functions(qual)) + return false; + /* * It would be unsafe to push down window function calls, but at least for * the moment we could never see any in a qual anyhow. (The same applies @@ -1944,15 +1996,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, Assert(var->varno == rti); Assert(var->varattno >= 0); - /* Check point 2 */ + /* Check point 4 */ if (var->varattno == 0) { safe = false; break; } - /* Check point 3 */ - if (unsafeColumns[var->varattno]) + /* Check point 5 */ + if (safetyInfo->unsafeColumns[var->varattno]) { safe = false; break; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index ce15af7378b0c7f8bc53e896df3f15eb91561d5e..0f070ef93cd2ed08047f6451c15376e14e7bf251 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -739,3 +739,32 @@ select * from int4_tbl where 0 (1 row) +-- +-- Check that volatile quals aren't pushed down past a DISTINCT: +-- nextval() should not be called more than the nominal number of times +-- +create temp sequence ts1; +select * from + (select distinct ten from tenk1) ss + where ten < 10 + nextval('ts1') + order by 1; + ten +----- + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) + +select nextval('ts1'); + nextval +--------- + 11 +(1 row) + diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 326fd70e4a06bea8db43f24b878e1a79377c02b4..b3fb03c97fb3e67b3b202aad8710020890d552ea 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -422,3 +422,16 @@ select * from int4_tbl where select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in (select ten from tenk1 b); + +-- +-- Check that volatile quals aren't pushed down past a DISTINCT: +-- nextval() should not be called more than the nominal number of times +-- +create temp sequence ts1; + +select * from + (select distinct ten from tenk1) ss + where ten < 10 + nextval('ts1') + order by 1; + +select nextval('ts1');