diff --git a/doc/src/sgml/indexcost.sgml b/doc/src/sgml/indexcost.sgml index 1211653edd2a34496a01e294de7c1a4a8a5dfe03..6c8c940c100f49e540c661b8cdea5bc8c6746e0a 100644 --- a/doc/src/sgml/indexcost.sgml +++ b/doc/src/sgml/indexcost.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $ --> <chapter id="indexcost"> @@ -205,7 +205,8 @@ amcostestimate (Query *root, <programlisting> *indexSelectivity = clauselist_selectivity(root, indexQuals, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); </programlisting> </para> </step> diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index 941212a649ff15c6db0219cd5db69df1af12b15f..4c09a40b1d74a2385d07ee7e05c93e8cfd6f8ab7 100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $ * * NOTES * these routines moved here from commands/define.c and somewhat cleaned up. @@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName, typeId[0] = INTERNALOID; /* Query */ typeId[1] = OIDOID; /* operator OID */ typeId[2] = INTERNALOID; /* args list */ + typeId[3] = INT2OID; /* jointype */ - joinOid = LookupFuncName(joinName, 3, typeId); + joinOid = LookupFuncName(joinName, 4, typeId); if (!OidIsValid(joinOid)) - func_error("OperatorDef", joinName, 3, typeId, NULL); + func_error("OperatorDef", joinName, 4, typeId, NULL); } else joinOid = InvalidOid; diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 84041a566d18b3fc7fed3e7d8985143e87475842..9df0a79478230f77bc95e13b30676e05c2e2f6a6 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, Selectivity restrictlist_selectivity(Query *root, List *restrictinfo_list, - int varRelid) + int varRelid, + JoinType jointype) { List *clauselist = get_actual_clauses(restrictinfo_list); Selectivity result; - result = clauselist_selectivity(root, clauselist, varRelid); + result = clauselist_selectivity(root, clauselist, varRelid, jointype); freeList(clauselist); return result; } @@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root, * expression clauses. The list can be empty, in which case 1.0 * must be returned. * - * See clause_selectivity() for the meaning of the varRelid parameter. + * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses. However, that's only right if the subclauses have independent @@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root, Selectivity clauselist_selectivity(Query *root, List *clauses, - int varRelid) + int varRelid, + JoinType jointype) { Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; @@ -184,7 +186,7 @@ clauselist_selectivity(Query *root, } } /* Not the right form, so treat it generically. */ - s2 = clause_selectivity(root, clause, varRelid); + s2 = clause_selectivity(root, clause, varRelid, jointype); s1 = s1 * s2; } @@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause, * * When varRelid is 0, all variables are treated as variables. This * is appropriate for ordinary join clauses and restriction clauses. + * + * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER + * if the clause isn't a join clause or the context is uncertain. */ Selectivity clause_selectivity(Query *root, Node *clause, - int varRelid) + int varRelid, + JoinType jointype) { Selectivity s1 = 1.0; /* default for any unhandled clause type */ @@ -424,14 +430,16 @@ clause_selectivity(Query *root, /* inverse of the selectivity of the underlying clause */ s1 = 1.0 - clause_selectivity(root, (Node *) get_notclausearg((Expr *) clause), - varRelid); + varRelid, + jointype); } else if (and_clause(clause)) { /* share code with clauselist_selectivity() */ s1 = clauselist_selectivity(root, ((BoolExpr *) clause)->args, - varRelid); + varRelid, + jointype); } else if (or_clause(clause)) { @@ -447,7 +455,8 @@ clause_selectivity(Query *root, { Selectivity s2 = clause_selectivity(root, (Node *) lfirst(arg), - varRelid); + varRelid, + jointype); s1 = s1 + s2 - s1 * s2; } @@ -479,7 +488,8 @@ clause_selectivity(Query *root, { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, - ((OpExpr *) clause)->args); + ((OpExpr *) clause)->args, + jointype); } else { @@ -519,14 +529,16 @@ clause_selectivity(Query *root, s1 = booltestsel(root, ((BooleanTest *) clause)->booltesttype, (Node *) ((BooleanTest *) clause)->arg, - varRelid); + varRelid, + jointype); } else if (IsA(clause, RelabelType)) { /* Not sure this case is needed, but it can't hurt */ s1 = clause_selectivity(root, (Node *) ((RelabelType *) clause)->arg, - varRelid); + varRelid, + jointype); } #ifdef SELECTIVITY_DEBUG diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d18e29ad6f4872981a91920ba9863c9b62db37db..56282406129f5ab54c30f76b0cc85bc450083fe2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -49,7 +49,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -104,7 +104,8 @@ bool enable_hashjoin = true; static Selectivity estimate_hash_bucketsize(Query *root, Var *var, int nbuckets); static bool cost_qual_eval_walker(Node *node, QualCost *total); -static Selectivity approx_selectivity(Query *root, List *quals); +static Selectivity approx_selectivity(Query *root, List *quals, + JoinType jointype); static void set_rel_width(Query *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root) */ if (path->jointype == JOIN_IN) { - Selectivity qual_selec = approx_selectivity(root, restrictlist); + Selectivity qual_selec = approx_selectivity(root, restrictlist, + path->jointype); double qptuples; qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows); @@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root) * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ - merge_selec = approx_selectivity(root, mergeclauses); + merge_selec = approx_selectivity(root, mergeclauses, + path->jpath.jointype); cost_qual_eval(&merge_qual_cost, mergeclauses); qpquals = set_ptrDifference(restrictlist, mergeclauses); - qp_selec = approx_selectivity(root, qpquals); + qp_selec = approx_selectivity(root, qpquals, + path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); @@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root) * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ - hash_selec = approx_selectivity(root, hashclauses); + hash_selec = approx_selectivity(root, hashclauses, + path->jpath.jointype); cost_qual_eval(&hash_qual_cost, hashclauses); qpquals = set_ptrDifference(restrictlist, hashclauses); - qp_selec = approx_selectivity(root, qpquals); + qp_selec = approx_selectivity(root, qpquals, + path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); @@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root) * Determine bucketsize fraction for inner relation. We use the * smallest bucketsize estimated for any individual hashclause; * this is undoubtedly conservative. + * + * BUT: if inner relation has been unique-ified, we can assume it's + * good for hashing. This is important both because it's the right + * answer, and because we avoid contaminating the cache with a value + * that's wrong for non-unique-ified paths. */ - innerbucketsize = 1.0; - foreach(hcl, hashclauses) + if (IsA(inner_path, UniquePath)) + innerbucketsize = 1.0 / virtualbuckets; + else { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); - Selectivity thisbucketsize; + innerbucketsize = 1.0; + foreach(hcl, hashclauses) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); + Selectivity thisbucketsize; - Assert(IsA(restrictinfo, RestrictInfo)); + Assert(IsA(restrictinfo, RestrictInfo)); - /* - * First we have to figure out which side of the hashjoin clause is the - * inner side. - * - * Since we tend to visit the same clauses over and over when planning - * a large query, we cache the bucketsize estimate in the RestrictInfo - * node to avoid repeated lookups of statistics. - */ - if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids)) - { - /* righthand side is inner */ - thisbucketsize = restrictinfo->right_bucketsize; - if (thisbucketsize < 0) + /* + * First we have to figure out which side of the hashjoin clause + * is the inner side. + * + * Since we tend to visit the same clauses over and over when + * planning a large query, we cache the bucketsize estimate in the + * RestrictInfo node to avoid repeated lookups of statistics. + */ + if (is_subseti(restrictinfo->right_relids, + inner_path->parent->relids)) { - /* not cached yet */ - thisbucketsize = estimate_hash_bucketsize(root, + /* righthand side is inner */ + thisbucketsize = restrictinfo->right_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = + estimate_hash_bucketsize(root, (Var *) get_rightop(restrictinfo->clause), - virtualbuckets); - restrictinfo->right_bucketsize = thisbucketsize; + virtualbuckets); + restrictinfo->right_bucketsize = thisbucketsize; + } } - } - else - { - Assert(is_subseti(restrictinfo->left_relids, - inner_path->parent->relids)); - /* lefthand side is inner */ - thisbucketsize = restrictinfo->left_bucketsize; - if (thisbucketsize < 0) + else { - /* not cached yet */ - thisbucketsize = estimate_hash_bucketsize(root, + Assert(is_subseti(restrictinfo->left_relids, + inner_path->parent->relids)); + /* lefthand side is inner */ + thisbucketsize = restrictinfo->left_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = + estimate_hash_bucketsize(root, (Var *) get_leftop(restrictinfo->clause), - virtualbuckets); - restrictinfo->left_bucketsize = thisbucketsize; + virtualbuckets); + restrictinfo->left_bucketsize = thisbucketsize; + } } - } - if (innerbucketsize > thisbucketsize) - innerbucketsize = thisbucketsize; + if (innerbucketsize > thisbucketsize) + innerbucketsize = thisbucketsize; + } } /* @@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total) * seems OK to live with the approximation. */ static Selectivity -approx_selectivity(Query *root, List *quals) +approx_selectivity(Query *root, List *quals, JoinType jointype) { Selectivity total = 1.0; List *l; @@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals) restrictinfo->this_selec = clause_selectivity(root, (Node *) restrictinfo->clause, - 0); + 0, + jointype); selec = restrictinfo->this_selec; } else { /* If it's a bare expression, must always do it the hard way */ - selec = clause_selectivity(root, qual, 0); + selec = clause_selectivity(root, qual, 0, jointype); } total *= selec; } @@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel) temp = rel->tuples * restrictlist_selectivity(root, rel->baserestrictinfo, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* * Force estimate to be at least one row, to make explain output look @@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, */ selec = restrictlist_selectivity(root, restrictlist, - 0); + 0, + jointype); /* * Basically, we multiply size of Cartesian product by selectivity. @@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, * For JOIN_IN and variants, the Cartesian product is figured with * respect to a unique-ified input, and then we can clamp to the size * of the other input. - * XXX it's not at all clear that the ordinary selectivity calculation - * is appropriate in this case. */ switch (jointype) { @@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel) temp = rel->tuples * restrictlist_selectivity(root, rel->baserestrictinfo, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* * Force estimate to be at least one row, to make explain output look diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 443d54c64733ba0ed20edaf1151257e5460e6a49..98e4d59f2df6b4db001036a198d2408be2d4d9ee 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root, * selectivity. However, since RestrictInfo nodes aren't copied when * linking them into different lists, it should be sufficient to use * pointer comparison to remove duplicates.) + * + * Always assume the join type is JOIN_INNER; even if some of the + * join clauses come from other contexts, that's not our problem. */ pathnode->rows = rel->tuples * restrictlist_selectivity(root, set_ptrUnion(rel->baserestrictinfo, clausegroup), - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* Like costsize.c, force estimate to be at least one row */ if (pathnode->rows < 1.0) pathnode->rows = 1.0; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 5f420f3725029c2372b868ea8f407ab08667e0cd..9f56a9f38d515ca2c977efb74ff2bd5d593078b3 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual) qualsel = clauselist_selectivity(subquery, plan->qual, - 0); + 0, JOIN_INNER); /* Is 10% selectivity a good threshold?? */ use_material = qualsel < 0.10; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 15120fafcd8ea3ee5a8b628be27c8c3b8ced20af..4a9f63312c3a070c393fbf48f03bbe1d18f800c4 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId) * This code executes registered procedures stored in the * operator relation, by calling the function manager. * - * varRelid is either 0 or a rangetable index. See clause_selectivity() - * for details about its meaning. + * See clause_selectivity() for the meaning of the additional parameters. */ Selectivity restriction_selectivity(Query *root, @@ -237,7 +236,8 @@ restriction_selectivity(Query *root, Selectivity join_selectivity(Query *root, Oid operator, - List *args) + List *args, + JoinType jointype) { RegProcedure oprjoin = get_oprjoin(operator); float8 result; @@ -249,10 +249,11 @@ join_selectivity(Query *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall3(oprjoin, + result = DatumGetFloat8(OidFunctionCall4(oprjoin, PointerGetDatum(root), ObjectIdGetDatum(operator), - PointerGetDatum(args))); + PointerGetDatum(args), + Int16GetDatum(jointype))); if (result < 0.0 || result > 1.0) elog(ERROR, "join_selectivity: bad value %f", result); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 8fb4e84ad772bf15b1075dec12c9f55a03a13b2f..d099262c46fa9a83ce5e88cd3a367db0c936b77d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.130 2003/01/27 20:51:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.131 2003/01/28 22:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -56,13 +56,18 @@ * float8 oprrest (internal, oid, internal, int4); * * The call convention for a join estimator (oprjoin function) is similar - * except that varRelid is not needed: + * except that varRelid is not needed, and instead the join type is + * supplied: * * Selectivity oprjoin (Query *root, * Oid operator, - * List *args); + * List *args, + * JoinType jointype); + * + * float8 oprjoin (internal, oid, internal, int2); * - * float8 oprjoin (internal, oid, internal); + * (We deliberately make the SQL signature different to facilitate + * catching errors.) *---------- */ @@ -1009,7 +1014,8 @@ icnlikesel(PG_FUNCTION_ARGS) * booltestsel - Selectivity of BooleanTest Node. */ Selectivity -booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid) +booltestsel(Query *root, BoolTestType booltesttype, Node *arg, + int varRelid, JoinType jointype) { Var *var; Oid relid; @@ -1047,11 +1053,13 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid) break; case IS_TRUE: case IS_NOT_FALSE: - selec = (double) clause_selectivity(root, arg, varRelid); + selec = (double) clause_selectivity(root, arg, + varRelid, jointype); break; case IS_FALSE: case IS_NOT_TRUE: - selec = 1.0 - (double) clause_selectivity(root, arg, varRelid); + selec = 1.0 - (double) clause_selectivity(root, arg, + varRelid, jointype); break; default: elog(ERROR, "booltestsel: unexpected booltesttype %d", @@ -1321,6 +1329,7 @@ eqjoinsel(PG_FUNCTION_ARGS) Query *root = (Query *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); + JoinType jointype = (JoinType) PG_GETARG_INT16(3); Var *var1; Var *var2; double selec; @@ -1421,6 +1430,8 @@ eqjoinsel(PG_FUNCTION_ARGS) FmgrInfo eqproc; bool *hasmatch1; bool *hasmatch2; + double nullfrac1 = stats1->stanullfrac; + double nullfrac2 = stats2->stanullfrac; double matchprodfreq, matchfreq1, matchfreq2, @@ -1434,10 +1445,36 @@ eqjoinsel(PG_FUNCTION_ARGS) nmatches; fmgr_info(get_opcode(operator), &eqproc); - hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool)); - memset(hasmatch1, 0, nvalues1 * sizeof(bool)); - hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool)); - memset(hasmatch2, 0, nvalues2 * sizeof(bool)); + hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); + hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); + + /* + * If we are doing any variant of JOIN_IN, pretend all the values + * of the righthand relation are unique (ie, act as if it's been + * DISTINCT'd). + * + * NOTE: it might seem that we should unique-ify the lefthand + * input when considering JOIN_REVERSE_IN. But this is not so, + * because the join clause we've been handed has not been + * commuted from the way the parser originally wrote it. We know + * that the unique side of the IN clause is *always* on the right. + * + * NOTE: it would be dangerous to try to be smart about JOIN_LEFT + * or JOIN_RIGHT here, because we do not have enough information + * to determine which var is really on which side of the join. + * Perhaps someday we should pass in more information. + */ + if (jointype == JOIN_IN || + jointype == JOIN_REVERSE_IN || + jointype == JOIN_UNIQUE_INNER || + jointype == JOIN_UNIQUE_OUTER) + { + float4 oneovern = 1.0 / nd2; + + for (i = 0; i < nvalues2; i++) + numbers2[i] = oneovern; + nullfrac2 = oneovern; + } /* * Note we assume that each MCV will match at most one member @@ -1496,8 +1533,8 @@ eqjoinsel(PG_FUNCTION_ARGS) * Compute total frequency of non-null values that are not in * the MCV lists. */ - otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1; - otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2; + otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1; + otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2; CLAMP_PROBABILITY(otherfreq1); CLAMP_PROBABILITY(otherfreq2); @@ -1585,6 +1622,7 @@ neqjoinsel(PG_FUNCTION_ARGS) Query *root = (Query *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); + JoinType jointype = (JoinType) PG_GETARG_INT16(3); Oid eqop; float8 result; @@ -1595,11 +1633,11 @@ neqjoinsel(PG_FUNCTION_ARGS) eqop = get_negator(operator); if (eqop) { - result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel, + result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel, PointerGetDatum(root), - ObjectIdGetDatum(eqop), - PointerGetDatum(args))); - + ObjectIdGetDatum(eqop), + PointerGetDatum(args), + Int16GetDatum(jointype))); } else { @@ -3784,7 +3822,8 @@ genericcostestimate(Query *root, RelOptInfo *rel, /* Estimate the fraction of main-table tuples that will be visited */ *indexSelectivity = clauselist_selectivity(root, selectivityQuals, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* * Estimate the number of tuples that will be visited. We do it in diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index b679fdb5ddc78a45feb9292dfa9163ab81201a70..d234eb3289541aaa7e242862e64d1336df4a735a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.173 2003/01/23 23:39:04 petere Exp $ + * $Id: catversion.h,v 1.174 2003/01/28 22:13:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200301241 +#define CATALOG_VERSION_NO 200301281 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index db907f745fa67e8f37a1099cf23ae2ac63d16ccd..d7b13a762ebbdde928115e96070760ffcda0cd3f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.281 2003/01/09 00:58:41 tgl Exp $ + * $Id: pg_proc.h,v 1.282 2003/01/28 22:13:36 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -218,13 +218,13 @@ DATA(insert OID = 103 ( scalarltsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 DESCR("restriction selectivity of < and related operators on scalar datatypes"); DATA(insert OID = 104 ( scalargtsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" scalargtsel - _null_ )); DESCR("restriction selectivity of > and related operators on scalar datatypes"); -DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" eqjoinsel - _null_ )); +DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" eqjoinsel - _null_ )); DESCR("join selectivity of = and related operators"); -DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" neqjoinsel - _null_ )); +DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" neqjoinsel - _null_ )); DESCR("join selectivity of <> and related operators"); -DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" scalarltjoinsel - _null_ )); +DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" scalarltjoinsel - _null_ )); DESCR("join selectivity of < and related operators on scalar datatypes"); -DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" scalargtjoinsel - _null_ )); +DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" scalargtjoinsel - _null_ )); DESCR("join selectivity of > and related operators on scalar datatypes"); DATA(insert OID = 109 ( unknownin PGNSP PGUID 12 f f t f i 1 705 "2275" unknownin - _null_ )); @@ -290,7 +290,7 @@ DATA(insert OID = 138 ( box_center PGNSP PGUID 12 f f t f i 1 600 "603" bo DESCR("center of"); DATA(insert OID = 139 ( areasel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" areasel - _null_ )); DESCR("restriction selectivity for area-comparison operators"); -DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" areajoinsel - _null_ )); +DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" areajoinsel - _null_ )); DESCR("join selectivity for area-comparison operators"); DATA(insert OID = 141 ( int4mul PGNSP PGUID 12 f f t f i 2 23 "23 23" int4mul - _null_ )); DESCR("multiply"); @@ -1590,11 +1590,11 @@ DESCR("current transaction time"); DATA(insert OID = 1300 ( positionsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" positionsel - _null_ )); DESCR("restriction selectivity for position-comparison operators"); -DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" positionjoinsel - _null_ )); +DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" positionjoinsel - _null_ )); DESCR("join selectivity for position-comparison operators"); DATA(insert OID = 1302 ( contsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" contsel - _null_ )); DESCR("restriction selectivity for containment comparison operators"); -DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" contjoinsel - _null_ )); +DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" contjoinsel - _null_ )); DESCR("join selectivity for containment comparison operators"); DATA(insert OID = 1304 ( overlaps PGNSP PGUID 12 f f f f i 4 16 "1184 1184 1184 1184" overlaps_timestamp - _null_ )); @@ -2545,9 +2545,9 @@ DATA(insert OID = 1814 ( iclikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 228 DESCR("restriction selectivity of ILIKE"); DATA(insert OID = 1815 ( icnlikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" icnlikesel - _null_ )); DESCR("restriction selectivity of NOT ILIKE"); -DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" iclikejoinsel - _null_ )); +DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" iclikejoinsel - _null_ )); DESCR("join selectivity of ILIKE"); -DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icnlikejoinsel - _null_ )); +DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icnlikejoinsel - _null_ )); DESCR("join selectivity of NOT ILIKE"); DATA(insert OID = 1818 ( regexeqsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" regexeqsel - _null_ )); DESCR("restriction selectivity of regex match"); @@ -2561,17 +2561,17 @@ DATA(insert OID = 1822 ( nlikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 DESCR("restriction selectivity of NOT LIKE"); DATA(insert OID = 1823 ( icregexnesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" icregexnesel - _null_ )); DESCR("restriction selectivity of case-insensitive regex non-match"); -DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" regexeqjoinsel - _null_ )); +DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" regexeqjoinsel - _null_ )); DESCR("join selectivity of regex match"); -DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" likejoinsel - _null_ )); +DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" likejoinsel - _null_ )); DESCR("join selectivity of LIKE"); -DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icregexeqjoinsel - _null_ )); +DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icregexeqjoinsel - _null_ )); DESCR("join selectivity of case-insensitive regex match"); -DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" regexnejoinsel - _null_ )); +DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" regexnejoinsel - _null_ )); DESCR("join selectivity of regex non-match"); -DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" nlikejoinsel - _null_ )); +DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" nlikejoinsel - _null_ )); DESCR("join selectivity of NOT LIKE"); -DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icregexnejoinsel - _null_ )); +DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icregexnejoinsel - _null_ )); DESCR("join selectivity of case-insensitive regex non-match"); /* Aggregate-related functions */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index aca6097bc1cc89800221d85394a7d2719774fd09..0feb56dd7c926833023cf2de84126e82c669fced 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.51 2003/01/27 20:51:54 tgl Exp $ + * $Id: cost.h,v 1.52 2003/01/28 22:13:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -88,13 +88,16 @@ extern void set_function_size_estimates(Query *root, RelOptInfo *rel); * routines to compute clause selectivities */ extern Selectivity restrictlist_selectivity(Query *root, - List *restrictinfo_list, - int varRelid); + List *restrictinfo_list, + int varRelid, + JoinType jointype); extern Selectivity clauselist_selectivity(Query *root, - List *clauses, - int varRelid); + List *clauses, + int varRelid, + JoinType jointype); extern Selectivity clause_selectivity(Query *root, - Node *clause, - int varRelid); + Node *clause, + int varRelid, + JoinType jointype); #endif /* COST_H */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index abd09871feb13fa7e518020e7bd0d936931e05fc..255d196d7d713cb1f0629d7231f254b7288bc43a 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: plancat.h,v 1.27 2002/06/20 20:29:51 momjian Exp $ + * $Id: plancat.h,v 1.28 2003/01/28 22:13:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,7 +34,8 @@ extern Selectivity restriction_selectivity(Query *root, int varRelid); extern Selectivity join_selectivity(Query *root, - Oid operator, - List *args); + Oid operator, + List *args, + JoinType jointype); #endif /* PLANCAT_H */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 037c2b2f5e3833afb78bfde398b2db503eaaef24..757c0e1e1ac7eb54bfea83cff1afa235e84747a9 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $ + * $Id: selfuncs.h,v 1.12 2003/01/28 22:13:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,7 +67,7 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); extern Selectivity booltestsel(Query *root, BoolTestType booltesttype, - Node *arg, int varRelid); + Node *arg, int varRelid, JoinType jointype); extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid); diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 7ef807a95db1375103f19a0f1d55ad210dc8c48c..dcf295919c9f58ab08099dbbbf8fc92c71e6509f 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -530,16 +530,17 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 3 OR + p2.pronargs != 4 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR - p2.proargtypes[2] != 'internal'::regtype); + p2.proargtypes[2] != 'internal'::regtype OR + p2.proargtypes[3] != 'int2'::regtype); oid | oprname | oid | proname -----+---------+-----+--------- (0 rows) diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 8d7597863fcfd9c7ff24591d2c58798f93984619..5a2ef11c21b9cb294b74e89b8a9fdf1638f059d1 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field" WHERE f3 IS NOT NULL); five | Correlated Field ------+------------------ - | 1 - | 2 | 2 | 3 + | 1 + | 2 | 3 (5 rows) diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 650073cccc1dd128cd6930dd50d4b51261ad9a2d..8d543932a7c30a5d540555fc6c6bcaca19e02467 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -444,17 +444,18 @@ WHERE p1.oprrest = p2.oid AND -- If oprjoin is set, the operator must be a binary boolean op, -- and it must link to a proc with the right signature -- to be a join selectivity estimator. --- The proc signature we want is: float8 proc(internal, oid, internal) +-- The proc signature we want is: float8 proc(internal, oid, internal, int2) SELECT p1.oid, p1.oprname, p2.oid, p2.proname FROM pg_operator AS p1, pg_proc AS p2 WHERE p1.oprjoin = p2.oid AND (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR p2.prorettype != 'float8'::regtype OR p2.proretset OR - p2.pronargs != 3 OR + p2.pronargs != 4 OR p2.proargtypes[0] != 'internal'::regtype OR p2.proargtypes[1] != 'oid'::regtype OR - p2.proargtypes[2] != 'internal'::regtype); + p2.proargtypes[2] != 'internal'::regtype OR + p2.proargtypes[3] != 'int2'::regtype); -- **************** pg_aggregate ****************