From 14f84cd82103c3797980125bb44a76446e0c0dc6 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 9 Aug 1999 03:16:47 +0000 Subject: [PATCH] Store -1 in attdisbursion to signal 'no duplicates in column'. Centralize att_disbursion readout logic. --- src/backend/commands/vacuum.c | 62 +++++++++++++++----------- src/backend/optimizer/path/joinpath.c | 20 ++------- src/backend/utils/adt/selfuncs.c | 63 ++------------------------- 3 files changed, 43 insertions(+), 102 deletions(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 0418a8d3a38..b480b44f347 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.117 1999/08/08 17:13:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.118 1999/08/09 03:16:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2346,36 +2346,46 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * } else if (stats->null_cnt <= 1 && stats->best_cnt == 1) { - /* looks like we have a unique-key attribute */ - double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt); - - selratio = 1.0 / total; - } - else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) - { - /* exact result when there are just 1 or 2 values... */ - double min_cnt_d = stats->min_cnt, - max_cnt_d = stats->max_cnt, - null_cnt_d = stats->null_cnt; - double total = ((double) stats->nonnull_cnt) + null_cnt_d; - - selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total); + /* looks like we have a unique-key attribute --- + * flag this with special -1.0 flag value. + * + * The correct disbursion is 1.0/numberOfRows, but since + * the relation row count can get updated without + * recomputing disbursion, we want to store a "symbolic" + * value and figure 1.0/numberOfRows on the fly. + */ + selratio = -1; } else { - double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt); - double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt); + if (VacAttrStatsLtGtValid(stats) && + stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) + { + /* exact result when there are just 1 or 2 values... */ + double min_cnt_d = stats->min_cnt, + max_cnt_d = stats->max_cnt, + null_cnt_d = stats->null_cnt; + double total = ((double) stats->nonnull_cnt) + null_cnt_d; - /* - * we assume count of other values are 20% of best - * count in table - */ - selratio = (most * most + 0.20 * most * (total - most)) / (total * total); + selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total); + } + else + { + double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt); + double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt); + + /* + * we assume count of other values are 20% of best + * count in table + */ + selratio = (most * most + 0.20 * most * (total - most)) / (total * total); + } + /* Make sure calculated values are in-range */ + if (selratio < 0.0) + selratio = 0.0; + else if (selratio > 1.0) + selratio = 1.0; } - if (selratio < 0.0) - selratio = 0.0; - else if (selratio > 1.0) - selratio = 1.0; attp->attdisbursion = selratio; /* diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 57688deeb85..4a7018aa64a 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.43 1999/08/06 04:00:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.44 1999/08/09 03:16:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "parser/parsetree.h" -#include "utils/syscache.h" +#include "utils/lsyscache.h" static Path *best_innerjoin(List *join_paths, List *outer_relid); static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, @@ -586,7 +586,6 @@ hash_inner_and_outer(Query *root, /* * Estimate disbursion of the specified Var - * Generate some kind of estimate, no matter what... * * We use a default of 0.1 if we can't figure out anything better. * This will typically discourage use of a hash rather strongly, @@ -598,24 +597,11 @@ static Cost estimate_disbursion(Query *root, Var *var) { Oid relid; - HeapTuple atp; - double disbursion; if (! IsA(var, Var)) return 0.1; relid = getrelid(var->varno, root->rtable); - atp = SearchSysCacheTuple(ATTNUM, - ObjectIdGetDatum(relid), - Int16GetDatum(var->varattno), - 0, 0); - if (! HeapTupleIsValid(atp)) - return 0.1; - - disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion; - if (disbursion > 0.0) - return disbursion; - - return 0.1; + return (Cost) get_attdisbursion(relid, var->varattno, 0.1); } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index a0e0c7ad7fc..298c7aeed16 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.37 1999/08/02 02:05:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.38 1999/08/09 03:16:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,7 +52,6 @@ static bool getattstatistics(Oid relid, AttrNumber attnum, Datum *commonval, Datum *loval, Datum *hival); -static double getattdisbursion(Oid relid, AttrNumber attnum); /* @@ -172,7 +171,7 @@ eqsel(Oid opid, /* No VACUUM ANALYZE stats available, so make a guess using * the disbursion stat (if we have that, which is unlikely...) */ - selec = getattdisbursion(relid, attno); + selec = get_attdisbursion(relid, attno, 0.01); } *result = (float64data) selec; @@ -374,8 +373,8 @@ eqjoinsel(Oid opid, *result = 0.1; else { - num1 = getattdisbursion(relid1, attno1); - num2 = getattdisbursion(relid2, attno2); + num1 = get_attdisbursion(relid1, attno1, 0.01); + num2 = get_attdisbursion(relid2, attno2, 0.01); max = (num1 > num2) ? num1 : num2; if (max <= 0) *result = 1.0; @@ -675,60 +674,6 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod, return true; } -/* - * getattdisbursion - * Retrieve the disbursion statistic for an attribute, - * or produce an estimate if no info is available. - */ -static double -getattdisbursion(Oid relid, AttrNumber attnum) -{ - HeapTuple atp; - double disbursion; - int32 ntuples; - - atp = SearchSysCacheTuple(ATTNUM, - ObjectIdGetDatum(relid), - Int16GetDatum(attnum), - 0, 0); - if (!HeapTupleIsValid(atp)) - { - /* this should not happen */ - elog(ERROR, "getattdisbursion: no attribute tuple %u %d", - relid, attnum); - return 0.1; - } - - disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion; - if (disbursion > 0.0) - return disbursion; - - /* VACUUM ANALYZE has not stored a disbursion statistic for us. - * Produce an estimate = 1/numtuples. This may produce - * unreasonably small estimates for large tables, so limit - * the estimate to no less than 0.01. - */ - atp = SearchSysCacheTuple(RELOID, - ObjectIdGetDatum(relid), - 0, 0, 0); - if (!HeapTupleIsValid(atp)) - { - /* this should not happen */ - elog(ERROR, "getattdisbursion: no relation tuple %u", relid); - return 0.1; - } - - ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples; - - if (ntuples > 0) - disbursion = 1.0 / (double) ntuples; - - if (disbursion < 0.01) - disbursion = 0.01; - - return disbursion; -} - float64 btreesel(Oid operatorObjectId, Oid indrelid, -- GitLab