diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c index 094bc09a44c896b3facd443a18efec68bfcd54ab..fa7f05a2411fd502a3576905ad580ab5cd1b1d87 100644 --- a/src/backend/lib/hyperloglog.c +++ b/src/backend/lib/hyperloglog.c @@ -56,7 +56,7 @@ static inline uint8 rho(uint32 x, uint8 b); /* - * Initialize HyperLogLog track state + * Initialize HyperLogLog track state, by bit width * * bwidth is bit width (so register size will be 2 to the power of bwidth). * Must be between 4 and 16 inclusive. @@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth) cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters; } +/* + * Initialize HyperLogLog track state, by error rate + * + * Instead of specifying bwidth (number of bits used for addressing the + * register), this method allows sizing the counter for a particular error + * rate using a simple formula from the paper: + * + * e = 1.04 / sqrt(m) + * + * where 'm' is the number of registers, i.e. (2^bwidth). The method + * finds the lowest bwidth with 'e' below the requested error rate, and + * then uses it to initialize the counter. + * + * As bwidth has to be between 4 and 16, the achievable error rate ranges + * from ~26% (bwidth=4) to ~0.4% (bwidth=16); tighter requests get 16. + */ +void +initHyperLogLogError(hyperLogLogState *cState, double error) +{ + uint8 bwidth = 4; + + while (bwidth < 16) + { + double m = (Size) 1 << bwidth; + + if (1.04 / sqrt(m) < error) + break; + bwidth++; + } + + initHyperLogLog(cState, bwidth); +} + +/* + * Free HyperLogLog track state + * + * Releases hashesArr with pfree, but not the state itself (in case it's not + * allocated by palloc). hashesArr is not reset to NULL afterwards. + */ +void +freeHyperLogLog(hyperLogLogState *cState) +{ + Assert(cState->hashesArr != NULL); + pfree(cState->hashesArr); +} + /* * Adds element to the estimator, from caller-supplied hash. 
* diff --git a/src/include/lib/hyperloglog.h b/src/include/lib/hyperloglog.h index 5a1d4d356aae1b7843bbf4c59c6e34b0e4ca0930..b999b3056a679777681c13a3754d6bfe2e010645 100644 --- a/src/include/lib/hyperloglog.h +++ b/src/include/lib/hyperloglog.h @@ -60,8 +60,10 @@ typedef struct hyperLogLogState } hyperLogLogState; extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth); +extern void initHyperLogLogError(hyperLogLogState *cState, double error); extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash); extern double estimateHyperLogLog(hyperLogLogState *cState); extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState); +extern void freeHyperLogLog(hyperLogLogState *cState); #endif /* HYPERLOGLOG_H */