23 #ifndef QUERYENGINE_HYPERLOGLOG_H
24 #define QUERYENGINE_HYPERLOGLOG_H
41 double alpha = 0.7213 / (1 + 1.079 / m);
47 double zl = log(zeros + 1);
48 double beta = -0.370393911 * zeros + 0.070471823 * zl + 0.17393686 * pow(zl, 2) +
49 0.16339839 * pow(zl, 3) + -0.09237745 * pow(zl, 4) +
50 0.03738027 * pow(zl, 5) + -0.005384159 * pow(zl, 6) +
51 0.00042419 * pow(zl, 7);
57 double accumulator = 0.0;
59 for (
unsigned i = 0; i < m; i++) {
60 accumulator += (1.0 / (1ULL << M[i]));
79 for (uint32_t i = 0; i < m; i++) {
88 inline size_t hll_size(
const T* M,
const size_t bitmap_sz_bits) {
89 size_t const m = size_t(1) << bitmap_sz_bits;
93 if (estimate <= 2.5 * m) {
95 estimate = m * log(static_cast<double>(m) / zeros);
98 if (bitmap_sz_bits == 14) {
106 template <
class T1,
class T2>
107 inline void hll_unify(T1* lhs, T2* rhs,
const size_t m) {
108 for (
size_t r = 0; r < m; ++r) {
109 rhs[r] = lhs[r] = std::max(static_cast<int8_t>(lhs[r]), static_cast<int8_t>(rhs[r]));
114 double err_rate{
static_cast<double>(err_percent) / 100.0};
115 double k = ceil(2 * log2(1.04 / err_rate));
118 return std::min(16, std::max(static_cast<int>(k), 4));
123 #endif // QUERYENGINE_HYPERLOGLOG_H
Descriptor for the storage layout use for (approximate) count distinct operations.
int hll_size_for_rate(const int err_percent)
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
double get_beta(const uint32_t zeros)
double get_alpha_adjusted_estimate(const size_t m, T *M)
double get_harmonic_mean_denominator(T *M, uint32_t m)
double get_beta_adjusted_estimate(const size_t m, const uint32_t z, T *M)
uint32_t count_zeros(T *M, size_t m)
double get_alpha(const size_t m)