23 #ifndef QUERYENGINE_COUNTDISTINCT_H
24 #define QUERYENGINE_COUNTDISTINCT_H
29 #include "ThirdParty/robin_hood/robin_hood.h"
/**
 * @brief Counts the bits that are set in a bitmap.
 *
 * The buffer is scanned as whole 64-bit words first; any bytes left over
 * (bitmap_byte_sz modulo 8) are then counted one byte at a time.
 *
 * @param bitmap          Start of the bitmap buffer.
 * @param bitmap_byte_sz  Length of the bitmap in bytes.
 * @return Total number of set bits across the first bitmap_byte_sz bytes.
 *
 * NOTE(review): the word loop reads the buffer through an int64_t pointer, so
 * callers presumably pass storage that is suitably aligned for 64-bit loads -
 * confirm against the allocation site.
 */
inline size_t bitmap_set_size(const int8_t* bitmap, const size_t bitmap_byte_sz) {
  const auto bitmap_word_count = bitmap_byte_sz >> 3;  // number of full 8-byte words
  const auto bitmap_rem_bytes = bitmap_byte_sz & 7;    // trailing bytes past the last word
  const auto bitmap64 = reinterpret_cast<const int64_t*>(bitmap);
  size_t set_size = 0;
  for (size_t i = 0; i < bitmap_word_count; ++i) {
    std::bitset<64> word_bitset(bitmap64[i]);
    set_size += word_bitset.count();
  }
  // The remainder starts right after the last full word.
  const auto rem_bitmap = reinterpret_cast<const int8_t*>(&bitmap64[bitmap_word_count]);
  for (size_t i = 0; i < bitmap_rem_bytes; ++i) {
    // std::bitset<8> keeps only the low 8 bits, so a negative int8_t value
    // still contributes exactly the bits stored in that byte.
    std::bitset<8> byte_bitset(rem_bitmap[i]);
    set_size += byte_bitset.count();
  }
  return set_size;
}
55 for (
size_t i = 0; i < bitmap_sz; ++i) {
56 lhs[i] = rhs[i] = lhs[i] | rhs[i];
63 auto partial_set_vals = set_vals;
67 const auto partial_padded_size =
70 partial_set_vals += partial_padded_size;
76 const int64_t set_handle,
82 auto set_vals =
reinterpret_cast<int8_t*
>(set_handle);
86 ?
hll_size(reinterpret_cast<const int32_t*>(set_vals),
88 :
hll_size(reinterpret_cast<const int8_t*>(set_vals),
101 const int64_t new_set_handle,
102 const int64_t old_set_handle,
106 auto new_set =
reinterpret_cast<int8_t*
>(new_set_handle);
107 auto old_set =
reinterpret_cast<int8_t*
>(old_set_handle);
112 hll_unify(reinterpret_cast<int32_t*>(new_set),
113 reinterpret_cast<int32_t*>(old_set),
117 hll_unify(reinterpret_cast<int32_t*>(new_set),
118 reinterpret_cast<int8_t*>(old_set),
122 hll_unify(reinterpret_cast<int8_t*>(new_set),
123 reinterpret_cast<int32_t*>(old_set),
128 hll_unify(reinterpret_cast<int8_t*>(new_set),
129 reinterpret_cast<int8_t*>(old_set),
148 new_set->insert(old_set->begin(), old_set->end());
149 old_set->insert(new_set->begin(), new_set->end());
ExecutorDeviceType device_type
robin_hood::unordered_set< int64_t > CountDistinctSet
void count_distinct_set_union(const int64_t new_set_handle, const int64_t old_set_handle, const CountDistinctDescriptor &new_count_distinct_desc, const CountDistinctDescriptor &old_count_distinct_desc)
Descriptor for the storage layout used for (approximate) count distinct operations.
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
void bitmap_set_union(int8_t *lhs, int8_t *rhs, const size_t bitmap_sz)
CountDistinctImplType impl_type_
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
int64_t count_distinct_set_size(const int64_t set_handle, const CountDistinctDescriptor &count_distinct_desc)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
size_t bitmapSizeBytes() const
size_t bitmapPaddedSizeBytes() const
void partial_bitmap_union(int8_t *set_vals, const CountDistinctDescriptor &count_distinct_desc)
Functions used to work with HyperLogLog records.
size_t bitmap_set_size(const int8_t *bitmap, const size_t bitmap_byte_sz)