23 #ifndef QUERYENGINE_GROUPBYFASTIMPL_H
24 #define QUERYENGINE_GROUPBYFASTIMPL_H
28 #include "../../../Shared/funcannotations.h"
29 #include "../../../Shared/shard_key.h"
// insert_key_cas(address, compare, val): compare-and-swap helper used by the
// hash table fill routines in this header. This variant expands directly to
// atomicCAS, forwarding the three arguments in the same order.
// NOTE(review): the preprocessor guard that selects this variant is not visible
// in this view; presumably it is the CUDA device build -- confirm.
32 #define insert_key_cas(address, compare, val) atomicCAS(address, compare, val)
// insert_key_cas(address, compare, val) expressed through
// InterlockedCompareExchange. The argument order is intentionally different
// from the macro's own parameter order: `val` is passed second and `compare`
// third, matching the Win32 signature (Destination, Exchange, Comparand).
// `address` is reinterpreted as `volatile long*` and both values are cast to
// `long` to fit that API.
// NOTE(review): the preprocessor guard that selects this variant is not visible
// in this view; presumably it is the Windows host build -- confirm.
35 #define insert_key_cas(address, compare, val) \
36 InterlockedCompareExchange(reinterpret_cast<volatile long*>(address), \
37 static_cast<long>(val), \
38 static_cast<long>(compare))
// insert_key_cas(address, compare, val) expressed through the compiler builtin
// __sync_val_compare_and_swap, forwarding the three arguments in the same order.
// Code in this header compares the macro's result against the `compare` value
// it passed in.
// NOTE(review): the preprocessor guard that selects this variant is not visible
// in this view; presumably it is the fallback for non-CUDA, non-Windows
// builds -- confirm.
40 #define insert_key_cas(address, compare, val) \
41 __sync_val_compare_and_swap(address, compare, val)
47 const int32_t invalid_slot_val) {
48 if (
insert_key_cas(entry_ptr, invalid_slot_val, idx) != invalid_slot_val) {
57 const int32_t invalid_slot_val) {
69 const int64_t min_key,
70 const int64_t translated_null_val,
71 const int64_t bucket_normalization) {
72 auto hash_slot = key / bucket_normalization - min_key + (key == translated_null_val);
73 return buff + hash_slot;
78 const int64_t min_key) {
79 return buff + (key - min_key);
85 const int64_t min_key,
86 const int64_t translated_null_val) {
87 return buff + (key - min_key) + (key == translated_null_val);
93 const int64_t min_key,
94 const int64_t translated_null_val,
95 const uint32_t entry_count_per_shard,
96 const uint32_t num_shards,
97 const uint32_t device_count,
98 const int64_t bucket_normalization) {
100 const uint32_t shard_buffer_index =
101 shard / device_count;
102 int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
103 auto hash_slot = ((key / bucket_normalization) - min_key) / num_shards +
104 (key == translated_null_val);
105 return shard_buffer + hash_slot;
111 const int64_t min_key,
112 const uint32_t entry_count_per_shard,
113 const uint32_t num_shards,
114 const uint32_t device_count) {
116 const uint32_t shard_buffer_index =
117 shard / device_count;
118 int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
119 return shard_buffer + (key - min_key) / num_shards;
125 const int64_t min_key,
126 const int64_t translated_null_val,
127 const uint32_t entry_count_per_shard,
128 const uint32_t shard,
129 const uint32_t num_shards,
130 const uint32_t device_count,
131 const int64_t bucket_normalization) {
132 const uint32_t shard_buffer_index =
133 shard / device_count;
134 int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
135 int64_t hash_slot = ((key / bucket_normalization) - min_key) / num_shards +
136 (key == translated_null_val);
137 return shard_buffer + hash_slot;
143 const int64_t min_key,
144 const uint32_t entry_count_per_shard,
145 const uint32_t shard,
146 const uint32_t num_shards,
147 const uint32_t device_count) {
148 const uint32_t shard_buffer_index =
149 shard / device_count;
150 int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
151 return shard_buffer + (key - min_key) / num_shards;
154 #endif // QUERYENGINE_GROUPBYFASTIMPL_H
ALWAYS_INLINE DEVICE int SUFFIX() fill_hashtable_for_semi_join(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_bitwise_eq(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const int64_t bucket_normalization)
#define insert_key_cas(address, compare, val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded_opt(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t shard, const uint32_t num_shards, const uint32_t device_count)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
ALWAYS_INLINE DEVICE int SUFFIX() fill_one_to_one_hashtable(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot_sharded_opt(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const uint32_t entry_count_per_shard, const uint32_t shard, const uint32_t num_shards, const uint32_t device_count, const int64_t bucket_normalization)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t bucket_normalization)
#define SHARD_FOR_KEY(key, num_shards)