// Hash a multi-component group-by key: the key is treated as a flat byte
// buffer of key_count components, key_byte_width bytes each, and fed through
// MurmurHash3 with a fixed seed of 0.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t key_hash(
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_byte_width) {
  return MurmurHash3(key, key_byte_width * key_count, 0);
}
// Find (or claim) the row-wise group slot for `key` in an open-addressing
// hash table of groups_buffer_entry_count entries, using linear probing.
// Starts at key_hash(...) % entry_count and wraps around once; returns the
// payload pointer from get_matching_group_value, or NULL when the table is
// full (the probe walked all the way back to the start bucket).
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t* get_group_value(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value(
      groups_buffer, h, key, key_count, key_width, row_size_quad);
  if (matching_group) {
    return matching_group;
  }
  // Linear probing: scan successive buckets until we come back to h.
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value(
        groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return NULL;  // table full
}
// Same linear-probe lookup as get_group_value, but checks the dynamic
// watchdog every 100 probe steps so a long scan over a nearly-full table
// can be interrupted; returns NULL if the watchdog fires or the table is
// full.
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t* get_group_value_with_watchdog(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value(
      groups_buffer, h, key, key_count, key_width, row_size_quad);
  if (matching_group) {
    return matching_group;
  }
  uint32_t watchdog_countdown = 100;  // probe steps between watchdog checks
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value(
        groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return NULL;  // query timed out
      }
      watchdog_countdown = 100;
    }
  }
  return NULL;  // table full
}
// Columnar-layout variant of the group lookup: returns the slot index
// (int32_t) from get_matching_group_value_columnar_slot instead of a
// payload pointer. -1 means the table is full.
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_width) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int32_t matching_slot = get_matching_group_value_columnar_slot(
      groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
  if (matching_slot != -1) {
    return matching_slot;
  }
  // Linear probing: scan successive buckets until we come back to h.
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_slot = get_matching_group_value_columnar_slot(
        groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
    if (matching_slot != -1) {
      return matching_slot;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return -1;  // table full
}
// Watchdog-aware variant of get_group_value_columnar_slot: every 100 probe
// steps the dynamic watchdog is consulted; returns -1 on timeout or when
// the table is full.
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t
get_group_value_columnar_slot_with_watchdog(int64_t* groups_buffer,
                                            const uint32_t groups_buffer_entry_count,
                                            const int64_t* key,
                                            const uint32_t key_count,
                                            const uint32_t key_width) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int32_t matching_slot = get_matching_group_value_columnar_slot(
      groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
  if (matching_slot != -1) {
    return matching_slot;
  }
  uint32_t watchdog_countdown = 100;  // probe steps between watchdog checks
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_slot = get_matching_group_value_columnar_slot(
        groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
    if (matching_slot != -1) {
      return matching_slot;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return -1;  // query timed out
      }
      watchdog_countdown = 100;
    }
  }
  return -1;  // table full
}
// Columnar group lookup keyed on key_qw_count 64-bit words (hence the fixed
// sizeof(int64_t) component width passed to key_hash). Linear probing with
// wrap-around; NULL when the table is full.
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t* get_group_value_columnar(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_qw_count) {
  uint32_t h =
      key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value_columnar(
      groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
  if (matching_group) {
    return matching_group;
  }
  // Linear probing: scan successive buckets until we come back to h.
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value_columnar(
        groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return NULL;  // table full
}
// Watchdog-aware variant of get_group_value_columnar: every 100 probe steps
// the dynamic watchdog is consulted; returns NULL on timeout or when the
// table is full.
extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t*
get_group_value_columnar_with_watchdog(int64_t* groups_buffer,
                                       const uint32_t groups_buffer_entry_count,
                                       const int64_t* key,
                                       const uint32_t key_qw_count) {
  uint32_t h =
      key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value_columnar(
      groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
  if (matching_group) {
    return matching_group;
  }
  uint32_t watchdog_countdown = 100;  // probe steps between watchdog checks
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value_columnar(
        groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return NULL;  // query timed out
      }
      watchdog_countdown = 100;
    }
  }
  return NULL;  // table full
}
// Perfect-hash ("fast") path for a single dense integer key: the slot is
// computed directly as (key - min_key) / bucket, no probing required.
// Writes the key into the slot header if it is still empty and returns a
// pointer to the first aggregate column of the row.
// NOTE(review): the empty-slot guard (EMPTY_KEY_64 compare) and the bucket
// division were missing from the garbled source and are reconstructed —
// confirm against the canonical runtime.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t* get_group_value_fast(
    int64_t* groups_buffer,
    const int64_t key,
    const int64_t min_key,
    const int64_t bucket,
    const uint32_t row_size_quad) {
  int64_t key_diff = key - min_key;
  if (bucket) {
    key_diff /= bucket;
  }
  int64_t off = key_diff * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    groups_buffer[off] = key;
  }
  return groups_buffer + off + 1;
}
// Like get_group_value_fast, but the slot is addressed by the (possibly
// bucketized/normalized) `key` while the untouched `orig_key` is what gets
// stored in the slot header.
// NOTE(review): the empty-slot guard (EMPTY_KEY_64 compare) and the bucket
// division were missing from the garbled source and are reconstructed —
// confirm against the canonical runtime.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t*
get_group_value_fast_with_original_key(int64_t* groups_buffer,
                                       const int64_t key,
                                       const int64_t orig_key,
                                       const int64_t min_key,
                                       const int64_t bucket,
                                       const uint32_t row_size_quad) {
  int64_t key_diff = key - min_key;
  if (bucket) {
    key_diff /= bucket;
  }
  int64_t off = key_diff * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    groups_buffer[off] = orig_key;
  }
  return groups_buffer + off + 1;
}
// Columnar perfect-hash bin lookup: computes the dense bin offset
// (key - min_key) / bucket, stamps the key into the key column if the bin
// is still empty, and returns the offset.
// NOTE(review): the bucket division and empty-bin guard were missing from
// the garbled source and are reconstructed — confirm against the canonical
// runtime.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(
    int64_t* key_base_ptr,
    const int64_t key,
    const int64_t min_key,
    const int64_t bucket) {
  int64_t off = key - min_key;
  if (bucket) {
    off /= bucket;
  }
  if (key_base_ptr[off] == EMPTY_KEY_64) {
    key_base_ptr[off] = key;
  }
  return off;
}
// Row-wise projection output: if `pos` is inside the output buffer, write
// the fragment offset into the row header and return a pointer to the rest
// of the row. The offset is computed in 64 bits before the bounds check to
// avoid 32-bit overflow of pos * row_size_quad. Returns NULL when the
// buffer is exhausted.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t* get_scan_output_slot(
    int64_t* output_buffer,
    const uint32_t output_buffer_entry_count,
    const uint32_t pos,
    const int64_t offset_in_fragment,
    const uint32_t row_size_quad) {
  uint64_t off = static_cast<uint64_t>(pos) * static_cast<uint64_t>(row_size_quad);
  if (pos < output_buffer_entry_count) {
    output_buffer[off] = offset_in_fragment;
    return output_buffer + off + 1;
  }
  return NULL;  // output buffer full
}
// Columnar projection output: if `pos` is inside the output buffer, record
// the fragment offset at that position and return `pos` as the column slot
// index; -1 when the buffer is exhausted.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(
    int64_t* output_buffer,
    const uint32_t output_buffer_entry_count,
    const uint32_t pos,
    const int64_t offset_in_fragment) {
  if (pos < output_buffer_entry_count) {
    output_buffer[pos] = offset_in_fragment;
    return pos;
  }
  return -1;  // output buffer full
}
// Bucketized one-to-one hash join probe: when the key is inside
// [min_key, max_key] and the hash table exists, read the payload from the
// bucketized slot (min_key is normalized by bucket_normalization to match
// how the table was built). -1 means no match / no table.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx(int64_t hash_buff,
                         int64_t const key,
                         int64_t const min_key,
                         int64_t const max_key,
                         const int64_t translated_null_val,
                         int64_t bucket_normalization) {
  if (hash_buff && key >= min_key && key <= max_key) {
    return *SUFFIX(get_bucketized_hash_slot)(reinterpret_cast<int32_t*>(hash_buff),
                                             key,
                                             min_key / bucket_normalization,
                                             translated_null_val,
                                             bucket_normalization);
  }
  return -1;  // out of range or missing table
}
// Plain one-to-one hash join probe: for a key inside [min_key, max_key],
// read the payload from the perfect-hash slot; -1 otherwise. Note the
// visible range check does not test hash_buff for null (unlike the
// bucketized variant) — callers are expected to pass a valid table.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key) {
  if (key >= min_key && key <= max_key) {
    return *SUFFIX(get_hash_slot)(reinterpret_cast<int32_t*>(hash_buff), key, min_key);
  }
  return -1;  // key out of range
}
// Null-aware wrapper over bucketized_hash_join_idx: a NULL key (== null_val)
// never matches and short-circuits to -1.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx_nullable(int64_t hash_buff,
                                  const int64_t key,
                                  const int64_t min_key,
                                  const int64_t max_key,
                                  const int64_t null_val,
                                  const int64_t bucket_normalization) {
  return key != null_val
             ? bucketized_hash_join_idx(
                   hash_buff, key, min_key, max_key, null_val, bucket_normalization)
             : -1;
}
// Null-aware wrapper over hash_join_idx: a NULL key (== null_val) never
// matches and short-circuits to -1.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key,
    const int64_t null_val) {
  return key != null_val ? hash_join_idx(hash_buff, key, min_key, max_key) : -1;
}
// Bitwise-null ("outer-join friendly") bucketized probe: NULL keys are not
// rejected but remapped to translated_val — a sentinel slot the build side
// stored NULLs under — so NULL keys can still join.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx_bitwise(int64_t hash_buff,
                                 const int64_t key,
                                 const int64_t min_key,
                                 const int64_t max_key,
                                 const int64_t null_val,
                                 const int64_t translated_val,
                                 const int64_t bucket_normalization) {
  return key != null_val ? bucketized_hash_join_idx(hash_buff,
                                                    key,
                                                    min_key,
                                                    max_key,
                                                    translated_val,
                                                    bucket_normalization)
                         : bucketized_hash_join_idx(hash_buff,
                                                    translated_val,
                                                    min_key,
                                                    max_key,
                                                    translated_val,
                                                    bucket_normalization);
}
// Bitwise-null probe over hash_join_idx: a NULL key is remapped to
// translated_val. In the NULL branch translated_val is also used as the
// range upper bound (it lies just past max_key in the translated table
// layout), exactly as in the visible source.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key,
    const int64_t null_val,
    const int64_t translated_val) {
  return key != null_val
             ? hash_join_idx(hash_buff, key, min_key, max_key)
             : hash_join_idx(hash_buff, translated_val, min_key, translated_val);
}
// Sharded one-to-one hash join probe: the slot lookup additionally routes
// through the shard layout (entry_count_per_shard / num_shards /
// device_count). -1 on range miss or missing table.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key,
    const uint32_t entry_count_per_shard,
    const uint32_t num_shards,
    const uint32_t device_count) {
  if (hash_buff && key >= min_key && key <= max_key) {
    return *SUFFIX(get_hash_slot_sharded)(reinterpret_cast<int32_t*>(hash_buff),
                                          key,
                                          min_key,
                                          entry_count_per_shard,
                                          num_shards,
                                          device_count);
  }
  return -1;  // out of range or missing table
}
// Null-aware wrapper over hash_join_idx_sharded: a NULL key (== null_val)
// never matches and short-circuits to -1.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key,
    const uint32_t entry_count_per_shard,
    const uint32_t num_shards,
    const uint32_t device_count,
    const int64_t null_val) {
  return key != null_val ? hash_join_idx_sharded(hash_buff,
                                                 key,
                                                 min_key,
                                                 max_key,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count)
                         : -1;
}
// Bitwise-null sharded probe: NULL keys are remapped to translated_val so
// they can still join against the sentinel slot the build side used for
// NULLs.
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(
    int64_t hash_buff,
    const int64_t key,
    const int64_t min_key,
    const int64_t max_key,
    const uint32_t entry_count_per_shard,
    const uint32_t num_shards,
    const uint32_t device_count,
    const int64_t null_val,
    const int64_t translated_val) {
  return key != null_val ? hash_join_idx_sharded(hash_buff,
                                                 key,
                                                 min_key,
                                                 max_key,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count)
                         : hash_join_idx_sharded(hash_buff,
                                                 translated_val,
                                                 min_key,
                                                 max_key,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count);
}
// Generates translate_null_key_<type>: map a NULL key (== null_val) to the
// sentinel translated_val, pass every other key through widened to int64_t.
// NOTE(review): the macro tail (`return key;`) and the instantiation list
// were missing from the garbled source; the four integer widths below are
// reconstructed — confirm against the canonical runtime.
#define DEF_TRANSLATE_NULL_KEY(key_type)                                               \
  extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t translate_null_key_##key_type( \
      const key_type key, const key_type null_val, const int64_t translated_val) {     \
    if (key == null_val) {                                                             \
      return translated_val;                                                           \
    }                                                                                  \
    return key;                                                                        \
  }

DEF_TRANSLATE_NULL_KEY(int8_t)
DEF_TRANSLATE_NULL_KEY(int16_t)
DEF_TRANSLATE_NULL_KEY(int32_t)
DEF_TRANSLATE_NULL_KEY(int64_t)

#undef DEF_TRANSLATE_NULL_KEY
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast_with_original_key(int64_t *groups_buffer, const int64_t key, const int64_t orig_key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val)
__device__ bool dynamic_watchdog()
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_scan_output_slot(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment, const uint32_t row_size_quad)
#define DEF_TRANSLATE_NULL_KEY(key_type)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(int64_t *key_base_ptr, const int64_t key, const int64_t min_key, const int64_t bucket)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val, const int64_t bucket_normalization)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const int64_t bucket_normalization)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t bucket_normalization)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx(int64_t hash_buff, int64_t const key, int64_t const min_key, int64_t const max_key, const int64_t translated_null_val, int64_t bucket_normalization)
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash3(const void *key, int len, const uint32_t seed)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t *key, const uint32_t key_count, const uint32_t key_byte_width)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val, const int64_t translated_val)