23 #ifndef QUERYENGINE_HASHJOINRUNTIME_H
24 #define QUERYENGINE_HASHJOINRUNTIME_H
29 #include "../../../Shared/SqlTypesLayout.h"
30 #include "../../../Shared/sqltypes.h"
33 #include "../../DecodersImpl.h"
35 #include "../../RuntimeFunctions.h"
37 #include "../../../Shared/funcannotations.h"
55 const int64_t entry_count,
56 const int32_t invalid_slot_val,
57 const int32_t cpu_thread_idx,
58 const int32_t cpu_thread_count);
// Declaration only; the definition is not part of this header.
// Per the closing "#endif" comments that follow, this prototype sits inside
// an "#ifndef __CUDACC__" / "#ifdef HAVE_TBB" region.
//
// Parameters:
//   buff             - int32_t buffer to operate on.
//   entry_count      - entry count, passed as a signed 64-bit value.
//   invalid_slot_val - 32-bit value identifying an invalid slot.
//
// Unlike init_hash_join_buff, this variant takes no cpu_thread_idx /
// cpu_thread_count arguments.
// NOTE(review): presumably writes invalid_slot_val into each of the
// entry_count entries of buff -- confirm against the definition.
63 void init_hash_join_buff_tbb(int32_t* buff,
64 const int64_t entry_count,
65 const int32_t invalid_slot_val);
67 #endif // #ifdef HAVE_TBB
68 #endif // #ifndef __CUDACC__
71 const int64_t entry_count,
72 const int32_t invalid_slot_val);
75 const int64_t entry_count,
76 const size_t key_component_count,
77 const bool with_val_slot,
78 const int32_t invalid_slot_val,
79 const int32_t cpu_thread_idx,
80 const int32_t cpu_thread_count);
83 const int64_t entry_count,
84 const size_t key_component_count,
85 const bool with_val_slot,
86 const int32_t invalid_slot_val,
87 const int32_t cpu_thread_idx,
88 const int32_t cpu_thread_count);
// Declaration only; the definition is not part of this header.
// Per the closing "#endif" comments that follow, this prototype sits inside
// an "#ifndef __CUDACC__" / "#ifdef HAVE_TBB" region.
//
// Parameters:
//   hash_join_buff      - byte (int8_t) buffer to operate on.
//   entry_count         - entry count, passed as a signed 64-bit value.
//   key_component_count - number of key components.
//   with_val_slot       - flag; by its name, whether entries carry a value
//                         slot (TODO confirm exact layout in the definition).
//   invalid_slot_val    - 32-bit value identifying an invalid slot.
//
// Unlike init_baseline_hash_join_buff_32, this variant takes no
// cpu_thread_idx / cpu_thread_count arguments.
// NOTE(review): the "_32" suffix presumably selects 32-bit key components --
// confirm against the definition.
93 void init_baseline_hash_join_buff_tbb_32(int8_t* hash_join_buff,
94 const int64_t entry_count,
95 const size_t key_component_count,
96 const bool with_val_slot,
97 const int32_t invalid_slot_val);
// Declaration only; the definition is not part of this header.
// Per the closing "#endif" comments that follow, this prototype sits inside
// an "#ifndef __CUDACC__" / "#ifdef HAVE_TBB" region.
//
// Parameters:
//   hash_join_buff      - byte (int8_t) buffer to operate on.
//   entry_count         - entry count, passed as a signed 64-bit value.
//   key_component_count - number of key components.
//   with_val_slot       - flag; by its name, whether entries carry a value
//                         slot (TODO confirm exact layout in the definition).
//   invalid_slot_val    - 32-bit value identifying an invalid slot.
//
// Unlike init_baseline_hash_join_buff_64, this variant takes no
// cpu_thread_idx / cpu_thread_count arguments.
// NOTE(review): the "_64" suffix presumably selects 64-bit key components --
// confirm against the definition.
99 void init_baseline_hash_join_buff_tbb_64(int8_t* hash_join_buff,
100 const int64_t entry_count,
101 const size_t key_component_count,
102 const bool with_val_slot,
103 const int32_t invalid_slot_val);
105 #endif // #ifdef HAVE_TBB
106 #endif // #ifndef __CUDACC__
109 const int64_t entry_count,
110 const size_t key_component_count,
111 const bool with_val_slot,
112 const int32_t invalid_slot_val);
115 const int64_t entry_count,
116 const size_t key_component_count,
117 const bool with_val_slot,
118 const int32_t invalid_slot_val);
191 int32_t
const cpu_thread_idx,
192 int32_t
const cpu_thread_count);
195 int32_t
const cpu_thread_idx,
196 int32_t
const cpu_thread_count);
199 int32_t
const cpu_thread_idx,
200 int32_t
const cpu_thread_count);
216 int32_t
const cpu_thread_count);
220 int32_t
const cpu_thread_count);
233 const int64_t entry_count,
234 const int32_t invalid_slot_val,
235 const bool for_semi_join,
236 const size_t key_component_count,
237 const bool with_val_slot,
239 const int64_t num_elems,
240 const int32_t cpu_thread_idx,
241 const int32_t cpu_thread_count);
245 const int64_t entry_count,
246 const int32_t invalid_slot_val,
247 const size_t key_component_count,
248 const bool with_val_slot,
250 const int64_t num_elems,
251 const int32_t cpu_thread_idx,
252 const int32_t cpu_thread_count);
255 const size_t entry_count,
256 const int32_t invalid_slot_val,
257 const size_t key_component_count,
258 const bool with_val_slot,
260 const size_t num_elems,
261 const int32_t cpu_thread_idx,
262 const int32_t cpu_thread_count);
265 const int64_t entry_count,
266 const int32_t invalid_slot_val,
267 const bool for_semi_join,
268 const size_t key_component_count,
269 const bool with_val_slot,
271 const int64_t num_elems,
272 const int32_t cpu_thread_idx,
273 const int32_t cpu_thread_count);
277 const int64_t entry_count,
278 const int32_t invalid_slot_val,
279 const size_t key_component_count,
280 const bool with_val_slot,
282 const int64_t num_elems,
283 const int32_t cpu_thread_idx,
284 const int32_t cpu_thread_count);
287 const size_t entry_count,
288 const int32_t invalid_slot_val,
289 const size_t key_component_count,
290 const bool with_val_slot,
292 const size_t num_elems,
293 const int32_t cpu_thread_idx,
294 const int32_t cpu_thread_count);
297 const int64_t entry_count,
298 const int32_t invalid_slot_val,
299 const bool for_semi_join,
300 const size_t key_component_count,
301 const bool with_val_slot,
304 const int64_t num_elems);
307 const int64_t entry_count,
308 const int32_t invalid_slot_val,
309 const bool for_semi_join,
310 const size_t key_component_count,
311 const bool with_val_slot,
314 const int64_t num_elems);
318 const int64_t entry_count,
319 const int32_t invalid_slot_val,
320 const size_t key_component_count,
321 const bool with_val_slot,
324 const int64_t num_elems);
327 const int64_t entry_count,
328 const int32_t invalid_slot_val,
329 const size_t key_component_count,
330 const bool with_val_slot,
333 const size_t num_elems);
337 const int32_t* composite_key_dict,
338 const int64_t hash_entry_count,
339 const size_t key_component_count,
340 const std::vector<JoinColumn>& join_column_per_key,
341 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
342 const std::vector<JoinBucketInfo>& join_bucket_info,
343 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
344 const std::vector<int32_t>& sd_min_inner_elems,
345 const int32_t cpu_thread_count,
346 const bool is_range_join =
false,
347 const bool is_geo_compressed =
false,
348 const bool for_window_framing =
false);
352 const int64_t* composite_key_dict,
353 const int64_t hash_entry_count,
354 const size_t key_component_count,
355 const std::vector<JoinColumn>& join_column_per_key,
356 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
357 const std::vector<JoinBucketInfo>& join_bucket_info,
358 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
359 const std::vector<int32_t>& sd_min_inner_elems,
360 const int32_t cpu_thread_count,
361 const bool is_range_join =
false,
362 const bool is_geo_compressed =
false,
363 const bool for_window_framing =
false);
367 const int32_t* composite_key_dict,
368 const int64_t hash_entry_count,
369 const size_t key_component_count,
371 const int64_t num_elems,
372 const bool for_window_framing);
376 const int64_t* composite_key_dict,
377 const int64_t hash_entry_count,
379 const int64_t num_elems,
380 const bool for_window_framing);
384 const int64_t* composite_key_dict,
385 const int64_t hash_entry_count,
387 const int64_t num_elems);
391 const int64_t* composite_key_dict,
392 const size_t hash_entry_count,
394 const size_t num_elems);
398 const size_t padded_size_bytes,
399 const std::vector<JoinColumn>& join_column_per_key,
400 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
401 const int thread_count);
404 uint8_t* hll_buffer_all_cpus,
405 std::vector<int32_t>& row_counts,
407 const size_t padded_size_bytes,
408 const std::vector<JoinColumn>& join_column_per_key,
409 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
410 const std::vector<JoinBucketInfo>& join_buckets_per_key,
411 const int thread_count);
414 uint8_t* hll_buffer_all_cpus,
415 std::vector<int32_t>& row_counts,
417 const size_t padded_size_bytes,
418 const std::vector<JoinColumn>& join_column_per_key,
419 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
420 const std::vector<JoinBucketInfo>& join_buckets_per_key,
421 const bool is_compressed,
422 const int thread_count);
427 const int64_t num_elems);
432 int32_t* row_counts_buffer,
434 const int64_t num_elems);
439 const std::vector<double>& bucket_size_thresholds,
440 const int thread_count);
444 int32_t* row_counts_buffer,
446 const size_t num_elems,
447 const size_t block_size_x,
448 const size_t grid_size_x);
453 const double* bucket_size_thresholds);
455 #endif // QUERYENGINE_HASHJOINRUNTIME_H
const bool for_window_framing
const ColumnType column_type
void fill_hash_join_buff_on_device_sharded(OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info)
const JoinColumnTypeInfo type_info
const int64_t bucket_normalization
const int32_t min_inner_elem
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
void fill_one_to_many_hash_table_on_device(OneToManyPerfectJoinHashTableFillFuncArgs const args)
void fill_hash_join_buff_on_device(OneToOnePerfectJoinHashTableFillFuncArgs const args)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
DEVICE int SUFFIX() fill_hash_join_buff_bitwise_eq(OneToOnePerfectJoinHashTableFillFuncArgs const args, int32_t const cpu_thread_idx, int32_t const cpu_thread_count)
const int64_t bucket_normalization
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
const int32_t * sd_inner_to_outer_translation_map
const JoinColumn join_column
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::vector< double > inverse_bucket_sizes_for_dimension
void fill_one_to_many_hash_table(OneToManyPerfectJoinHashTableFillFuncArgs const args, const int32_t cpu_thread_count)
DEVICE int SUFFIX() fill_hash_join_buff(OneToOnePerfectJoinHashTableFillFuncArgs const args, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const int64_t translated_null_val
void compute_bucket_sizes_on_device(double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_size_thresholds)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
void approximate_distinct_tuples_on_device_range(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const RangeKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
size_t col_chunks_buff_sz
const JoinColumn join_column
const BucketizedHashEntryInfo hash_entry_info
int64_t bucket_normalization
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
void compute_bucket_sizes_on_cpu(std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const std::vector< double > &bucket_size_thresholds, const int thread_count)
void bbox_intersect_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems)
int bbox_intersect_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int bbox_intersect_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const int8_t * col_chunks_buff
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
void approximate_distinct_tuples_on_device_bbox_intersect(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
bool is_date_in_days() const
size_t entry_count_per_shard
size_t getNormalizedHashEntryCount() const
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_hash_table_bucketized(OneToManyPerfectJoinHashTableFillFuncArgs const args, const int32_t cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void approximate_distinct_tuples_bbox_intersect(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
void fill_hash_join_buff_on_device_bucketized(OneToOnePerfectJoinHashTableFillFuncArgs const args)
void bbox_intersect_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_range(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const bool is_compressed, const int thread_count)
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
const int32_t invalid_slot_val
const JoinColumnTypeInfo type_info
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
DEVICE int SUFFIX() fill_hash_join_buff_bucketized(OneToOnePerfectJoinHashTableFillFuncArgs const args, int32_t const cpu_thread_idx, int32_t const cpu_thread_count)
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_hash_table_on_device_sharded(OneToManyPerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info)
const size_t g_maximum_conditions_to_coalesce
const int32_t * sd_inner_to_outer_translation_map
const int32_t min_inner_elem
void fill_hash_join_buff_on_device_sharded_bucketized(OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info)
size_t bucketized_hash_entry_count
bool is_unsigned_type(const SQLTypeInfo &ti)
void fill_one_to_many_hash_table_on_device_bucketized(OneToManyPerfectJoinHashTableFillFuncArgs const args)