OmniSciDB
a5dc49c757
|
#include "HashJoinRuntime.cpp"
#include <cuda.h>
#include <thrust/device_ptr.h>
#include <thrust/scan.h>
Go to the source code of this file.
Macros | |
#define | checkCudaErrors(err) CHECK_EQ(err, cudaSuccess) |
#define | VALID_POS_FLAG 0 |
Functions | |
CUstream | getQueryEngineCudaStream () |
template<typename F , typename... ARGS> | |
void | cuda_kernel_launch_wrapper (F func, ARGS &&...args) |
__global__ void | fill_hash_join_buff_wrapper (OneToOnePerfectJoinHashTableFillFuncArgs const args) |
__global__ void | fill_hash_join_buff_bucketized_wrapper (OneToOnePerfectJoinHashTableFillFuncArgs const args) |
void | fill_hash_join_buff_on_device_bucketized (OneToOnePerfectJoinHashTableFillFuncArgs const args) |
void | fill_hash_join_buff_on_device (OneToOnePerfectJoinHashTableFillFuncArgs const args) |
__global__ void | fill_hash_join_buff_wrapper_sharded_bucketized (OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info) |
__global__ void | fill_hash_join_buff_wrapper_sharded (OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info) |
void | fill_hash_join_buff_on_device_sharded_bucketized (OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info) |
void | fill_hash_join_buff_on_device_sharded (OneToOnePerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info) |
__global__ void | init_hash_join_buff_wrapper (int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val) |
void | init_hash_join_buff_on_device (int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val) |
__global__ void | set_valid_pos_flag (int32_t *pos_buff, const int32_t *count_buff, const int64_t entry_count) |
__global__ void | set_valid_pos (int32_t *pos_buff, int32_t *count_buff, const int64_t entry_count) |
template<typename COUNT_MATCHES_FUNCTOR , typename FILL_ROW_IDS_FUNCTOR > | |
void | fill_one_to_many_hash_table_on_device_impl (int32_t *buff, const int64_t hash_entry_count, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func) |
void | fill_one_to_many_hash_table_on_device (OneToManyPerfectJoinHashTableFillFuncArgs const args) |
void | fill_one_to_many_hash_table_on_device_bucketized (OneToManyPerfectJoinHashTableFillFuncArgs const args) |
void | fill_one_to_many_hash_table_on_device_sharded (OneToManyPerfectJoinHashTableFillFuncArgs const args, ShardInfo const shard_info) |
template<typename T , typename KEY_HANDLER > | |
void | fill_one_to_many_baseline_hash_table_on_device (int32_t *buff, const T *composite_key_dict, const int64_t hash_entry_count, const KEY_HANDLER *key_handler, const size_t num_elems, const bool for_window_framing) |
template<typename T > | |
__global__ void | init_baseline_hash_join_buff_wrapper (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val) |
void | init_baseline_hash_join_buff_on_device_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val) |
void | init_baseline_hash_join_buff_on_device_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val) |
template<typename T , typename KEY_HANDLER > | |
__global__ void | fill_baseline_hash_join_buff_wrapper (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *err, const KEY_HANDLER *key_handler, const int64_t num_elems) |
void | fill_baseline_hash_join_buff_on_device_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems) |
void | fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems) |
void | bbox_intersect_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems) |
void | range_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems) |
void | fill_one_to_many_baseline_hash_table_on_device_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing) |
void | fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing) |
void | bbox_intersect_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems) |
void | range_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems) |
void | approximate_distinct_tuples_on_device_bbox_intersect (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems) |
void | approximate_distinct_tuples_on_device_range (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const RangeKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x) |
void | approximate_distinct_tuples_on_device (uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems) |
void | compute_bucket_sizes_on_device (double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_sz_threshold) |
#define checkCudaErrors(err) CHECK_EQ(err, cudaSuccess)
Definition at line 25 of file HashJoinRuntimeGpu.cu.
#define VALID_POS_FLAG 0
Definition at line 121 of file HashJoinRuntimeGpu.cu.
Referenced by set_valid_pos() and set_valid_pos_flag().
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer,
                                           const uint32_t b,
                                           const GenericKeyHandler *key_handler,
                                           const int64_t num_elems)
Definition at line 537 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTable::approximateTupleCount().
void approximate_distinct_tuples_on_device_bbox_intersect | ( | uint8_t * | hll_buffer, |
const uint32_t | b, | ||
int32_t * | row_counts_buffer, | ||
const BoundingBoxIntersectKeyHandler * | key_handler, | ||
const int64_t | num_elems | ||
) |
Definition at line 501 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and inclusive_scan().
Referenced by BoundingBoxIntersectJoinHashTable::approximateTupleCount().
void approximate_distinct_tuples_on_device_range | ( | uint8_t * | hll_buffer, |
const uint32_t | b, | ||
int32_t * | row_counts_buffer, | ||
const RangeKeyHandler * | key_handler, | ||
const size_t | num_elems, | ||
const size_t | block_size_x, | ||
const size_t | grid_size_x | ||
) |
Definition at line 520 of file HashJoinRuntimeGpu.cu.
References checkCudaErrors, getQueryEngineCudaStream(), and inclusive_scan().
Referenced by RangeJoinHashTable::approximateTupleCount().
void bbox_intersect_fill_baseline_hash_join_buff_on_device_64 | ( | int8_t * | hash_buff, |
const int64_t | entry_count, | ||
const int32_t | invalid_slot_val, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
int * | dev_err_buff, | ||
const BoundingBoxIntersectKeyHandler * | key_handler, | ||
const int64_t | num_elems | ||
) |
Definition at line 406 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and fill_baseline_hash_join_buff_wrapper().
void bbox_intersect_fill_one_to_many_baseline_hash_table_on_device_64 | ( | int32_t * | buff, |
const int64_t * | composite_key_dict, | ||
const int64_t | hash_entry_count, | ||
const BoundingBoxIntersectKeyHandler * | key_handler, | ||
const int64_t | num_elems | ||
) |
Definition at line 481 of file HashJoinRuntimeGpu.cu.
void compute_bucket_sizes_on_device | ( | double * | bucket_sizes_buffer, |
const JoinColumn * | join_column, | ||
const JoinColumnTypeInfo * | type_info, | ||
const double * | bucket_sz_threshold | ||
) |
Definition at line 549 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by anonymous_namespace{BoundingBoxIntersectJoinHashTable.cpp}::compute_bucket_sizes().
template<typename F, typename... ARGS>
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
Definition at line 28 of file HashJoinRuntimeGpu.cu.
References run_benchmark_import::args, checkCudaErrors, and getQueryEngineCudaStream().
Referenced by approximate_distinct_tuples_on_device(), approximate_distinct_tuples_on_device_bbox_intersect(), bbox_intersect_fill_baseline_hash_join_buff_on_device_64(), compute_bucket_sizes_on_device(), fill_baseline_hash_join_buff_on_device_32(), fill_baseline_hash_join_buff_on_device_64(), fill_hash_join_buff_on_device(), fill_hash_join_buff_on_device_bucketized(), fill_hash_join_buff_on_device_sharded(), fill_hash_join_buff_on_device_sharded_bucketized(), fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device(), fill_one_to_many_hash_table_on_device_bucketized(), fill_one_to_many_hash_table_on_device_impl(), fill_one_to_many_hash_table_on_device_sharded(), init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), and range_fill_baseline_hash_join_buff_on_device_64().
void fill_baseline_hash_join_buff_on_device_32 | ( | int8_t * | hash_buff, |
const int64_t | entry_count, | ||
const int32_t | invalid_slot_val, | ||
const bool | for_semi_join, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
int * | dev_err_buff, | ||
const GenericKeyHandler * | key_handler, | ||
const int64_t | num_elems | ||
) |
Definition at line 362 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by fill_baseline_hash_join_buff_on_device().
void fill_baseline_hash_join_buff_on_device_64 | ( | int8_t * | hash_buff, |
const int64_t | entry_count, | ||
const int32_t | invalid_slot_val, | ||
const bool | for_semi_join, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
int * | dev_err_buff, | ||
const GenericKeyHandler * | key_handler, | ||
const int64_t | num_elems | ||
) |
Definition at line 384 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
template<typename T, typename KEY_HANDLER>
__global__ void fill_baseline_hash_join_buff_wrapper(int8_t *hash_buff,
                                                     const int64_t entry_count,
                                                     const int32_t invalid_slot_val,
                                                     const bool for_semi_join,
                                                     const size_t key_component_count,
                                                     const bool with_val_slot,
                                                     int *err,
                                                     const KEY_HANDLER *key_handler,
                                                     const int64_t num_elems)
Definition at line 340 of file HashJoinRuntimeGpu.cu.
References fill_baseline_hash_join_buff(), SUFFIX, and heavydb.dtypes::T.
Referenced by bbox_intersect_fill_baseline_hash_join_buff_on_device_64().
__global__ void fill_hash_join_buff_bucketized_wrapper | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 46 of file HashJoinRuntimeGpu.cu.
References run_benchmark_import::args, OneToOnePerfectJoinHashTableFillFuncArgs::dev_err_buff, fill_hash_join_buff_bucketized(), and SUFFIX.
Referenced by fill_hash_join_buff_on_device_bucketized().
void fill_hash_join_buff_on_device | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 57 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and fill_hash_join_buff_wrapper().
void fill_hash_join_buff_on_device_bucketized | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 52 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and fill_hash_join_buff_bucketized_wrapper().
void fill_hash_join_buff_on_device_sharded | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args, |
ShardInfo const | shard_info | ||
) |
Definition at line 102 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and fill_hash_join_buff_wrapper_sharded().
void fill_hash_join_buff_on_device_sharded_bucketized | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args, |
ShardInfo const | shard_info | ||
) |
Definition at line 95 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and fill_hash_join_buff_wrapper_sharded_bucketized().
__global__ void fill_hash_join_buff_wrapper | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 37 of file HashJoinRuntimeGpu.cu.
References OneToOnePerfectJoinHashTableFillFuncArgs::dev_err_buff, fill_hash_join_buff(), fill_hash_join_buff_bitwise_eq(), SUFFIX, OneToOnePerfectJoinHashTableFillFuncArgs::type_info, and JoinColumnTypeInfo::uses_bw_eq.
Referenced by fill_hash_join_buff_on_device().
__global__ void fill_hash_join_buff_wrapper_sharded | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args, |
ShardInfo const | shard_info | ||
) |
Definition at line 79 of file HashJoinRuntimeGpu.cu.
References OneToOnePerfectJoinHashTableFillFuncArgs::buff, OneToOnePerfectJoinHashTableFillFuncArgs::dev_err_buff, fill_hash_join_buff_sharded(), OneToOnePerfectJoinHashTableFillFuncArgs::for_semi_join, OneToOnePerfectJoinHashTableFillFuncArgs::invalid_slot_val, OneToOnePerfectJoinHashTableFillFuncArgs::join_column, SUFFIX, and OneToOnePerfectJoinHashTableFillFuncArgs::type_info.
Referenced by fill_hash_join_buff_on_device_sharded().
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized | ( | OneToOnePerfectJoinHashTableFillFuncArgs const | args, |
ShardInfo const | shard_info | ||
) |
Definition at line 61 of file HashJoinRuntimeGpu.cu.
References OneToOnePerfectJoinHashTableFillFuncArgs::bucket_normalization, OneToOnePerfectJoinHashTableFillFuncArgs::buff, OneToOnePerfectJoinHashTableFillFuncArgs::dev_err_buff, fill_hash_join_buff_sharded_bucketized(), OneToOnePerfectJoinHashTableFillFuncArgs::for_semi_join, OneToOnePerfectJoinHashTableFillFuncArgs::invalid_slot_val, OneToOnePerfectJoinHashTableFillFuncArgs::join_column, SUFFIX, and OneToOnePerfectJoinHashTableFillFuncArgs::type_info.
Referenced by fill_hash_join_buff_on_device_sharded_bucketized().
template<typename T, typename KEY_HANDLER>
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff,
                                                    const T *composite_key_dict,
                                                    const int64_t hash_entry_count,
                                                    const KEY_HANDLER *key_handler,
                                                    const size_t num_elems,
                                                    const bool for_window_framing)
Definition at line 260 of file HashJoinRuntimeGpu.cu.
References checkCudaErrors, cuda_kernel_launch_wrapper(), getQueryEngineCudaStream(), inclusive_scan(), set_valid_pos(), and set_valid_pos_flag().
void fill_one_to_many_baseline_hash_table_on_device_32 | ( | int32_t * | buff, |
const int32_t * | composite_key_dict, | ||
const int64_t | hash_entry_count, | ||
const size_t | key_component_count, | ||
const GenericKeyHandler * | key_handler, | ||
const int64_t | num_elems, | ||
const bool | for_window_framing | ||
) |
Definition at line 450 of file HashJoinRuntimeGpu.cu.
Referenced by fill_one_to_many_baseline_hash_table_on_device().
void fill_one_to_many_baseline_hash_table_on_device_64 | ( | int32_t * | buff, |
const int64_t * | composite_key_dict, | ||
const int64_t | hash_entry_count, | ||
const GenericKeyHandler * | key_handler, | ||
const int64_t | num_elems, | ||
const bool | for_window_framing | ||
) |
Definition at line 466 of file HashJoinRuntimeGpu.cu.
void fill_one_to_many_hash_table_on_device | ( | OneToManyPerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 175 of file HashJoinRuntimeGpu.cu.
References run_benchmark_import::args, BucketizedHashEntryInfo::bucketized_hash_entry_count, OneToManyPerfectJoinHashTableFillFuncArgs::buff, count_matches(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids(), OneToManyPerfectJoinHashTableFillFuncArgs::for_window_framing, OneToManyPerfectJoinHashTableFillFuncArgs::hash_entry_info, OneToManyPerfectJoinHashTableFillFuncArgs::join_column, SUFFIX, and OneToManyPerfectJoinHashTableFillFuncArgs::type_info.
void fill_one_to_many_hash_table_on_device_bucketized | ( | OneToManyPerfectJoinHashTableFillFuncArgs const | args | ) |
Definition at line 199 of file HashJoinRuntimeGpu.cu.
References run_benchmark_import::args, OneToManyPerfectJoinHashTableFillFuncArgs::bucket_normalization, OneToManyPerfectJoinHashTableFillFuncArgs::buff, count_matches_bucketized(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids_bucketized(), BucketizedHashEntryInfo::getNormalizedHashEntryCount(), OneToManyPerfectJoinHashTableFillFuncArgs::hash_entry_info, OneToManyPerfectJoinHashTableFillFuncArgs::join_column, SUFFIX, and OneToManyPerfectJoinHashTableFillFuncArgs::type_info.
template<typename COUNT_MATCHES_FUNCTOR, typename FILL_ROW_IDS_FUNCTOR>
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff,
                                                const int64_t hash_entry_count,
                                                const JoinColumn &join_column,
                                                const JoinColumnTypeInfo &type_info,
                                                COUNT_MATCHES_FUNCTOR count_matches_func,
                                                FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
Definition at line 148 of file HashJoinRuntimeGpu.cu.
References checkCudaErrors, cuda_kernel_launch_wrapper(), getQueryEngineCudaStream(), inclusive_scan(), set_valid_pos(), and set_valid_pos_flag().
Referenced by fill_one_to_many_hash_table_on_device() and fill_one_to_many_hash_table_on_device_bucketized().
void fill_one_to_many_hash_table_on_device_sharded | ( | OneToManyPerfectJoinHashTableFillFuncArgs const | args, |
ShardInfo const | shard_info | ||
) |
Definition at line 226 of file HashJoinRuntimeGpu.cu.
References BucketizedHashEntryInfo::bucketized_hash_entry_count, OneToManyPerfectJoinHashTableFillFuncArgs::buff, checkCudaErrors, count_matches_sharded(), cuda_kernel_launch_wrapper(), fill_row_ids_sharded(), getQueryEngineCudaStream(), OneToManyPerfectJoinHashTableFillFuncArgs::hash_entry_info, inclusive_scan(), OneToManyPerfectJoinHashTableFillFuncArgs::join_column, set_valid_pos(), set_valid_pos_flag(), SUFFIX, and OneToManyPerfectJoinHashTableFillFuncArgs::type_info.
CUstream getQueryEngineCudaStream | ( | ) |
Definition at line 3 of file QueryEngine.cpp.
void init_baseline_hash_join_buff_on_device_32 | ( | int8_t * | hash_join_buff, |
const int64_t | entry_count, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
const int32_t | invalid_slot_val | ||
) |
Definition at line 313 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
void init_baseline_hash_join_buff_on_device_64 | ( | int8_t * | hash_join_buff, |
const int64_t | entry_count, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
const int32_t | invalid_slot_val | ||
) |
Definition at line 326 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
template<typename T>
__global__ void init_baseline_hash_join_buff_wrapper(int8_t *hash_join_buff,
                                                     const int64_t entry_count,
                                                     const size_t key_component_count,
                                                     const bool with_val_slot,
                                                     const int32_t invalid_slot_val)
Definition at line 299 of file HashJoinRuntimeGpu.cu.
References init_baseline_hash_join_buff(), SUFFIX, and heavydb.dtypes::T.
void init_hash_join_buff_on_device | ( | int32_t * | buff, |
const int64_t | hash_entry_count, | ||
const int32_t | invalid_slot_val | ||
) |
Definition at line 114 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper() and init_hash_join_buff_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
__global__ void init_hash_join_buff_wrapper | ( | int32_t * | buff, |
const int64_t | hash_entry_count, | ||
const int32_t | invalid_slot_val | ||
) |
Definition at line 108 of file HashJoinRuntimeGpu.cu.
References init_hash_join_buff() and SUFFIX.
Referenced by init_hash_join_buff_on_device().
void range_fill_baseline_hash_join_buff_on_device_64 | ( | int8_t * | hash_buff, |
const int64_t | entry_count, | ||
const int32_t | invalid_slot_val, | ||
const size_t | key_component_count, | ||
const bool | with_val_slot, | ||
int * | dev_err_buff, | ||
const RangeKeyHandler * | key_handler, | ||
const size_t | num_elems | ||
) |
Definition at line 429 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
void range_fill_one_to_many_baseline_hash_table_on_device_64 | ( | int32_t * | buff, |
const int64_t * | composite_key_dict, | ||
const size_t | hash_entry_count, | ||
const RangeKeyHandler * | key_handler, | ||
const size_t | num_elems | ||
) |
Definition at line 491 of file HashJoinRuntimeGpu.cu.
__global__ void set_valid_pos(int32_t *pos_buff,
                              int32_t *count_buff,
                              const int64_t entry_count)
Definition at line 135 of file HashJoinRuntimeGpu.cu.
References VALID_POS_FLAG.
Referenced by fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device_impl(), and fill_one_to_many_hash_table_on_device_sharded().
__global__ void set_valid_pos_flag(int32_t *pos_buff,
                                   const int32_t *count_buff,
                                   const int64_t entry_count)
Definition at line 123 of file HashJoinRuntimeGpu.cu.
References VALID_POS_FLAG.
Referenced by fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device_impl(), and fill_one_to_many_hash_table_on_device_sharded().