std::unique_ptr<HashingSchemeRecycler> BaselineJoinHashTable::hash_table_layout_cache_ =
    std::make_unique<HashingSchemeRecycler>();
//! Make hash table from an in-flight SQL query's parse tree etc.
std::shared_ptr<BaselineJoinHashTable> BaselineJoinHashTable::getInstance(
    const std::shared_ptr<Analyzer::BinOper> condition,
    const std::vector<InputTableInfo>& query_infos,
    const Data_Namespace::MemoryLevel memory_level,
    const JoinType join_type,
    const HashType preferred_hash_type,
    const int device_count,
    ColumnCacheMap& column_cache,
    Executor* executor,
    const HashTableBuildDagMap& hashtable_build_dag_map,
    const RegisteredQueryHint& query_hints,
    const TableIdToNodeMap& table_id_to_node_map) {
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  auto hash_type = preferred_hash_type;
  if (query_hints.force_one_to_many_hash_join) {
    LOG(INFO) << "A user's query hint forced the join operation to use OneToMany hash "
                 "join layout"
              << " for qual: " << condition->toString();
    hash_type = HashType::OneToMany;
  }
  ts1 = std::chrono::steady_clock::now();
  auto inner_outer_pairs =
      HashJoin::normalizeColumnPairs(condition.get(), executor->getTemporaryTables());
  const auto& inner_outer_cols = inner_outer_pairs.first;
  const auto& col_pairs_string_op_infos = inner_outer_pairs.second;
  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
      new BaselineJoinHashTable(condition,
                                join_type,
                                query_infos,
                                memory_level,
                                column_cache,
                                executor,
                                inner_outer_cols,
                                col_pairs_string_op_infos,
                                device_count,
                                query_hints,
                                hashtable_build_dag_map,
                                table_id_to_node_map));
  try {
    join_hash_table->reify(hash_type);
  } catch (const TableMustBeReplicated& e) {
    // Free the partially built buffers before aborting the query.
    join_hash_table->freeHashBufferMemory();
    throw std::runtime_error(e.what());
  } catch (const HashJoinFail& e) {
    join_hash_table->freeHashBufferMemory();
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in equijoin | ") +
                       e.what());
  } catch (const ColumnarConversionNotSupported& e) {
    throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
                       e.what());
  } catch (const OutOfMemory& e) {
    throw HashJoinFail(
        std::string("Ran out of memory while building hash tables for equijoin | ") +
        e.what());
  } catch (const std::exception& e) {
    throw std::runtime_error(
        std::string("Fatal error while attempting to build hash tables for join: ") +
        e.what());
  }
  ts2 = std::chrono::steady_clock::now();
  VLOG(1) << "Built keyed hash table "
          << getHashTypeString(join_hash_table->getHashType()) << " in "
          << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
          << " ms";
  return join_hash_table;
}
BaselineJoinHashTable::BaselineJoinHashTable(
    const std::shared_ptr<Analyzer::BinOper> condition,
    const JoinType join_type,
    const std::vector<InputTableInfo>& query_infos,
    const Data_Namespace::MemoryLevel memory_level,
    ColumnCacheMap& column_cache,
    Executor* executor,
    const std::vector<InnerOuter>& inner_outer_pairs,
    const std::vector<InnerOuterStringOpInfos>& col_pairs_string_op_infos,
    const int device_count,
    const RegisteredQueryHint& query_hints,
    const HashTableBuildDagMap& hashtable_build_dag_map,
    const TableIdToNodeMap& table_id_to_node_map)
    : condition_(condition)
    , join_type_(join_type)
    , query_infos_(query_infos)
    , memory_level_(memory_level)
    , executor_(executor)
    , column_cache_(column_cache)
    , inner_outer_pairs_(inner_outer_pairs)
    , inner_outer_string_op_infos_pairs_(col_pairs_string_op_infos)
    , device_count_(device_count)
    , query_hints_(query_hints)
    , needs_dict_translation_(false)
    , hashtable_build_dag_map_(hashtable_build_dag_map)
    , table_id_to_node_map_(table_id_to_node_map)
    , rowid_size_(sizeof(int32_t)) {
  CHECK_GT(device_count_, 0);
  hash_tables_for_device_.resize(std::max(device_count_, 1));
}
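// Returns the shard count of the first sharded inner/outer pair, or zero when
// no join key is sharded; sharding lets each device build from its own shards.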
size_t BaselineJoinHashTable::getShardCountForCondition(
    const Analyzer::BinOper* condition,
    const Executor* executor,
    const std::vector<InnerOuter>& inner_outer_pairs) {
  for (const auto& inner_outer_pair : inner_outer_pairs) {
    const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
    if (pair_shard_count) {
      return pair_shard_count;
    }
  }
  return 0;
}
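// Debug helper: decodes a device's hash table buffer into a human-readable
// string, staging a host copy first when the table lives in GPU memory.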
std::string BaselineJoinHashTable::toString(const ExecutorDeviceType device_type,
                                            const int device_id,
                                            bool raw) const {
  auto buffer = getJoinHashBuffer(device_type, device_id);
  auto hash_table = getHashTableForDevice(device_id);
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    auto device_allocator = std::make_unique<CudaAllocator>(
        executor_->getDataMgr(), device_id, getQueryEngineCudaStreamForDevice(device_id));
    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  return HashTable::toString("keyed",
                             getHashTypeString(getHashType()),
                             getKeyComponentCount(),
                             getKeyComponentWidth(),
                             hash_table->getEntryCount(),
                             ptr1,
                             ptr2,
                             ptr3,
                             ptr4,
                             buffer_size,
                             raw);
}
std::set<DecodedJoinHashBufferEntry> BaselineJoinHashTable::toSet(
    const ExecutorDeviceType device_type,
    const int device_id) const {
  auto buffer = getJoinHashBuffer(device_type, device_id);
  auto hash_table = getHashTableForDevice(device_id);
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    auto device_allocator = std::make_unique<CudaAllocator>(
        executor_->getDataMgr(), device_id, getQueryEngineCudaStreamForDevice(device_id));
    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  const auto layout = hash_table->getLayout();
  return HashTable::toSet(getKeyComponentCount(),
                          getKeyComponentWidth(),
                          hash_table->getEntryCount(),
                          ptr1,
                          ptr2,
                          ptr3,
                          ptr4,
                          buffer_size);
}
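// Dictionary-encoded key columns whose inner and outer sides use different
// string dictionaries (or that carry string ops) need translation before
// hashing, which also pins the effective build memory level to CPU.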
bool needs_dictionary_translation(
    const std::vector<InnerOuter>& inner_outer_pairs,
    const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs,
    const Executor* executor) {
  const auto num_col_pairs = inner_outer_pairs.size();
  CHECK_EQ(num_col_pairs, inner_outer_string_op_infos_pairs.size());
  for (size_t col_pair_idx = 0; col_pair_idx < num_col_pairs; ++col_pair_idx) {
    if (needs_dictionary_translation(inner_outer_pairs[col_pair_idx],
                                     inner_outer_string_op_infos_pairs[col_pair_idx],
                                     executor)) {
      return true;
    }
  }
  return false;
}
void BaselineJoinHashTable::reify(const HashType preferred_layout) {
  auto timer = DEBUG_TIMER(__func__);
  CHECK_LT(0, query_infos_.size());
  auto layout = preferred_layout;
  if (condition_->is_bbox_intersect_oper()) {
    try {
      reifyWithLayout(HashType::OneToMany);
      return;
    } catch (const std::exception& e) {
      VLOG(1) << "Caught exception while building baseline hash table for bounding box "
                 "intersection: "
              << e.what();
      throw;
    }
  }
  // String-op join keys cannot guarantee a 1-to-1 correspondence, so prefer a
  // OneToMany layout whenever string ops are present on either side.
  for (const auto& inner_outer_string_op_infos : inner_outer_string_op_infos_pairs_) {
    if (inner_outer_string_op_infos.first.size() ||
        inner_outer_string_op_infos.second.size()) {
      layout = HashType::OneToMany;
      break;
    }
  }
  try {
    reifyWithLayout(layout);
  } catch (const std::exception& e) {
    VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
    // Fall back to OneToMany when the preferred (OneToOne) build fails.
    if (layout == HashType::OneToMany) {
      throw;
    }
    reifyWithLayout(HashType::OneToMany);
  }
}
void BaselineJoinHashTable::reifyWithLayout(const HashType layout) {
  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
  if (query_info.fragments.empty()) {
    return;
  }

  auto data_mgr = executor_->getDataMgr();
  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>> fragments_per_device;
  std::vector<ColumnsForDevice> columns_per_device;
  const auto shard_count = shardCount();
  auto entries_per_device = get_entries_per_device(
      query_info.getNumTuplesUpperBound(), shard_count, device_count_, memory_level_);

  // Partition the inner table's fragments across devices (by shard when the
  // table is sharded) and record a chunk key per device for cache keying.
  std::vector<ChunkKey> chunk_key_per_device;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    fragments_per_device.emplace_back(
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments);
    if (memory_level_ == Data_Namespace::GPU_LEVEL) {
      dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(
          data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id)));
    }
    const auto chunk_key = genChunkKey(fragments_per_device[device_id]);
    chunk_key_per_device.emplace_back(std::move(chunk_key));
  }

  auto inner_outer_pairs =
      HashJoin::normalizeColumnPairs(condition_.get(), executor_->getTemporaryTables());
  const auto& inner_outer_cols = inner_outer_pairs.first;
  const auto& col_pairs_string_op_infos = inner_outer_pairs.second;
  auto hashtable_access_path_info =
      HashtableRecycler::getHashtableAccessPathInfo(inner_outer_cols,
                                                    col_pairs_string_op_infos,
                                                    condition_->get_optype(),
                                                    join_type_,
                                                    hashtable_build_dag_map_,
                                                    device_count_,
                                                    shard_count,
                                                    fragments_per_device,
                                                    executor_);
  hashtable_cache_key_ = hashtable_access_path_info.hashed_query_plan_dag;
  hashtable_cache_meta_info_ = hashtable_access_path_info.meta_info;
  table_keys_ = hashtable_access_path_info.table_keys;

  if (HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_)) {
    // The query plan DAG is unavailable; fall back to a cache key derived from
    // the physical inputs of the join.
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      const auto num_tuples = std::accumulate(fragments_per_device[device_id].begin(),
                                              fragments_per_device[device_id].end(),
                                              size_t(0),
                                              [](const auto& sum, const auto& fragment) {
                                                return sum + fragment.getNumTuples();
                                              });
      AlternativeCacheKeyForBaselineHashJoin cache_key{inner_outer_pairs_,
                                                       inner_outer_string_op_infos_pairs_,
                                                       num_tuples,
                                                       condition_->get_optype(),
                                                       join_type_,
                                                       chunk_key_per_device[device_id]};
      hashtable_cache_key_[device_id] = getAlternativeCacheKey(cache_key);
    }
  }

  const auto invalid_cache_key =
      HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_);
  if (!invalid_cache_key) {
    std::for_each(hashtable_cache_key_.cbegin(),
                  hashtable_cache_key_.cend(),
                  [this](QueryPlanHash key) {
                    hash_table_cache_->addQueryPlanDagForTableKeys(key, table_keys_);
                  });
  }

  auto allow_hashtable_recycling =
      HashtableRecycler::isSafeToCacheHashtable(table_id_to_node_map_,
                                                needs_dict_translation_,
                                                inner_outer_string_op_infos_pairs_,
                                                getInnerTableId());
  bool has_invalid_cached_hash_table = false;
  if (HashtableRecycler::canAccessHashTable(
          allow_hashtable_recycling, invalid_cache_key, join_type_)) {
    // Probe the recycler cache: reuse only if every device gets a hit.
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      auto cached_hash_table =
          initHashTableOnCpuFromCache(hashtable_cache_key_[device_id],
                                      CacheItemType::BASELINE_HT,
                                      DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
      if (cached_hash_table) {
        hash_tables_for_device_[device_id] = cached_hash_table;
      } else {
        has_invalid_cached_hash_table = true;
        break;
      }
    }
    if (has_invalid_cached_hash_table) {
      hash_tables_for_device_.clear();
      hash_tables_for_device_.resize(device_count_);
    } else {
      if (memory_level_ == Data_Namespace::GPU_LEVEL) {
        for (int device_id = 0; device_id < device_count_; ++device_id) {
          auto cpu_hash_table = std::dynamic_pointer_cast<BaselineHashTable>(
              hash_tables_for_device_[device_id]);
          if (cpu_hash_table->getEntryCount()) {
            copyCpuHashTableToGpu(cpu_hash_table, device_id, data_mgr);
          }
        }
      }
      return;
    }
  }

  // Cache miss: fetch the join columns for every device and build from scratch.
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto columns_for_device =
        fetchColumnsForDevice(fragments_per_device[device_id],
                              device_id,
                              memory_level_ == Data_Namespace::GPU_LEVEL
                                  ? dev_buff_owners[device_id].get()
                                  : nullptr);
    columns_per_device.push_back(columns_for_device);
  }

  auto hashtable_layout_type = layout;
  if (hashtable_layout_type == HashType::OneToMany) {
    CHECK(!columns_per_device.front().join_columns.empty());
    // Size the table from the estimated distinct-key count; the 2x factor
    // targets a fill factor of roughly 50%.
    size_t tuple_count;
    std::tie(tuple_count, std::ignore) = approximateTupleCount(columns_per_device);
    const auto entry_count = 2 * std::max(tuple_count, size_t(1));
    entries_per_device =
        get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
  }

  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    BaselineHashTableEntryInfo hash_table_entry_info(
        entries_per_device,
        columns_per_device[device_id].join_columns.front().num_elems,
        /* ... key component count / width, rowid size ... */
        hashtable_layout_type);
    init_threads.push_back(std::async(std::launch::async,
                                      &BaselineJoinHashTable::reifyForDevice,
                                      this,
                                      columns_per_device[device_id],
                                      hashtable_layout_type,
                                      device_id,
                                      hash_table_entry_info,
                                      logger::thread_local_ids()));
  }
  for (auto& init_thread : init_threads) {
    init_thread.wait();
  }
  for (auto& init_thread : init_threads) {
    init_thread.get();
  }
}
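// Estimates the distinct join-key count with HyperLogLog: workers fill
// per-thread (CPU) or per-device (GPU) HLL register buffers, the buffers are
// merged with hll_unify(), and hll_size() converts the merged registers into
// an approximate cardinality. The second element of the returned pair is
// always zero for baseline joins.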
std::pair<size_t, size_t> BaselineJoinHashTable::approximateTupleCount(
    const std::vector<ColumnsForDevice>& columns_per_device) const {
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  CountDistinctDescriptor count_distinct_desc{/* ... HLL bitmap setup ... */};
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());

  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    const int thread_count = cpu_threads();
    std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
    auto hll_result = &hll_buffer_all_cpus[0];
    approximate_distinct_tuples(hll_result,
                                count_distinct_desc.bitmap_sz_bits,
                                padded_size_bytes,
                                columns_per_device.front().join_columns,
                                columns_per_device.front().join_column_types,
                                thread_count);
    // Merge every worker's registers into the first slice.
    for (int i = 1; i < thread_count; ++i) {
      hll_unify(hll_result,
                hll_result + i * padded_size_bytes,
                size_t(1) << count_distinct_desc.bitmap_sz_bits);
    }
    return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
  }
  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
  auto data_mgr = executor_->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        std::launch::async,
        [device_id,
         &columns_per_device,
         &count_distinct_desc,
         data_mgr,
         &host_hll_buffers] {
          auto allocator = std::make_unique<CudaAllocator>(
              data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr->getCudaMgr()->zeroDeviceMem(
              device_hll_buffer,
              count_distinct_desc.bitmapPaddedSizeBytes(),
              device_id,
              getQueryEngineCudaStreamForDevice(device_id));
          const auto& columns_for_device = columns_per_device[device_id];
          auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_columns, *allocator);
          auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_column_types, *allocator);
          const auto key_handler =
              GenericKeyHandler(columns_for_device.join_columns.size(),
                                true,
                                join_columns_gpu,
                                join_column_types_gpu,
                                nullptr,
                                nullptr);
          const auto key_handler_gpu =
              transfer_flat_object_to_gpu(key_handler, *allocator);
          approximate_distinct_tuples_on_device(
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              key_handler_gpu,
              columns_for_device.join_columns[0].num_elems);

          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    device_hll_buffer,
                                    count_distinct_desc.bitmapPaddedSizeBytes());
        }));
  }
  for (auto& child : approximate_distinct_device_threads) {
    child.get();
  }
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    hll_unify(hll_result,
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              size_t(1) << count_distinct_desc.bitmap_sz_bits);
  }
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
}
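// Gathers the inner-side join columns (and their type info) for one device,
// pinning fetched chunks via the owner vectors so they outlive the build.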
ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice(
    const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
    const int device_id,
    DeviceAllocator* dev_buff_owner) {
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  std::vector<JoinColumn> join_columns;
  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
  std::vector<JoinColumnTypeInfo> join_column_types;
  std::vector<JoinBucketInfo> join_bucket_info;
  std::vector<std::shared_ptr<void>> malloc_owner;
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto inner_cd = get_column_descriptor_maybe(inner_col->getColumnKey());
    if (inner_cd && inner_cd->isVirtualCol) {
      throw FailedToJoinOnVirtualColumn();
    }
    join_columns.emplace_back(fetchJoinColumn(inner_col,
                                              fragments,
                                              effective_memory_level,
                                              device_id,
                                              chunks_owner,
                                              dev_buff_owner,
                                              malloc_owner,
                                              executor_,
                                              &column_cache_));
    const auto& ti = inner_col->get_type_info();
    join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
                                                      0,
                                                      0,
                                                      inline_fixed_encoding_null_val(ti),
                                                      isBitwiseEq(),
                                                      0,
                                                      get_join_column_type_kind(ti)});
  }
  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
}
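// Worker-thread entry point: builds this device's hash table and converts any
// nonzero builder status into a HashJoinFail.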
void BaselineJoinHashTable::reifyForDevice(
    const ColumnsForDevice& columns_for_device,
    const HashType layout,
    const int device_id,
    const BaselineHashTableEntryInfo hash_table_entry_info,
    const logger::ThreadLocalIds parent_thread_local_ids) {
  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  const auto err = initHashTableForDevice(columns_for_device.join_columns,
                                          columns_for_device.join_column_types,
                                          columns_for_device.join_buckets,
                                          layout,
                                          effective_memory_level,
                                          hash_table_entry_info,
                                          device_id);
  if (err) {
    throw HashJoinFail(
        std::string("Unrecognized error when initializing baseline hash table (") +
        std::to_string(err) + std::string(")"));
  }
}
size_t BaselineJoinHashTable::getKeyComponentWidth() const {
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto& inner_col_ti = inner_col->get_type_info();
    if (inner_col_ti.get_logical_size() > 4) {
      CHECK_EQ(8, inner_col_ti.get_logical_size());
      return 8;
    }
  }
  return 4;
}
Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel(
    const std::vector<InnerOuter>& inner_outer_pairs) const {
  if (needs_dictionary_translation(
          inner_outer_pairs, inner_outer_string_op_infos_pairs_, executor_)) {
    needs_dict_translation_ = true;
    return Data_Namespace::MemoryLevel::CPU_LEVEL;
  }
  return memory_level_;
}
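// A table that had to be built on the CPU (e.g. for dictionary translation)
// is copied wholesale into device memory for GPU execution.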
void BaselineJoinHashTable::copyCpuHashTableToGpu(
    std::shared_ptr<BaselineHashTable>& cpu_hash_table,
    const int device_id,
    Data_Namespace::DataMgr* data_mgr) {
  // ... allocate a GPU-resident hash table with matching entry info ...
  CHECK(gpu_target_hash_table);
  const auto gpu_buff = gpu_target_hash_table->getGpuBuffer();
  auto allocator = std::make_unique<CudaAllocator>(
      data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
  allocator->copyToDevice(
      gpu_buff,
      cpu_hash_table->getCpuBuffer(),
      cpu_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU));
  hash_tables_for_device_[device_id] = std::move(gpu_target_hash_table);
}
StrProxyTranslationMapsPtrsAndOffsets decomposeStrDictTranslationMaps(
    const std::vector<const StringDictionaryProxy::IdMap*>& str_proxy_translation_maps) {
  StrProxyTranslationMapsPtrsAndOffsets translation_map_ptrs_and_offsets;
  const size_t num_translation_maps = str_proxy_translation_maps.size();
  translation_map_ptrs_and_offsets.first.reserve(num_translation_maps);
  translation_map_ptrs_and_offsets.second.reserve(num_translation_maps);
  for (const auto& str_proxy_translation_map : str_proxy_translation_maps) {
    if (str_proxy_translation_map) {
      translation_map_ptrs_and_offsets.first.emplace_back(
          str_proxy_translation_map->data());
      translation_map_ptrs_and_offsets.second.emplace_back(
          str_proxy_translation_map->domainStart());
    } else {
      // Keep positions aligned: store a null entry for untranslated columns.
      translation_map_ptrs_and_offsets.first.emplace_back(nullptr);
      translation_map_ptrs_and_offsets.second.emplace_back(0);
    }
  }
  return translation_map_ptrs_and_offsets;
}
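// Builds one device's hash table: on CPU (optionally recycling through the
// cache, then copying to the GPU if needed) or directly in device memory.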
int BaselineJoinHashTable::initHashTableForDevice(
    const std::vector<JoinColumn>& join_columns,
    const std::vector<JoinColumnTypeInfo>& join_column_types,
    const std::vector<JoinBucketInfo>& join_bucket_info,
    const HashType layout,
    const Data_Namespace::MemoryLevel effective_memory_level,
    const BaselineHashTableEntryInfo hash_table_entry_info,
    const int device_id) {
  auto timer = DEBUG_TIMER(__func__);
  int err = 0;
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  auto allow_hashtable_recycling =
      HashtableRecycler::isSafeToCacheHashtable(table_id_to_node_map_,
                                                needs_dict_translation_,
                                                inner_outer_string_op_infos_pairs_,
                                                getInnerTableId());
  const auto key_component_count = getKeyComponentCount();
  CHECK(!join_columns.empty());

  BaselineJoinHashTableBuilder builder;
  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
    std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
    const auto composite_key_info = HashJoin::getCompositeKeyInfo(
        inner_outer_pairs_, executor_, inner_outer_string_op_infos_pairs_);
    std::shared_ptr<HashTable> hash_table{nullptr};
    const auto str_proxy_translation_map_ptrs_and_offsets =
        decomposeStrDictTranslationMaps(str_proxy_translation_maps_);
    const auto key_handler =
        GenericKeyHandler(key_component_count,
                          true,
                          &join_columns[0],
                          &join_column_types[0],
                          &str_proxy_translation_map_ptrs_and_offsets.first[0],
                          &str_proxy_translation_map_ptrs_and_offsets.second[0]);
    err = builder.initHashTableOnCpu(&key_handler,
                                     composite_key_info,
                                     join_columns,
                                     join_column_types,
                                     join_bucket_info,
                                     str_proxy_translation_map_ptrs_and_offsets,
                                     hash_table_entry_info,
                                     join_type_,
                                     executor_,
                                     query_hints_);
    hash_tables_for_device_[device_id] = builder.getHashTable();
    ts2 = std::chrono::steady_clock::now();
    auto hashtable_build_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
    if (!err && allow_hashtable_recycling) {
      putHashTableOnCpuToCache(hashtable_cache_key_[device_id],
                               CacheItemType::BASELINE_HT,
                               hash_tables_for_device_[device_id],
                               DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                               hashtable_build_time);
    }
    // A CPU-side build destined for the GPU is copied over once complete.
    if (memory_level_ == Data_Namespace::GPU_LEVEL) {
      auto cpu_hash_table = std::dynamic_pointer_cast<BaselineHashTable>(
          hash_tables_for_device_[device_id]);
      if (cpu_hash_table->getEntryCount()) {
        copyCpuHashTableToGpu(cpu_hash_table, device_id, executor_->getDataMgr());
      }
    }
  } else {
    // Build directly in device memory.
    auto data_mgr = executor_->getDataMgr();
    CudaAllocator allocator(
        data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
    auto join_column_types_gpu =
        transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
    auto join_columns_gpu =
        transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
    const auto key_handler = GenericKeyHandler(key_component_count,
                                               true,
                                               join_columns_gpu,
                                               join_column_types_gpu,
                                               nullptr,
                                               nullptr);
    err = builder.initHashTableOnGpu(&key_handler,
                                     join_columns,
                                     join_type_,
                                     hash_table_entry_info,
                                     device_id,
                                     executor_,
                                     query_hints_);
    hash_tables_for_device_[device_id] = builder.getHashTable();
  }
  if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
    // Remember the layout that was actually built so later queries can request
    // the same hashing scheme up front.
    hash_table_layout_cache_->putItemToCache(
        hashtable_cache_key_[device_id],
        hash_tables_for_device_[device_id]->getLayout(),
        DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
        /* ... */);
  }
  return err;
}
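// Shorthand for the executor's per-query LLVM codegen state.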
#define LL_CONTEXT executor_->cgen_state_->context_
#define LL_BUILDER executor_->cgen_state_->ir_builder_
#define LL_INT(v) executor_->cgen_state_->llInt(v)
#define LL_FP(v) executor_->cgen_state_->llFp(v)
#define ROW_FUNC executor_->cgen_state_->row_func_
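// One-to-one probe: hash the composite key and call the
// baseline_hash_join_idx_{32,64} runtime function, which returns the matching
// row id or -1.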
llvm::Value* BaselineJoinHashTable::codegenSlot(const CompilationOptions& co,
                                                const size_t index) {
  const auto hash_table = getHashTableForDevice(size_t(0));
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto key_buff_lv = codegenKey(co);
  const auto hash_ptr = hashPtr(index);
  const auto key_ptr_lv =
      LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  return executor_->cgen_state_->emitExternalCall(
      "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
      get_int_type(64, LL_CONTEXT),
      {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(hash_table->getEntryCount())});
}
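// One-to-many probe: resolve the composite key's slot in the key dictionary,
// then walk the offset/count/payload buffers that follow it.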
HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet(
    const CompilationOptions& co,
    const size_t index) {
  const auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto key_buff_lv = codegenKey(co);
  CHECK(getHashType() == HashType::OneToMany);
  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
  const auto composite_dict_ptr_type =
      llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
  const auto composite_key_dict =
      hash_ptr->getType()->isPointerTy()
          ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
          : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
  const auto key_component_count = getKeyComponentCount();
  // Resolve the composite key to its entry index in the key dictionary.
  const auto key = executor_->cgen_state_->emitExternalCall(
      "get_composite_key_index_" + std::to_string(key_component_width * 8),
      get_int_type(64, LL_CONTEXT),
      {key_buff_lv,
       LL_INT(key_component_count),
       composite_key_dict,
       LL_INT(hash_table->getEntryCount())});
  auto one_to_many_ptr = hash_ptr;
  if (one_to_many_ptr->getType()->isPointerTy()) {
    one_to_many_ptr =
        LL_BUILDER.CreatePtrToInt(one_to_many_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
  } else {
    CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
  }
  // The offset/count/payload buffers start right after the key dictionary.
  const auto composite_key_dict_size = offsetBufferOff();
  one_to_many_ptr =
      LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
  return HashJoin::codegenMatchingSet(
      {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(hash_table->getEntryCount() - 1)},
      false,
      false,
      false,
      getComponentBufferSize(),
      executor_);
}
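// Buffer geometry. A OneToMany table is laid out as
// [composite keys][offsets][counts][payloads]; the *BufferOff() accessors
// below give the start of each section.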
size_t BaselineJoinHashTable::getKeyBufferSize() const noexcept {
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_component_count = getKeyComponentCount();
  const auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  if (layoutRequiresAdditionalBuffers(hash_table->getLayout())) {
    // OneToMany: a keys-only dictionary, followed by offset/count/payload.
    return hash_table->getEntryCount() * key_component_count * key_component_width;
  } else {
    // OneToOne: each entry carries its key components plus an inline row id.
    return hash_table->getEntryCount() * (key_component_count + 1) * key_component_width;
  }
}

size_t BaselineJoinHashTable::offsetBufferOff() const noexcept {
  return getKeyBufferSize();
}

size_t BaselineJoinHashTable::countBufferOff() const noexcept {
  return offsetBufferOff() + getComponentBufferSize();
}

size_t BaselineJoinHashTable::payloadBufferOff() const noexcept {
  return countBufferOff() + getComponentBufferSize();
}

size_t BaselineJoinHashTable::getComponentBufferSize() const noexcept {
  const auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  return hash_table->getEntryCount() * sizeof(int32_t);
}
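// Materializes the composite probe key on the stack, one slot per component,
// sign-extending each outer-side value to the component width.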
llvm::Value* BaselineJoinHashTable::codegenKey(const CompilationOptions& co) {
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    case 4:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(get_int_type(32, LL_CONTEXT), key_size_lv);
      break;
    case 8:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(get_int_type(64, LL_CONTEXT), key_size_lv);
      break;
    default:
      CHECK(false);
  }

  CodeGenerator code_generator(executor_);
  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
    const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(
        key_buff_lv->getType()->getScalarType()->getPointerElementType(),
        key_buff_lv,
        LL_INT(i));
    const auto& inner_outer_pair = inner_outer_pairs_[i];
    const auto outer_col = inner_outer_pair.second;
    const auto key_col_var = inner_outer_pair.first;
    const auto val_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
    if (key_col_var && val_col_var &&
        self_join_not_covered_by_left_deep_tree(
            key_col_var,
            val_col_var,
            get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
      throw std::runtime_error(
          "Query execution fails because the query contains not supported self-join "
          "pattern. We suspect the query requires multiple left-deep join tree due to "
          "the join condition of the self-join and is not supported for now. Please "
          "consider rewriting table order in FROM clause.");
    }
    auto key_lv = HashJoin::codegenColOrStringOper(
        outer_col, inner_outer_string_op_infos_pairs_[i].second, code_generator, co);
    const auto key_lv_ext =
        LL_BUILDER.CreateSExt(key_lv, get_int_type(key_component_width * 8, LL_CONTEXT));
    LL_BUILDER.CreateStore(key_lv_ext, key_comp_dest_lv);
  }
  return key_buff_lv;
}
llvm::Value* BaselineJoinHashTable::hashPtr(const size_t index) {
  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
  return hash_ptr->getType()->isPointerTy()
             ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
             : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
}
int BaselineJoinHashTable::getInnerTableRteIdx() const noexcept {
  CHECK(!inner_outer_pairs_.empty());
  const auto first_inner_col = inner_outer_pairs_.front().first;
  return first_inner_col->get_rte_idx();
}
HashType BaselineJoinHashTable::getHashType() const noexcept {
  if (layout_override_) {
    return *layout_override_;
  }
  auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  return hash_table->getLayout();
}
shared::TableKey BaselineJoinHashTable::getInnerTableId(
    const std::vector<InnerOuter>& inner_outer_pairs) {
  CHECK(!inner_outer_pairs.empty());
  const auto first_inner_col = inner_outer_pairs.front().first;
  return first_inner_col->getTableKey();
}
std::shared_ptr<HashTable> BaselineJoinHashTable::initHashTableOnCpuFromCache(
    QueryPlanHash key,
    CacheItemType item_type,
    DeviceIdentifier device_identifier) {
  CHECK(hash_table_cache_);
  auto timer = DEBUG_TIMER(__func__);
  VLOG(1) << "Checking CPU hash table cache.";
  return hash_table_cache_->getItemFromCache(key, item_type, device_identifier);
}
void BaselineJoinHashTable::putHashTableOnCpuToCache(
    QueryPlanHash key,
    CacheItemType item_type,
    std::shared_ptr<HashTable> hashtable_ptr,
    DeviceIdentifier device_identifier,
    size_t hashtable_building_time) {
  CHECK(hash_table_cache_);
  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
  hash_table_cache_->putItemToCache(
      key,
      hashtable_ptr,
      item_type,
      device_identifier,
      hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
      hashtable_building_time);
}
ChunkKey BaselineJoinHashTable::genChunkKey(
    const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments) const {
  std::vector<int> fragment_ids;
  std::for_each(
      fragments.cbegin(), fragments.cend(), [&fragment_ids](const auto& fragment) {
        fragment_ids.push_back(fragment.fragmentId);
      });
  return fragment_ids;
}