38 const std::vector<double>& inverse_bucket_sizes_for_dimension,
39 const std::vector<InnerOuter> inner_outer_pairs) {
45 const auto& inner_outer_pair = inner_outer_pairs[i];
46 const auto inner_col = inner_outer_pair.first;
47 const auto& ti = inner_col->get_type_info();
48 const auto elem_ti = ti.get_elem_type();
52 elem_ti.get_type() ==
kDOUBLE});
62 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
65 std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
67 std::vector<std::shared_ptr<void>>& malloc_owner,
70 static std::mutex fragment_fetch_mutex;
71 std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
76 effective_memory_level,
84 CHECK(dev_buff_owner);
102 const int device_id) {
107 for (
size_t i = 0; i < memsz; ++i) {
119 const int device_id)
const {
120 return toStringFlat<int64_t>(
this, device_type, device_id);
124 const int device_id)
const {
125 return toStringFlat<int32_t>(
this, device_type, device_id);
131 for (
auto k : e.
key) {
174 os <<
"(" << inner_outer_string_op_infos.first <<
", "
175 << inner_outer_string_op_infos.second <<
")";
180 std::ostringstream os;
181 os << inner_outer_string_op_infos;
187 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
189 bool first_elem =
true;
190 for (
const auto& inner_outer_string_op_infos : inner_outer_string_op_infos_pairs) {
195 os << inner_outer_string_op_infos;
202 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
203 std::ostringstream os;
204 os << inner_outer_string_op_infos_pairs;
209 const std::vector<llvm::Value*>& hash_join_idx_args_in,
210 const bool is_sharded,
211 const bool col_is_nullable,
213 const int64_t sub_buff_size,
215 bool is_bucketized) {
217 using namespace std::string_literals;
219 std::string fname(is_bucketized ?
"bucketized_hash_join_idx"s :
"hash_join_idx"s);
227 if (!is_bw_eq && col_is_nullable) {
228 fname +=
"_nullable";
231 const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
232 const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
233 slot_lv, executor->cgen_state_->llInt(int64_t(0)));
235 auto pos_ptr = hash_join_idx_args_in[0];
238 auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
239 pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
240 auto hash_join_idx_args = hash_join_idx_args_in;
241 hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
242 count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
244 const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
246 executor->cgen_state_->emitCall(fname, hash_join_idx_args),
247 executor->cgen_state_->llInt(int64_t(0)));
248 auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
249 executor->cgen_state_->ir_builder_.CreateAdd(
250 pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
251 llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
252 auto rowid_ptr_i32 = executor->cgen_state_->ir_builder_.CreateGEP(
253 rowid_base_i32->getType()->getScalarType()->getPointerElementType(),
256 return {rowid_ptr_i32, row_count_lv, slot_lv,
nullptr};
261 llvm::Value* hash_ptr =
nullptr;
262 const auto total_table_count =
263 executor->plan_state_->join_info_.join_hash_tables_.size();
264 CHECK_LT(table_idx, total_table_count);
265 if (total_table_count > 1) {
266 auto hash_tables_ptr =
270 ? executor->cgen_state_->ir_builder_.CreateGEP(
271 hash_tables_ptr->getType()->getScalarType()->getPointerElementType(),
273 executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
275 hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(
276 hash_pptr->getType()->getPointerElementType(), hash_pptr);
278 hash_ptr =
get_arg_by_name(executor->cgen_state_->row_func_,
"join_hash_tables");
286 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
287 const std::vector<InputTableInfo>& query_infos,
291 const int device_count,
298 std::shared_ptr<HashJoin> join_hash_table;
301 throw std::runtime_error(
302 "Bounding box intersection disabled, attempting to fall back to loop join");
304 if (qual_bin_oper->is_bbox_intersect_oper()) {
305 VLOG(1) <<
"Trying to build geo hash table:";
314 hashtable_build_dag_map,
316 table_id_to_node_map);
317 }
else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
318 qual_bin_oper->get_left_operand()) ||
321 LOG(
INFO) <<
"A user's query hint forced the join operation to use the Baseline "
324 VLOG(1) <<
"Trying to build keyed hash table:";
333 hashtable_build_dag_map,
335 table_id_to_node_map);
338 VLOG(1) <<
"Trying to build perfect hash table:";
347 hashtable_build_dag_map,
349 table_id_to_node_map);
354 CHECK_EQ(join_quals.size(), size_t(1));
355 const auto join_qual =
357 VLOG(1) <<
"Building a perfect join hash table fails: " << e.what();
358 VLOG(1) <<
"Trying to re-build keyed join hash table";
367 hashtable_build_dag_map,
369 table_id_to_node_map);
372 CHECK(join_hash_table);
375 for (
int device_id = 0; device_id < join_hash_table->getDeviceCount();
379 VLOG(2) <<
"Built GPU hash table: "
385 VLOG(2) <<
"Built CPU hash table: "
390 return join_hash_table;
393 std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
395 const Executor* executor,
396 const bool has_string_ops) {
397 const auto inner_col = cols.first;
399 const auto inner_ti = inner_col->get_type_info();
401 std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
402 inner_outer_str_dict_proxies{
nullptr,
nullptr};
403 if (inner_ti.is_string() && outer_col) {
405 CHECK(outer_ti.is_string());
406 inner_outer_str_dict_proxies.first =
407 executor->getStringDictionaryProxy(inner_ti.getStringDictKey(),
true);
408 CHECK(inner_outer_str_dict_proxies.first);
409 inner_outer_str_dict_proxies.second =
410 executor->getStringDictionaryProxy(outer_ti.getStringDictKey(),
true);
411 CHECK(inner_outer_str_dict_proxies.second);
412 if (!has_string_ops &&
413 *inner_outer_str_dict_proxies.first == *inner_outer_str_dict_proxies.second) {
415 CHECK_EQ(inner_ti.getStringDictKey(), outer_ti.getStringDictKey());
416 inner_outer_str_dict_proxies.first =
nullptr;
417 inner_outer_str_dict_proxies.second =
nullptr;
420 return inner_outer_str_dict_proxies;
427 const Executor* executor) {
428 const bool has_string_ops = inner_outer_string_op_infos.first.
size() ||
429 inner_outer_string_op_infos.second.size();
430 const auto inner_outer_proxies =
432 const bool translate_dictionary =
433 inner_outer_proxies.first && inner_outer_proxies.second;
434 if (translate_dictionary) {
435 const auto& inner_dict_id = inner_outer_proxies.first->getDictKey();
436 const auto& outer_dict_id = inner_outer_proxies.second->getDictKey();
437 CHECK(has_string_ops || inner_dict_id != outer_dict_id);
438 const auto id_map = executor->getJoinIntersectionStringProxyTranslationMap(
439 inner_outer_proxies.first,
440 inner_outer_proxies.second,
441 inner_outer_string_op_infos.first,
442 inner_outer_string_op_infos.second,
443 executor->getRowSetMemoryOwner());
444 if (!inner_outer_string_op_infos.second.empty()) {
449 static_cast<int64_t
>(
450 inner_outer_proxies.second->transientEntryCount() + 1) *
462 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments) {
463 auto const fragment_id = [](
auto const& frag_info) {
return frag_info.fragmentId; };
464 std::vector<int> frag_ids(fragments.size());
465 std::transform(fragments.cbegin(), fragments.cend(), frag_ids.begin(), fragment_id);
466 std::sort(frag_ids.begin(), frag_ids.end());
471 const std::vector<InnerOuter>& inner_outer_pairs,
472 const Executor* executor,
473 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
475 std::vector<const void*> sd_inner_proxy_per_key;
476 std::vector<void*> sd_outer_proxy_per_key;
477 std::vector<ChunkKey> cache_key_chunks;
478 const bool has_string_op_infos = inner_outer_string_op_infos_pairs.size();
479 if (has_string_op_infos) {
480 CHECK_EQ(inner_outer_pairs.size(), inner_outer_string_op_infos_pairs.size());
482 size_t string_op_info_pairs_idx = 0;
483 for (
const auto& inner_outer_pair : inner_outer_pairs) {
484 const auto inner_col = inner_outer_pair.first;
485 const auto outer_col = inner_outer_pair.second;
486 const auto& inner_ti = inner_col->get_type_info();
487 const auto& outer_ti = outer_col->get_type_info();
488 if (inner_ti.is_string() && outer_ti.is_string() &&
489 inner_ti.is_dict_encoded_string() != outer_ti.is_dict_encoded_string()) {
490 throw std::runtime_error(
491 "Detected a join between dictionary-encoded and none-encoded text columns. "
492 "Please consider applying dictionary-encoding for the other column.");
494 const auto& inner_column_key = inner_col->getColumnKey();
495 ChunkKey cache_key_chunks_for_column{
496 inner_column_key.db_id, inner_column_key.table_id, inner_column_key.column_id};
497 if (inner_ti.is_string() &&
498 (!(inner_ti.getStringDictKey() == outer_ti.getStringDictKey()) ||
499 (has_string_op_infos &&
500 (inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].first.size() ||
501 inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].second.size())))) {
502 CHECK(outer_ti.is_string());
506 sd_inner_proxy = executor->getStringDictionaryProxy(
507 inner_ti.getStringDictKey(), executor->getRowSetMemoryOwner(),
true);
508 sd_inner_proxy_per_key.push_back(sd_inner_proxy);
511 sd_outer_proxy = executor->getStringDictionaryProxy(
512 outer_ti.getStringDictKey(), executor->getRowSetMemoryOwner(),
true);
513 sd_outer_proxy_per_key.push_back(sd_outer_proxy);
514 cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
517 sd_inner_proxy_per_key.emplace_back();
518 sd_outer_proxy_per_key.emplace_back();
519 cache_key_chunks_for_column.push_back({-1});
521 cache_key_chunks.push_back(cache_key_chunks_for_column);
522 string_op_info_pairs_idx++;
524 return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
527 std::vector<const StringDictionaryProxy::IdMap*>
530 const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
531 const Executor* executor) {
534 const size_t num_proxies = inner_proxies.size();
535 CHECK_EQ(num_proxies, outer_proxies.size());
536 std::vector<const StringDictionaryProxy::IdMap*> proxy_translation_maps;
537 proxy_translation_maps.reserve(num_proxies);
538 for (
size_t proxy_pair_idx = 0; proxy_pair_idx < num_proxies; ++proxy_pair_idx) {
539 const bool translate_proxies =
540 inner_proxies[proxy_pair_idx] && outer_proxies[proxy_pair_idx];
541 if (translate_proxies) {
542 const auto inner_proxy =
549 CHECK_NE(inner_proxy->getDictKey(), outer_proxy->getDictKey());
550 proxy_translation_maps.emplace_back(
551 executor->getJoinIntersectionStringProxyTranslationMap(
554 string_op_infos_for_keys[proxy_pair_idx].first,
555 string_op_infos_for_keys[proxy_pair_idx].second,
556 executor->getRowSetMemoryOwner()));
558 proxy_translation_maps.emplace_back(
nullptr);
561 return proxy_translation_maps;
566 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
569 if (!string_op_infos.empty()) {
570 const auto coerced_col_var =
572 CHECK(coerced_col_var);
573 std::vector<llvm::Value*> codegen_val_vec{
575 return codegen_val_vec[0];
577 return code_generator.
codegen(col_or_string_oper,
true, co)[0];
581 std::string_view table,
582 std::string_view column,
591 auto ti = cmeta->columnType;
593 if (ti.is_geometry() && ti.get_type() !=
kPOINT) {
595 switch (ti.get_type()) {
597 geoColumnId = cmeta->columnId + 2;
601 geoColumnId = cmeta->columnId + 3;
605 geoColumnId = cmeta->columnId + 4;
613 ti = cmeta->columnType;
616 auto cv = std::make_shared<Analyzer::ColumnVar>(
634 std::set<const Analyzer::ColumnVar*>
result;
635 for (
const auto& expr_component : expr_tuple->
getTuple()) {
636 const auto component_rte_set = visitor.
visit(expr_component.get());
637 result.insert(component_rte_set.begin(), component_rte_set.end());
643 const std::set<const Analyzer::ColumnVar*>& aggregate,
644 const std::set<const Analyzer::ColumnVar*>& next_result)
const override {
646 result.insert(next_result.begin(), next_result.end());
652 std::unordered_set<shared::TableKey> phys_table_ids;
653 for (
auto cv : cvs) {
654 phys_table_ids.insert(cv->getTableKey());
657 std::unordered_set<PhysicalInput> phys_inputs;
658 for (
auto cv : cvs) {
659 const auto& column_key = cv->getColumnKey();
664 executor->setupCaching(phys_inputs, phys_table_ids);
668 std::set<const Analyzer::ColumnVar*> cvs,
669 Executor* executor) {
670 std::unordered_set<shared::TableKey> phys_table_ids;
671 for (
auto cv : cvs) {
672 phys_table_ids.insert(cv->getTableKey());
678 std::vector<InputTableInfo> query_infos;
679 query_infos.reserve(phys_table_ids.size());
680 for (
const auto& table_key : phys_table_ids) {
683 query_infos.push_back({table_key, td->fragmenter->getFragmentsForQuery()});
691 std::string_view table1,
692 std::string_view column1,
694 std::string_view table2,
695 std::string_view column2,
699 const int device_count,
701 Executor* executor) {
705 auto qual_bin_oper = std::make_shared<Analyzer::BinOper>(
kBOOLEAN,
kEQ,
kONE, a1, a2);
707 std::set<const Analyzer::ColumnVar*> cvs =
729 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
732 const int device_count,
734 Executor* executor) {
735 std::set<const Analyzer::ColumnVar*> cvs =
756 std::vector<std::shared_ptr<Analyzer::BinOper>> qual_bin_opers,
759 const int device_count,
761 Executor* executor) {
762 std::set<const Analyzer::ColumnVar*> cvs;
763 for (
auto& qual : qual_bin_opers) {
765 cvs.insert(cv.begin(), cv.end());
770 std::shared_ptr<HashJoin> hash_table;
771 std::string error_msg;
772 for (
auto& qual : qual_bin_opers) {
785 if (candidate_hash_table) {
786 hash_table = candidate_hash_table;
789 error_msg = e.what();
793 return std::make_pair(error_msg, hash_table);
797 const size_t shard_count,
798 const Executor* executor) {
812 template <
typename T>
814 auto* target_expr = expr;
815 if (
auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr)) {
816 target_expr = cast_expr->get_operand();
819 return dynamic_cast<const T*
>(target_expr);
826 const bool is_bbox_intersect) {
829 if (!is_bbox_intersect) {
831 throw HashJoinFail(
"Equijoin types must be identical, found: " +
836 throw HashJoinFail(
"Cannot apply hash join to inner column type " +
842 throw HashJoinFail(
"Equijoin with different decimal types");
848 if (lhs_ti.
is_string() && (
static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
849 (lhs_cast && lhs_cast->get_optype() !=
kCAST) ||
850 (rhs_cast && rhs_cast->get_optype() !=
kCAST))) {
852 "Cannot use hash join for given expression (non-cast unary operator)");
855 if (lhs_ti.
is_decimal() && (lhs_cast || rhs_cast)) {
856 throw HashJoinFail(
"Cannot use hash join for given expression (cast to decimal)");
858 auto lhs_col = getHashJoinColumn<Analyzer::ColumnVar>(lhs);
859 auto rhs_col = getHashJoinColumn<Analyzer::ColumnVar>(rhs);
861 const auto lhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(lhs);
862 const auto rhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(rhs);
864 auto process_string_op_infos = [](
const auto& string_oper,
auto& col,
auto& ti) {
865 std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
877 "Hash joins involving string operators currently restricted to column inputs "
878 "(i.e. not case statements).");
880 ti = col->get_type_info();
881 CHECK(ti.is_dict_encoded_string());
882 const auto chained_string_op_exprs = string_oper->getChainedStringOpExprs();
883 CHECK_GT(chained_string_op_exprs.size(), 0UL);
884 for (
const auto& chained_string_op_expr : chained_string_op_exprs) {
885 auto chained_string_op =
887 CHECK(chained_string_op);
889 chained_string_op->get_kind(),
890 chained_string_op->get_type_info(),
891 chained_string_op->getLiteralArgs());
892 string_op_infos.emplace_back(string_op_info);
895 return string_op_infos;
898 auto outer_string_op_infos = process_string_op_infos(lhs_string_oper, lhs_col, lhs_ti);
899 auto inner_string_op_infos = process_string_op_infos(rhs_string_oper, rhs_col, rhs_ti);
901 if (!lhs_col && !rhs_col) {
903 "Cannot use hash join for given expression (both lhs and rhs are invalid)",
909 auto outer_ti = lhs_ti;
910 auto inner_ti = rhs_ti;
913 if (!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) {
919 if (lhs_col && lhs_col->get_rte_idx() == 0) {
921 "Cannot use hash join for given expression (lhs' rte idx is zero)",
922 inner_qual_decision);
927 std::swap(outer_string_op_infos, inner_string_op_infos);
931 throw HashJoinFail(
"Cannot use hash join for given expression (invalid inner col)",
932 inner_qual_decision);
937 if (outer_constant_col) {
939 "Cannot use hash join for given expression: try to join with a constant "
941 inner_qual_decision);
944 int outer_rte_idx = rte_idx_visitor.
visit(outer_expr);
947 if (inner_col->get_rte_idx() <= outer_rte_idx) {
949 "Cannot use hash join for given expression (inner's rte <= outer's rte)",
950 inner_qual_decision);
955 const auto& column_key = inner_col->getColumnKey();
958 column_key.column_id, column_key.table_id, inner_col_cd, temporary_tables);
959 const auto& outer_col_ti =
961 ? outer_col->get_type_info()
964 if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
965 (lhs_cast || rhs_cast)) {
966 throw HashJoinFail(
"Cannot use hash join for given expression (cast from decimal)");
968 if (is_bbox_intersect) {
969 if (!inner_col_real_ti.is_array()) {
971 "Bounding box intersection only supported for inner columns with array type");
973 auto is_bounds_array = [](
const auto ti) {
974 return ti.is_fixlen_array() && ti.get_size() == 32;
976 if (!is_bounds_array(inner_col_real_ti)) {
978 "Bounding box intersection only supported for 4-element double fixed length "
981 if (!(outer_col_ti.get_type() ==
kPOINT || is_bounds_array(outer_col_ti) ||
984 "Bounding box intersection only supported for geometry outer columns of type "
986 "geometry columns with bounds or constructed points");
989 if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
990 inner_col_real_ti.is_decimal() ||
991 (inner_col_real_ti.is_string() &&
994 "Can only apply hash join to integer-like types and dictionary encoded "
999 auto normalized_inner_col = inner_col;
1000 auto normalized_outer_col = outer_col ? outer_col : outer_expr;
1002 const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
1003 const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
1005 if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
1006 throw HashJoinFail(std::string(
"Could not build hash tables for incompatible types " +
1007 normalized_inner_ti.get_type_name() +
" and " +
1008 normalized_outer_ti.get_type_name()));
1010 return std::make_pair(std::make_pair(normalized_inner_col, normalized_outer_col),
1011 std::make_pair(inner_string_op_infos, outer_string_op_infos));
1014 std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
1017 std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
result;
1018 const auto lhs_tuple_expr =
1020 const auto rhs_tuple_expr =
1023 CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
1024 if (lhs_tuple_expr) {
1025 const auto& lhs_tuple = lhs_tuple_expr->getTuple();
1026 const auto& rhs_tuple = rhs_tuple_expr->getTuple();
1027 CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
1028 for (
size_t i = 0; i < lhs_tuple.size(); ++i) {
1033 result.first.emplace_back(col_pair.first);
1034 result.second.emplace_back(col_pair.second);
1037 CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
1042 result.first.emplace_back(col_pair.first);
1043 result.second.emplace_back(col_pair.second);
1050 bool invalid_cache_key,
1057 const Executor* executor,
1058 size_t rowid_size) noexcept {
1067 CHECK_GT(rowid_size, static_cast<size_t>(0));
1069 executor->maxGpuSlabSize() / rowid_size);
1085 const Executor* executor) {
1088 std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
1090 std::tie(inner_col, outer_col) =
1091 get_cols(join_condition, executor->getTemporaryTables());
1095 if (!inner_col || !outer_col) {
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
int64_t getIntMin() const
std::vector< int > ChunkKey
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
size_t g_num_tuple_threshold_switch_to_baseline
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
class for a per-database catalog. also includes metadata for the current database and the current use...
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
std::string toStringFlat(const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
std::vector< const void * > sd_inner_proxy_per_key
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
static JoinColumn makeJoinColumn(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, std::vector< std::shared_ptr< void >> &malloc_owner, ColumnCacheMap &column_cache)
Creates a JoinColumn struct containing an array of JoinChunk structs.
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
std::set< const Analyzer::ColumnVar * > aggregateResult(const std::set< const Analyzer::ColumnVar * > &aggregate, const std::set< const Analyzer::ColumnVar * > &next_result) const override
HOST DEVICE int get_scale() const
const Expr * get_right_operand() const
bool is_constructed_point(const Analyzer::Expr *expr)
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
DEVICE void sort(ARGS &&...args)
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
virtual int8_t * alloc(const size_t num_bytes)=0
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
T visit(const Analyzer::Expr *expr) const
HOST DEVICE SQLTypes get_type() const
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
bool g_enable_data_recycler
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
const std::vector< JoinColumnTypeInfo > join_column_types
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
virtual void copyToDevice(void *device_dst, const void *host_src, const size_t num_bytes) const =0
size_t col_chunks_buff_sz
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
const std::vector< std::shared_ptr< Analyzer::Expr > > & getTuple() const
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
std::vector< void * > sd_outer_proxy_per_key
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
static size_t getMaximumNumHashEntriesCanHold(MemoryLevel memory_level, const Executor *executor, size_t rowid_size) noexcept
static std::shared_ptr< BoundingBoxIntersectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
bool is_bbox_intersect_oper() const
static constexpr size_t MAX_NUM_HASH_ENTRIES
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
int getDatabaseId() const
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
OUTPUT transform(INPUT const &input, FUNC const &func)
const int8_t * col_chunks_buff
std::string toString(const Executor::ExtModuleKinds &kind)
const SQLTypeInfo & get_type_info() const
int get_precision() const
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
static RegisteredQueryHint defaults()
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
bool table_is_replicated(const TableDescriptor *td)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::set< const Analyzer::ColumnVar * > visitColumnVarTuple(const Analyzer::ExpressionTuple *expr_tuple) const override
bool g_enable_bbox_intersect_hashjoin
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::string get_type_name() const
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
bool force_baseline_hash_join
int64_t getIntMax() const
std::set< const Analyzer::ColumnVar * > visitColumnVar(const Analyzer::ColumnVar *column) const override
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
std::set< int32_t > payload
#define DEBUG_TIMER(name)
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
const Expr * get_left_operand() const
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< int64_t > key
std::vector< JoinBucketInfo > join_buckets
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
DEVICE void swap(ARGS &&...args)
bool g_use_hashtable_cache
const std::vector< JoinColumn > join_columns
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})