31 #include <llvm/Bitcode/BitcodeReader.h>
32 #include <llvm/IR/Function.h>
33 #include <llvm/IR/IRBuilder.h>
34 #include <llvm/IR/Verifier.h>
35 #include <llvm/Support/SourceMgr.h>
36 #include <llvm/Support/raw_os_ostream.h>
49 return function->add<
Load>(
51 ptr->
label() +
"_loaded");
75 LOG(
FATAL) <<
"Invalid byte width: " << compact_sz;
86 const size_t chosen_bytes,
88 Function* ir_reduce_one_entry) {
90 const auto dest_name = agg_kind +
"_dest";
91 if (sql_type.is_fp()) {
92 if (chosen_bytes ==
sizeof(
float)) {
93 const auto agg = ir_reduce_one_entry->add<
Cast>(
96 ir_reduce_one_entry->add<
Call>(
97 "agg_" + agg_kind +
"_float", std::vector<const Value*>{agg, val},
"");
99 CHECK_EQ(chosen_bytes,
sizeof(
double));
100 const auto agg = ir_reduce_one_entry->add<
Cast>(
103 ir_reduce_one_entry->add<
Call>(
104 "agg_" + agg_kind +
"_double", std::vector<const Value*>{agg, val},
"");
107 if (chosen_bytes ==
sizeof(int32_t)) {
108 const auto agg = ir_reduce_one_entry->add<
Cast>(
110 const auto val =
emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
111 ir_reduce_one_entry->add<
Call>(
112 "agg_" + agg_kind +
"_int32", std::vector<const Value*>{agg, val},
"");
114 CHECK_EQ(chosen_bytes,
sizeof(int64_t));
115 const auto agg = ir_reduce_one_entry->add<
Cast>(
117 const auto val =
emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
118 ir_reduce_one_entry->add<
Call>(
119 "agg_" + agg_kind, std::vector<const Value*>{agg, val},
"");
128 const int64_t init_val,
129 const size_t chosen_bytes,
131 Function* ir_reduce_one_entry) {
133 const auto dest_name = agg_kind +
"_dest";
136 if (sql_type.is_fp()) {
137 if (chosen_bytes ==
sizeof(
float)) {
138 const auto agg = ir_reduce_one_entry->add<
Cast>(
141 const auto init_val_lv = ir_reduce_one_entry->addConstant<
ConstantFP>(
142 *
reinterpret_cast<const float*
>(may_alias_ptr(&init_val)),
Type::Float);
143 std::vector<const Value*>
args{agg, val, init_val_lv};
144 ir_reduce_one_entry->add<
Call>(
"agg_" + agg_kind +
"_float_skip_val",
args,
"");
146 CHECK_EQ(chosen_bytes,
sizeof(
double));
147 const auto agg = ir_reduce_one_entry->add<
Cast>(
150 const auto init_val_lv = ir_reduce_one_entry->addConstant<
ConstantFP>(
151 *
reinterpret_cast<const double*
>(may_alias_ptr(&init_val)),
Type::Double);
152 ir_reduce_one_entry->add<
Call>(
"agg_" + agg_kind +
"_double_skip_val",
153 std::vector<const Value*>{agg, val, init_val_lv},
157 if (chosen_bytes ==
sizeof(int32_t)) {
158 const auto agg = ir_reduce_one_entry->add<
Cast>(
160 const auto val =
emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
161 const auto init_val_lv =
163 ir_reduce_one_entry->add<
Call>(
"agg_" + agg_kind +
"_int32_skip_val",
164 std::vector<const Value*>{agg, val, init_val_lv},
167 CHECK_EQ(chosen_bytes,
sizeof(int64_t));
168 const auto agg = ir_reduce_one_entry->add<
Cast>(
170 const auto val =
emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
171 const auto init_val_lv =
173 ir_reduce_one_entry->add<
Call>(
"agg_" + agg_kind +
"_skip_val",
174 std::vector<const Value*>{agg, val, init_val_lv},
180 agg_kind, val_ptr, other_ptr, chosen_bytes, agg_info, ir_reduce_one_entry);
187 const size_t chosen_bytes,
188 Function* ir_reduce_one_entry) {
189 const auto dest_name =
"count_dest";
190 if (chosen_bytes ==
sizeof(int32_t)) {
191 const auto agg = ir_reduce_one_entry->add<
Cast>(
193 const auto val =
emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
194 ir_reduce_one_entry->add<
Call>(
195 "agg_sum_int32", std::vector<const Value*>{agg, val},
"");
197 CHECK_EQ(chosen_bytes,
sizeof(int64_t));
198 const auto agg = ir_reduce_one_entry->add<
Cast>(
200 const auto val =
emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
201 ir_reduce_one_entry->add<
Call>(
"agg_sum", std::vector<const Value*>{agg, val},
"");
210 const int64_t init_val,
211 const size_t chosen_bytes,
212 Function* ir_reduce_one_entry) {
213 const auto func_name =
"write_projection_int" +
std::to_string(chosen_bytes * 8);
214 if (chosen_bytes ==
sizeof(int32_t)) {
215 const auto proj_val =
emit_load_i32(other_pi8, ir_reduce_one_entry);
216 ir_reduce_one_entry->add<
Call>(
218 std::vector<const Value*>{
224 CHECK_EQ(chosen_bytes,
sizeof(int64_t));
225 const auto proj_val =
emit_load_i64(other_pi8, ir_reduce_one_entry);
226 ir_reduce_one_entry->add<
Call>(
228 std::vector<const Value*>{
241 const int64_t init_val,
242 const size_t chosen_bytes,
243 Function* ir_reduce_one_entry) {
244 if (chosen_bytes ==
sizeof(int32_t)) {
245 const auto func_name =
"checked_single_agg_id_int32";
246 const auto proj_val =
emit_load_i32(other_pi8, ir_reduce_one_entry);
247 const auto slot_pi32 = ir_reduce_one_entry->add<
Cast>(
249 return ir_reduce_one_entry->add<
Call>(
252 std::vector<const Value*>{
258 const auto func_name =
"checked_single_agg_id";
259 CHECK_EQ(chosen_bytes,
sizeof(int64_t));
260 const auto proj_val =
emit_load_i64(other_pi8, ir_reduce_one_entry);
261 const auto slot_pi64 = ir_reduce_one_entry->add<
Cast>(
264 return ir_reduce_one_entry->add<
Call>(
267 std::vector<const Value*>{
276 const std::string
name,
277 const std::vector<Function::NamedArg>& arg_types,
279 const bool always_inline) {
280 return std::make_unique<Function>(
name, arg_types, ret_type, always_inline);
292 const QueryDescriptionType hash_type) {
293 std::string this_ptr_name;
294 std::string that_ptr_name;
297 this_ptr_name =
"this_targets_ptr";
298 that_ptr_name =
"that_targets_ptr";
303 this_ptr_name =
"this_row_ptr";
304 that_ptr_name =
"that_row_ptr";
308 LOG(
FATAL) <<
"Unexpected query description type";
326 {
"that_entry_idx", Type::Int32},
327 {
"that_entry_count", Type::Int32},
341 {
"start_index", Type::Int32},
342 {
"end_index", Type::Int32},
343 {
"that_entry_count", Type::Int32},
354 std::vector<llvm::Type*> parameter_types;
355 const auto& arg_types =
function->arg_types();
356 for (
const auto& named_arg : arg_types) {
358 parameter_types.push_back(
llvm_type(named_arg.type, ctx));
360 const auto func_type = llvm::FunctionType::get(
361 llvm_type(function->ret_type(), ctx), parameter_types,
false);
362 const auto linkage =
function->always_inline() ? llvm::Function::PrivateLinkage
363 : llvm::Function::ExternalLinkage;
365 llvm::Function::Create(func_type, linkage, function->name(), cgen_state->
module_);
366 const auto arg_it = func->arg_begin();
367 for (
size_t i = 0; i < arg_types.size(); ++i) {
368 const auto arg = &*(arg_it + i);
369 arg->setName(arg_types[i].
name);
371 if (function->always_inline()) {
385 return reduction_code;
397 const int8_t* that_ptr1,
398 const int8_t* that_ptr2,
399 const int64_t init_val) {
400 const auto rhs_proj_col = *
reinterpret_cast<const int64_t*
>(that_ptr1);
401 if (rhs_proj_col != init_val) {
402 *
reinterpret_cast<int64_t*
>(this_ptr1) = rhs_proj_col;
404 CHECK(this_ptr2 && that_ptr2);
405 *
reinterpret_cast<int64_t*
>(this_ptr2) = *reinterpret_cast<const int64_t*>(that_ptr2);
411 const void* serialized_varlen_buffer_handle,
414 const int8_t* that_ptr1,
415 const int8_t* that_ptr2,
416 const int64_t init_val,
417 const int64_t length_to_elems) {
418 if (!serialized_varlen_buffer_handle) {
422 const auto& serialized_varlen_buffer =
423 *
reinterpret_cast<const std::vector<std::string>*
>(serialized_varlen_buffer_handle);
424 if (!serialized_varlen_buffer.empty()) {
425 const auto rhs_proj_col = *
reinterpret_cast<const int64_t*
>(that_ptr1);
426 CHECK_LT(static_cast<size_t>(rhs_proj_col), serialized_varlen_buffer.size());
427 const auto& varlen_bytes_str = serialized_varlen_buffer[rhs_proj_col];
428 const auto str_ptr =
reinterpret_cast<const int8_t*
>(varlen_bytes_str.c_str());
429 *
reinterpret_cast<int64_t*
>(this_ptr1) = reinterpret_cast<const int64_t>(str_ptr);
430 *
reinterpret_cast<int64_t*
>(this_ptr2) =
431 static_cast<int64_t>(varlen_bytes_str.size() / length_to_elems);
441 const int64_t new_set_handle,
442 const int64_t old_set_handle,
443 const void* that_qmd_handle,
444 const void* this_qmd_handle,
445 const int64_t target_logical_idx) {
448 const auto& new_count_distinct_desc =
450 const auto& old_count_distinct_desc =
451 this_qmd->getCountDistinctDescriptor(target_logical_idx);
453 CHECK(old_count_distinct_desc.impl_type_ == new_count_distinct_desc.impl_type_);
455 new_set_handle, old_set_handle, new_count_distinct_desc, old_count_distinct_desc);
459 const int64_t old_set_handle,
460 const void* that_qmd_handle,
461 const void* this_qmd_handle,
462 const int64_t target_logical_idx) {
464 if (incoming->centroids().capacity()) {
466 if (accumulator->centroids().capacity() == 0u) {
467 *accumulator = std::move(*incoming);
469 accumulator->mergeTDigest(*incoming);
475 const int64_t old_set_handle,
476 const void* that_qmd_handle,
477 const void* this_qmd_handle,
478 const int64_t target_logical_idx) {
479 auto* accumulator =
reinterpret_cast<AggMode*
>(old_set_handle);
480 auto* incoming =
reinterpret_cast<AggMode*
>(new_set_handle);
481 accumulator->
reduce(std::move(*incoming));
485 int8_t* groups_buffer,
487 const uint32_t key_count,
488 const void* this_qmd_handle,
489 const int8_t* that_buff,
490 const uint32_t that_entry_idx,
491 const uint32_t that_entry_count,
492 const uint32_t row_size_bytes,
498 this_qmd.getEntryCount(),
499 reinterpret_cast<const int64_t*
>(key),
501 this_qmd.getEffectiveKeyWidth(),
503 reinterpret_cast<const int64_t*
>(that_buff),
506 row_size_bytes >> 3);
507 *buff_out = gvi.first;
528 const std::vector<TargetInfo>& targets,
529 const std::vector<int64_t>& target_init_vals,
530 const size_t executor_id)
531 : executor_id_(executor_id)
532 , query_mem_desc_(query_mem_desc)
534 , target_init_vals_(target_init_vals) {}
587 LOG(
FATAL) <<
"Unexpected query description type";
594 return reduction_code;
598 std::lock_guard<std::mutex> compilation_lock(executor->compilation_mutex_);
599 const auto compilation_context =
601 if (compilation_context) {
602 reduction_code.func_ptr =
604 return reduction_code;
606 auto cgen_state_ = std::unique_ptr<CgenState>(
new CgenState({},
false, executor.get()));
607 auto cgen_state = reduction_code.cgen_state = cgen_state_.get();
608 cgen_state->set_module_shallow_copy(executor->get_rt_module());
609 reduction_code.module = cgen_state->module_;
613 auto ir_reduce_one_entry =
615 auto ir_reduce_one_entry_idx =
617 auto ir_reduce_loop =
619 std::unordered_map<const Function*, llvm::Function*>
f;
620 f.emplace(reduction_code.ir_is_empty.get(), ir_is_empty);
621 f.emplace(reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry);
622 f.emplace(reduction_code.ir_reduce_one_entry_idx.get(), ir_reduce_one_entry_idx);
623 f.emplace(reduction_code.ir_reduce_loop.get(), ir_reduce_loop);
626 reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry, reduction_code,
f);
628 ir_reduce_one_entry_idx,
632 reduction_code.ir_reduce_loop.get(), ir_reduce_loop, reduction_code,
f);
633 reduction_code.llvm_reduce_loop = ir_reduce_loop;
635 reduction_code.cgen_state =
nullptr;
637 reduction_code, ir_is_empty, ir_reduce_one_entry, ir_reduce_one_entry_idx, key);
638 return reduction_code;
642 auto ir_is_empty = reduction_code.
ir_is_empty.get();
646 Value* empty_key_val{
nullptr};
647 const auto keys_ptr = ir_is_empty->arg(0);
659 "is_empty_slot_ptr");
660 const auto compact_sz =
663 empty_key_val = ir_is_empty->addConstant<
ConstantInt>(
685 ir_is_empty->add<
Ret>(ret);
691 const auto this_row_ptr = ir_reduce_one_entry->arg(0);
692 const auto that_row_ptr = ir_reduce_one_entry->arg(1);
693 const auto that_is_empty =
695 std::vector<const Value*>{that_row_ptr},
702 ir_reduce_one_entry->add<
MemCpy>(
709 const auto key_bytes_lv =
711 const auto this_targets_start_ptr = ir_reduce_one_entry->add<
GetElementPtr>(
712 this_row_ptr, key_bytes_lv,
"this_targets_start");
713 const auto that_targets_start_ptr = ir_reduce_one_entry->add<
GetElementPtr>(
714 that_row_ptr, key_bytes_lv,
"that_targets_start");
717 ir_reduce_one_entry, this_targets_start_ptr, that_targets_start_ptr);
721 Function* ir_reduce_one_entry,
722 Value* this_targets_start_ptr,
723 Value* that_targets_start_ptr)
const {
725 Value* this_targets_ptr = this_targets_start_ptr;
726 Value* that_targets_ptr = that_targets_start_ptr;
727 size_t init_agg_val_idx = 0;
728 for (
size_t target_logical_idx = 0; target_logical_idx <
targets_.size();
729 ++target_logical_idx) {
730 const auto& target_info =
targets_[target_logical_idx];
731 const auto& slots_for_col = col_slot_context.getSlotsForCol(target_logical_idx);
732 Value* this_ptr2{
nullptr};
733 Value* that_ptr2{
nullptr};
735 bool two_slot_target{
false};
736 if (target_info.is_agg &&
737 (target_info.agg_kind ==
kAVG ||
738 (target_info.agg_kind ==
kSAMPLE && target_info.sql_type.is_varlen()))) {
742 two_slot_target =
true;
745 for (
size_t target_slot_idx = slots_for_col.front();
746 target_slot_idx < slots_for_col.back() + 1;
747 target_slot_idx += 2) {
749 const auto slot_off =
752 const auto desc =
"target_" +
std::to_string(target_logical_idx) +
"_second_slot";
754 this_targets_ptr, slot_off,
"this_" + desc);
756 that_targets_ptr, slot_off,
"that_" + desc);
766 slots_for_col.front(),
767 ir_reduce_one_entry);
768 auto increment_agg_val_idx_maybe =
769 [&init_agg_val_idx, &target_logical_idx,
this](
const int slot_count) {
772 init_agg_val_idx += slot_count;
775 if (target_logical_idx + 1 ==
targets_.size() &&
776 target_slot_idx + 1 >= slots_for_col.back()) {
779 const auto next_desc =
780 "target_" +
std::to_string(target_logical_idx + 1) +
"_first_slot";
782 increment_agg_val_idx_maybe(2);
783 const auto two_slot_off = ir_reduce_one_entry->addConstant<
ConstantInt>(
787 this_targets_ptr, two_slot_off,
"this_" + next_desc);
789 that_targets_ptr, two_slot_off,
"that_" + next_desc);
791 increment_agg_val_idx_maybe(1);
793 this_targets_ptr, slot_off,
"this_" + next_desc);
795 that_targets_ptr, slot_off,
"that_" + next_desc);
799 ir_reduce_one_entry->add<
Ret>(
806 const auto this_targets_ptr_arg = ir_reduce_one_entry->arg(0);
807 const auto that_targets_ptr_arg = ir_reduce_one_entry->arg(1);
808 Value* this_ptr1 = this_targets_ptr_arg;
809 Value* that_ptr1 = that_targets_ptr_arg;
811 size_t init_agg_val_idx = 0;
812 for (
size_t target_logical_idx = 0; target_logical_idx <
targets_.size();
813 ++target_logical_idx) {
814 const auto& target_info =
targets_[target_logical_idx];
815 Value* this_ptr2{
nullptr};
816 Value* that_ptr2{
nullptr};
817 if (target_info.is_agg &&
818 (target_info.agg_kind ==
kAVG ||
819 (target_info.agg_kind ==
kSAMPLE && target_info.sql_type.is_varlen()))) {
820 const auto desc =
"target_" +
std::to_string(target_logical_idx) +
"_second_slot";
821 const auto second_slot_rel_off =
824 this_ptr1, second_slot_rel_off,
"this_" + desc);
826 that_ptr1, second_slot_rel_off,
"that_" + desc);
837 ir_reduce_one_entry);
838 if (target_logical_idx + 1 ==
targets_.size()) {
842 init_agg_val_idx =
advance_slot(init_agg_val_idx, target_info,
false);
845 init_agg_val_idx =
advance_slot(init_agg_val_idx, target_info,
false);
849 const auto next_desc =
850 "target_" +
std::to_string(target_logical_idx + 1) +
"_first_slot";
851 auto next_slot_rel_off = ir_reduce_one_entry->addConstant<
ConstantInt>(
854 this_targets_ptr_arg, next_slot_rel_off, next_desc);
856 that_targets_ptr_arg, next_slot_rel_off, next_desc);
858 ir_reduce_one_entry->add<
Ret>(
869 const auto this_buff = ir_reduce_one_entry_idx->arg(0);
870 const auto that_buff = ir_reduce_one_entry_idx->arg(1);
871 const auto entry_idx = ir_reduce_one_entry_idx->arg(2);
872 const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
873 const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
874 const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
875 const auto row_bytes = ir_reduce_one_entry_idx->addConstant<
ConstantInt>(
877 const auto entry_idx_64 = ir_reduce_one_entry_idx->add<
Cast>(
879 const auto row_off_in_bytes = ir_reduce_one_entry_idx->add<
BinaryOperator>(
881 const auto this_row_ptr = ir_reduce_one_entry_idx->add<
GetElementPtr>(
882 this_buff, row_off_in_bytes,
"this_row_ptr");
883 const auto that_row_ptr = ir_reduce_one_entry_idx->add<
GetElementPtr>(
884 that_buff, row_off_in_bytes,
"that_row_ptr");
885 const auto reduce_rc = ir_reduce_one_entry_idx->add<
Call>(
887 std::vector<const Value*>{this_row_ptr,
891 serialized_varlen_buffer_arg},
893 ir_reduce_one_entry_idx->add<
Ret>(reduce_rc);
903 const auto this_buff = ir_reduce_one_entry_idx->arg(0);
904 const auto that_buff = ir_reduce_one_entry_idx->arg(1);
905 const auto that_entry_idx = ir_reduce_one_entry_idx->arg(2);
906 const auto that_entry_count = ir_reduce_one_entry_idx->arg(3);
907 const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
908 const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
909 const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
910 const auto row_bytes = ir_reduce_one_entry_idx->addConstant<
ConstantInt>(
912 const auto that_entry_idx_64 = ir_reduce_one_entry_idx->add<
Cast>(
914 const auto that_row_off_in_bytes =
918 "that_row_off_in_bytes");
919 const auto that_row_ptr = ir_reduce_one_entry_idx->add<
GetElementPtr>(
920 that_buff, that_row_off_in_bytes,
"that_row_ptr");
921 const auto that_is_empty =
923 std::vector<const Value*>{that_row_ptr},
930 const auto one_element =
932 const auto this_targets_ptr_i64_ptr = ir_reduce_one_entry_idx->add<
Alloca>(
934 const auto this_is_empty_ptr =
935 ir_reduce_one_entry_idx->add<
Alloca>(
Type::Int8, one_element,
"this_is_empty_out");
937 "get_group_value_reduction_rt",
939 std::vector<const Value*>{
948 this_targets_ptr_i64_ptr,
951 const auto this_targets_ptr_i64 = ir_reduce_one_entry_idx->add<
Load>(
952 this_targets_ptr_i64_ptr,
"this_targets_ptr_i64");
954 ir_reduce_one_entry_idx->add<
Load>(this_is_empty_ptr,
"this_is_empty");
955 this_is_empty = ir_reduce_one_entry_idx->add<
Cast>(
962 const auto this_targets_ptr = ir_reduce_one_entry_idx->add<
Cast>(
964 const auto key_byte_count = key_qw_count *
sizeof(int64_t);
965 const auto key_byte_count_lv =
967 const auto that_targets_ptr = ir_reduce_one_entry_idx->add<
GetElementPtr>(
968 that_row_ptr, key_byte_count_lv,
"that_targets_ptr");
969 const auto reduce_rc = ir_reduce_one_entry_idx->add<
Call>(
971 std::vector<const Value*>{this_targets_ptr,
975 serialized_varlen_buffer_arg},
977 ir_reduce_one_entry_idx->add<
Ret>(reduce_rc);
983 Function* ir_reduce_loop,
984 Function* ir_reduce_one_entry_idx,
988 Value* that_entry_count,
989 Value* this_qmd_handle,
990 Value* that_qmd_handle,
991 Value* serialized_varlen_buffer) {
994 const auto sample_seed =
997 const auto checker_rt_name =
1001 checker_rt_name,
Type::Int8, std::vector<const Value*>{sample_seed},
"");
1002 const auto interrupt_triggered_bool =
1008 interrupt_triggered_bool,
1012 const auto reduce_rc =
1013 for_loop->
add<
Call>(ir_reduce_one_entry_idx,
1014 std::vector<const Value*>{this_buff,
1020 serialized_varlen_buffer},
1023 auto reduce_rc_bool =
1035 const auto this_buff_arg = ir_reduce_loop->arg(0);
1036 const auto that_buff_arg = ir_reduce_loop->arg(1);
1037 const auto start_index_arg = ir_reduce_loop->arg(2);
1038 const auto end_index_arg = ir_reduce_loop->arg(3);
1039 const auto that_entry_count_arg = ir_reduce_loop->arg(4);
1040 const auto this_qmd_handle_arg = ir_reduce_loop->arg(5);
1041 const auto that_qmd_handle_arg = ir_reduce_loop->arg(6);
1042 const auto serialized_varlen_buffer_arg = ir_reduce_loop->arg(7);
1044 static_cast<For*
>(ir_reduce_loop->add<
For>(start_index_arg, end_index_arg,
""));
1051 that_entry_count_arg,
1052 this_qmd_handle_arg,
1053 that_qmd_handle_arg,
1054 serialized_varlen_buffer_arg);
1063 const size_t target_logical_idx,
1064 const size_t target_slot_idx,
1065 const size_t init_agg_val_idx,
1066 const size_t first_slot_idx_for_target,
1067 Function* ir_reduce_one_entry)
const {
1078 if (target_info.
is_agg &&
1089 ir_reduce_one_entry);
1092 this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
1094 auto checked_rc_bool = ir_reduce_one_entry->add<
ICmp>(
1100 ir_reduce_one_entry->add<
ReturnEarly>(checked_rc_bool, checked_rc,
"");
1104 this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
1106 CHECK(this_ptr2 && that_ptr2);
1107 size_t length_to_elems{0};
1110 length_to_elems = target_slot_idx == first_slot_idx_for_target ? 1 : 4;
1115 const auto serialized_varlen_buffer_arg = ir_reduce_one_entry->arg(4);
1117 "serialized_varlen_buffer_sample",
1119 std::vector<const Value*>{
1120 serialized_varlen_buffer_arg,
1126 ir_reduce_one_entry->addConstant<
ConstantInt>(length_to_elems,
1138 const size_t target_logical_idx,
1139 const size_t target_slot_idx,
1140 const int64_t init_val,
1141 const int8_t chosen_bytes,
1142 Function* ir_reduce_one_entry)
const {
1143 auto agg_kind = target_info.
agg_kind;
1149 CHECK_EQ(static_cast<size_t>(chosen_bytes),
sizeof(int64_t));
1151 this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
1158 CHECK_EQ(
sizeof(int64_t), static_cast<size_t>(chosen_bytes));
1160 this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
1163 reduceOneModeSlot(this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
1170 ir_reduce_one_entry);
1185 ir_reduce_one_entry);
1188 UNREACHABLE() <<
"Invalid aggregate type: " << agg_kind;
1195 const size_t target_logical_idx,
1196 Function* ir_reduce_one_entry)
const {
1198 const auto old_set_handle =
emit_load_i64(this_ptr1, ir_reduce_one_entry);
1199 const auto new_set_handle =
emit_load_i64(that_ptr1, ir_reduce_one_entry);
1200 const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
1201 const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
1203 "count_distinct_set_union_jit_rt",
1205 std::vector<const Value*>{
1217 const size_t target_logical_idx,
1218 Function* ir_reduce_one_entry)
const {
1220 const auto old_set_handle =
emit_load_i64(this_ptr1, ir_reduce_one_entry);
1221 const auto new_set_handle =
emit_load_i64(that_ptr1, ir_reduce_one_entry);
1222 const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
1223 const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
1225 "approx_quantile_jit_rt",
1227 std::vector<const Value*>{
1238 const size_t target_logical_idx,
1239 Function* ir_reduce_one_entry)
const {
1241 const auto old_set_handle =
emit_load_i64(this_ptr1, ir_reduce_one_entry);
1242 const auto new_set_handle =
emit_load_i64(that_ptr1, ir_reduce_one_entry);
1243 const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
1244 const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
1248 std::vector<const Value*>{
1259 const llvm::Function* ir_is_empty,
1260 const llvm::Function* ir_reduce_one_entry,
1261 const llvm::Function* ir_reduce_one_entry_idx,
1265 VLOG(3) <<
"Reduction Loop:\n"
1269 VLOG(3) <<
"Reduction One Entry Idx Func:\n"
1272 LOG(
IR) <<
"Reduction Loop:\n"
1276 LOG(
IR) <<
"Reduction One Entry Idx Func:\n"
1283 auto cpu_compilation_context = std::make_shared<CpuCompilationContext>(std::move(ee));
1284 cpu_compilation_context->setFunctionPointer(reduction_code.
llvm_reduce_loop);
1290 std::move(cpu_compilation_context));
1309 std::vector<std::string> target_init_vals_strings;
1312 std::back_inserter(target_init_vals_strings),
1314 const auto target_init_vals_key =
1316 std::vector<std::string> targets_strings;
1320 std::back_inserter(targets_strings),
1336 auto cgen_state_ = std::unique_ptr<CgenState>(
new CgenState({},
false, executor.get()));
1337 auto cgen_state = reduction_code.
cgen_state = cgen_state_.get();
1340 reduction_code.
module = cgen_state->module_;
1344 auto ir_reduce_one_entry =
1346 auto ir_reduce_one_entry_idx =
1348 auto ir_reduce_loop =
1350 std::unordered_map<const Function*, llvm::Function*>
f;
1351 f.emplace(reduction_code.
ir_is_empty.get(), ir_is_empty);
1357 reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry, reduction_code,
f);
1359 ir_reduce_one_entry_idx,
1363 reduction_code.ir_reduce_loop.get(), ir_reduce_loop, reduction_code,
f);
1364 reduction_code.llvm_reduce_loop = ir_reduce_loop;
1365 reduction_code.cgen_state =
nullptr;
1366 return reduction_code;
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
void reduceOneSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const size_t first_slot_idx_for_target, Function *ir_reduce_one_entry) const
void reduce(AggMode &&rhs)
bool is_aggregate_query(const QueryDescriptionType hash_type)
size_t getEntryCount() const
void count_distinct_set_union(const int64_t new_set_handle, const int64_t old_set_handle, const CountDistinctDescriptor &new_count_distinct_desc, const CountDistinctDescriptor &old_count_distinct_desc)
__device__ bool dynamic_watchdog()
const std::string & label() const
RUNTIME_EXPORT uint8_t check_watchdog_rt(const size_t sample_seed)
void reduceOneEntryNoCollisions(const ReductionCode &reduction_code) const
int64_t getTargetGroupbyIndex(const size_t target_idx) const
void varlen_buffer_sample(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val)
std::unique_ptr< Function > ir_reduce_loop
Value * emit_read_int_from_buff(Value *ptr, const int8_t compact_sz, Function *function)
void reduceOneEntryBaselineIdx(const ReductionCode &reduction_code) const
void mark_function_always_inline(llvm::Function *func)
Calculate approximate median and general quantiles, based on "Computing Extremely Accurate Quantiles Using t-Digests" by T. Dunning et al.
void reduceLoop(const ReductionCode &reduction_code) const
const size_t INTERP_THRESHOLD
llvm::Function * llvm_reduce_loop
void reduceOneEntryNoCollisionsIdx(const ReductionCode &reduction_code) const
bool hasKeylessHash() const
std::vector< std::string > CodeCacheKey
size_t get_slot_off_quad(const QueryMemoryDescriptor &query_mem_desc)
std::string cacheKey() const
size_t getEffectiveKeyWidth() const
std::unique_ptr< Function > ir_reduce_one_entry
bool g_enable_dynamic_watchdog
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
const std::vector< int64_t > target_init_vals_
void reduceOneAggregateSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const int64_t init_val, const int8_t chosen_bytes, Function *ir_reduce_one_entry) const
bool takes_float_argument(const TargetInfo &target_info)
bool g_enable_non_kernel_time_query_interrupt
Value * add(Args &&...args)
const Value * emit_checked_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
std::unique_ptr< Function > setup_reduce_one_entry_idx(ReductionCode *reduction_code)
int32_t(*)(int8_t *this_buff, const int8_t *that_buff, const int32_t start_entry_index, const int32_t end_entry_index, const int32_t that_entry_count, const void *this_qmd, const void *that_qmd, const void *serialized_varlen_buffer) FuncPtr
const QueryMemoryDescriptor query_mem_desc_
std::unique_ptr< Function > ir_is_empty
void translate_function(const Function *function, llvm::Function *llvm_function, const ReductionCode &reduction_code, const std::unordered_map< const Function *, llvm::Function * > &f)
void emit_aggregate_one_value(const std::string &agg_kind, Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
RUNTIME_EXPORT void serialized_varlen_buffer_sample(const void *serialized_varlen_buffer_handle, int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val, const int64_t length_to_elems)
Value * emit_load_i32(Value *ptr, Function *function)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool blocksShareMemory() const
__device__ bool check_interrupt()
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
llvm::LLVMContext & context_
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
void reduceOneCountDistinctSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
uint8_t check_interrupt_rt(const size_t sample_seed)
size_t getGroupbyColCount() const
const int32_t INTERRUPT_ERROR
size_t targetGroupbyIndicesSize() const
void generate_loop_body(For *for_loop, Function *ir_reduce_loop, Function *ir_reduce_one_entry_idx, Value *this_buff, Value *that_buff, Value *start_index, Value *that_entry_count, Value *this_qmd_handle, Value *that_qmd_handle, Value *serialized_varlen_buffer)
void emit_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
void emit_aggregate_one_nullable_value(SQLAgg const sql_agg, Value *val_ptr, Value *other_ptr, const int64_t init_val, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
ReductionCode codegen() const override
bool is_distinct_target(const TargetInfo &target_info)
std::string target_info_key(const TargetInfo &target_info)
OUTPUT transform(INPUT const &input, FUNC const &func)
std::unique_ptr< Function > ir_reduce_one_entry_idx
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
ReductionCode setup_functions_ir(const QueryDescriptionType hash_type)
std::string toString(const Executor::ExtModuleKinds &kind)
std::unique_ptr< Function > setup_is_empty_entry(ReductionCode *reduction_code)
size_t getCountDistinctDescriptorsSize() const
void reduceOneEntryTargetsNoCollisions(Function *ir_reduce_one_entry, Value *this_targets_start_ptr, Value *that_targets_start_ptr) const
QueryDescriptionType getQueryDescriptionType() const
void set_module_shallow_copy(const std::unique_ptr< llvm::Module > &module, bool always_clone=false)
llvm::Type * llvm_type(const Type type, llvm::LLVMContext &ctx)
virtual ReductionCode codegen() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
RUNTIME_EXPORT void mode_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
std::string serialize_llvm_object(const T *llvm_obj)
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
RUNTIME_EXPORT void count_distinct_set_union_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
void finalizeReductionCode(ReductionCode &reduction_code, const llvm::Function *ir_is_empty, const llvm::Function *ir_reduce_one_entry, const llvm::Function *ir_reduce_one_entry_idx, const CodeCacheKey &key) const
std::unique_ptr< Function > create_function(const std::string name, const std::vector< Function::NamedArg > &arg_types, const Type ret_type, const bool always_inline)
std::string get_type_name() const
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
const Value * iter() const
RUNTIME_EXPORT void approx_quantile_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
RUNTIME_EXPORT void get_group_value_reduction_rt(int8_t *groups_buffer, const int8_t *key, const uint32_t key_count, const void *this_qmd_handle, const int8_t *that_buff, const uint32_t that_entry_idx, const uint32_t that_entry_count, const uint32_t row_size_bytes, int64_t **buff_out, uint8_t *empty)
bool didOutputColumnar() const
std::unique_ptr< Function > setup_reduce_one_entry(ReductionCode *reduction_code, const QueryDescriptionType hash_type)
void isEmpty(const ReductionCode &reduction_code) const
Value * emit_load_i64(Value *ptr, Function *function)
const ColSlotContext & getColSlotContext() const
void reduceOneEntryBaseline(const ReductionCode &reduction_code) const
static std::shared_ptr< QueryEngine > getInstance()
const int32_t WATCHDOG_ERROR
Value * emit_load(Value *ptr, Type ptr_type, Function *function)
llvm::Function * create_llvm_function(const Function *function, CgenState *cgen_state)
ResultSetReductionJIT(const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &target_init_vals, const size_t executor_id)
HOST DEVICE bool get_notnull() const
void emit_aggregate_one_count(Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, Function *ir_reduce_one_entry)
SQLTypeInfo get_elem_type() const
void reduceOneApproxQuantileSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
std::unique_ptr< Function > setup_reduce_loop(ReductionCode *reduction_code)
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
SQLAgg get_non_conditional_agg_type(SQLAgg const agg_type)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
std::string reductionKey() const
const Executor * getExecutor() const
void reduceOneModeSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
int32_t getTargetIdxForKey() const
const std::vector< TargetInfo > targets_