#include "../CudaMgr/CudaMgr.h"
#include "../Shared/checked_alloc.h"
#include "../Shared/funcannotations.h"
#include "../Utils/ChunkIter.h"
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <string_view>
return min == 0 && max == -1;
out << "Hash Type = " << info.hash_type_ << " min = " << info.min
    << " max = " << info.max << " bucket = " << info.bucket
    << " has_nulls = " << info.has_nulls << "\n";
out << "UnorderedSet";
out << "<Unknown Type>";
for (auto target_expr : target_exprs) {
  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
    if (ti.is_buffer()) {
    } else if (ti.is_geometry()) {
      agg_count += ti.get_physical_coord_cols() * 2;
  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
if (!cd || !cd->isVirtualCol) {
for (const auto& target_expr : ra_exe_unit.target_exprs) {
                                               const int64_t max_entry_count) {
                                  const std::vector<InputTableInfo>& query_infos,
                                  Executor* executor) {
      expr, query_infos, executor, boost::make_optional(ra_exe_unit.simple_quals));
  switch (expr_range.getType()) {
      if (expr_range.getIntMin() > expr_range.getIntMax()) {
              expr_range.getIntMin(),
              expr_range.getIntMax(),
              expr_range.getBucket(),
              expr_range.hasNulls()};
      if (expr_range.getFpMin() > expr_range.getFpMax()) {
const int64_t baseline_threshold =
bool has_nulls{false};
        group_cardinality_estimation,
    cardinality *= crt_col_cardinality;
    if (col_range_info.has_nulls) {
  if (!cardinality || cardinality > baseline_threshold) {
        group_cardinality_estimation,
        int64_t(cardinality),
        group_cardinality_estimation,
        group_cardinality_estimation,
  return col_range_info;
const int64_t col_count =
int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
  max_entry_count = std::min(max_entry_count, baseline_threshold);
if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
  const bool has_filters =
            col_range_info.has_nulls};
  return col_range_info;
          col_range_info.has_nulls};
    !col_range_info.bucket) {
          col_range_info.has_nulls};
return col_range_info;
if (col_range_info.bucket) {
  crt_col_cardinality /= col_range_info.bucket;
return static_cast<int64_t>(crt_col_cardinality +
                            (1 + (col_range_info.has_nulls ? 1 : 0)));
if (col_range_info.min <= col_range_info.max) {
  size_t size = col_range_info.max - col_range_info.min;
  if (col_range_info.bucket) {
    size /= col_range_info.bucket;
  if (size >= static_cast<size_t>(std::numeric_limits<int64_t>::max())) {
  return static_cast<int64_t>(size + 1);
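
// Illustrative sketch (hypothetical helper, not part of the original file):
// the bucketed cardinality above reduces to plain integer arithmetic. For a
// column with range [10, 99] and bucket size 10, the perfect-hash table needs
// (99 - 10) / 10 + 1 == 9 entries, plus one extra slot when nulls exist.
static int64_t bucketed_cardinality_sketch(int64_t min_val,
                                           int64_t max_val,
                                           int64_t bucket,
                                           bool has_nulls) {
  size_t size = max_val - min_val;  // assumes min_val <= max_val, as checked above
  if (bucket) {
    size /= bucket;  // one hash slot per bucket, not per raw value
  }
  return static_cast<int64_t>(size + 1) + (has_nulls ? 1 : 0);
}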
#define LL_CONTEXT executor_->cgen_state_->context_
#define LL_BUILDER executor_->cgen_state_->ir_builder_
#define LL_BOOL(v) executor_->cgen_state_->llBool(v)
#define LL_INT(v) executor_->cgen_state_->llInt(v)
#define LL_FP(v) executor_->cgen_state_->llFp(v)
#define ROW_FUNC executor_->cgen_state_->row_func_
#define CUR_FUNC executor_->cgen_state_->current_func_
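
// Illustrative usage (not part of the original file): these macros abbreviate
// CgenState accesses inside GroupByAndAggregate member functions, e.g.:
//   llvm::Value* forty_two = LL_INT(int64_t(42));  // executor_->cgen_state_->llInt(...)
//   LL_BUILDER.CreateStore(forty_two, slot_ptr);   // slot_ptr: a hypothetical i64*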
    const std::vector<InputTableInfo>& query_infos,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const std::optional<int64_t>& group_cardinality_estimation)
    , ra_exe_unit_(ra_exe_unit)
    , query_infos_(query_infos)
    , row_set_mem_owner_(row_set_mem_owner)
    , device_type_(device_type)
    , group_cardinality_estimation_(group_cardinality_estimation) {
  const auto& groupby_ti = groupby_expr->get_type_info();
  if (groupby_ti.is_text_encoding_none()) {
    throw std::runtime_error(
        "Cannot group by string columns which are not dictionary encoded.");
  if (groupby_ti.is_buffer()) {
    throw std::runtime_error("Group by buffer not supported");
  if (groupby_ti.is_geometry()) {
    throw std::runtime_error("Group by geometry not supported");
                                                 const size_t shard_count) const {
  size_t device_count{0};
    device_count = executor_->cudaMgr()->getDeviceCount();
  int64_t bucket{col_range_info.bucket};
    if (device_count < shard_count) {
      bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
                            : std::min(device_count, shard_count - device_count);
      bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
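
// Illustrative sketch (hypothetical helper, not part of the original file):
// the branch above picks a bucket stride so top-N groups spread across devices
// and shards; g_leaf_count is modeled here as a plain parameter.
static size_t sharded_top_bucket_sketch(size_t device_count,
                                        size_t shard_count,
                                        size_t leaf_count) {
  if (device_count < shard_count) {
    // Fewer devices than shards: stride by the device count (at least 1).
    return leaf_count ? std::max(device_count, size_t(1))
                      : std::min(device_count, shard_count - device_count);
  }
  // Enough devices: stride by shard count times the leaf count.
  return shard_count * std::max(leaf_count, size_t(1));
}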
                             const std::vector<InputTableInfo>& query_infos,
                             const bool is_group_by,
                             Executor* executor) {
  bool keyless{true}, found{false};
  int32_t num_agg_expr{0};
  for (const auto target_expr : ra_exe_unit.target_exprs) {
    if (agg_info.is_agg) {
      const auto arg_expr = agg_arg(target_expr);
      switch (agg_info.agg_kind) {
          if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
              expr_range_info.hasNulls()) {
          if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
              expr_range_info.hasNulls()) {
          auto arg_ti = arg_expr->get_type_info();
            arg_ti.set_notnull(true);
          if (!arg_ti.get_notnull()) {
                !expr_range_info.hasNulls()) {
            switch (expr_range_info.getType()) {
                if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
                if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
          CHECK(agg_expr && agg_expr->get_arg());
          const auto& arg_ti = agg_expr->get_arg()->get_type_info();
          if (arg_ti.is_string() || arg_ti.is_buffer()) {
          auto expr_range_info =
              is_group_by || float_argument_input,
              float_argument_input ? sizeof(float) : 8);
          switch (expr_range_info.getType()) {
                  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
              if (expr_range_info.getFpMax() < double_max) {
              if (expr_range_info.getIntMax() < init_max) {
          CHECK(agg_expr && agg_expr->get_arg());
          const auto& arg_ti = agg_expr->get_arg()->get_type_info();
          if (arg_ti.is_string() || arg_ti.is_buffer()) {
          auto expr_range_info =
              expr_range_info.hasNulls()) {
              is_group_by || float_argument_input,
              float_argument_input ? sizeof(float) : 8);
          switch (expr_range_info.getType()) {
                  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
              if (expr_range_info.getFpMin() > double_min) {
              if (expr_range_info.getIntMin() > init_min) {
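
// Illustrative sketch (hypothetical helper, not part of the original file):
// a keyless layout drops the group key and uses the aggregate's initial value
// as the "empty slot" sentinel. That is only safe when no real input can
// collide with the sentinel; for MAX, the column maximum must stay below it:
static bool max_can_be_keyless_sketch(int64_t col_max, int64_t init_max_sentinel) {
  // If a row could produce the sentinel itself, an empty slot would be
  // indistinguishable from a populated one, so keyless is unsafe.
  return col_max < init_max_sentinel;
}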
    const std::vector<InputTableInfo>& query_infos,
    Executor* executor) {
  auto compute_bytes_per_group =
      size_t effective_size_bytes = (bitmap_sz + 7) / 8;
      const auto padded_size =
          : effective_size_bytes;
      return padded_size * sub_bitmap_count;
  for (size_t i = 0; i < ra_exe_unit.target_exprs.size(); i++) {
      CHECK(agg_info.is_agg);
        if (arg_ti.is_text_encoding_none()) {
          throw std::runtime_error(
              "Strings must be dictionary-encoded for COUNT(DISTINCT).");
          throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
          throw std::runtime_error(
              "APPROX_COUNT_DISTINCT on geometry columns not supported");
        if (agg_info.is_distinct && arg_ti.is_geometry()) {
          throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
      auto arg_range_info =
          arg_ti.is_fp() ? no_range_info
                  ra_exe_unit, query_infos, agg_expr->get_arg(), executor);
        const auto& original_target_expr_ti = it->second;
        if (arg_ti.is_integer() && original_target_expr_ti.get_type() == kDATE &&
          auto is_date_value_not_encoded = [&original_target_expr_ti](int64_t date_val) {
            if (original_target_expr_ti.get_comp_param() == 16) {
              return date_val < INT16_MIN || date_val > INT16_MAX;
            return date_val < INT32_MIN || date_val > INT32_MAX;
          if (is_date_value_not_encoded(arg_range_info.min)) {
          if (is_date_value_not_encoded(arg_range_info.max)) {
          arg_range_info.bucket = 0;
      int64_t bitmap_sz_bits{0};
        const auto error_rate_expr = agg_expr->get_arg1();
        if (error_rate_expr) {
          CHECK(error_rate_expr->get_type_info().get_type() == kINT);
          auto const error_rate =
          CHECK_GE(error_rate->get_constval().intval, 1);
      if (arg_range_info.isEmpty()) {
        count_distinct_descriptors.emplace_back(
            arg_range_info.bucket,
      const auto sub_bitmap_count =
      size_t worst_case_num_groups{1};
          !(arg_ti.is_buffer() || arg_ti.is_geometry())) {
        if (shared::is_any<kCOUNT, kCOUNT_IF>(agg_info.agg_kind)) {
        const auto total_bytes_per_entry =
            compute_bytes_per_group(bitmap_sz_bits, sub_bitmap_count, device_type);
        const auto range_bucket = std::max(group_by_range_info.bucket, (int64_t)1);
        const auto maximum_num_groups =
            (group_by_range_info.max - group_by_range_info.min + 1) / range_bucket;
        const auto total_bitmap_bytes_for_groups =
            total_bytes_per_entry * maximum_num_groups;
        if (total_bitmap_bytes_for_groups >=
          const auto agg_expr_max_entry_count =
              arg_range_info.max - arg_range_info.min + 1;
          int64_t max_agg_expr_table_cardinality{1};
              bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
          for (const auto cv : colvar_set) {
                std::find_if(query_infos.begin(),
                             [&](const auto& input_table_info) {
                               return input_table_info.table_key == cv->getTableKey();
            int64_t cur_table_cardinality =
                it != query_infos.end()
                    ? static_cast<int64_t>(it->info.getNumTuplesUpperBound())
            max_agg_expr_table_cardinality =
                std::max(max_agg_expr_table_cardinality, cur_table_cardinality);
            worst_case_num_groups *= cur_table_cardinality;
          auto has_valid_stat = [agg_expr_max_entry_count, maximum_num_groups]() {
            return agg_expr_max_entry_count > 0 && maximum_num_groups > 0;
          if (has_valid_stat()) {
            const size_t unordered_set_threshold{2};
            const auto bits_for_agg_entry = std::ceil(log(agg_expr_max_entry_count));
            const auto bits_for_agg_table =
                std::ceil(log(max_agg_expr_table_cardinality));
            const auto avg_num_unique_entries_per_group =
                std::ceil(max_agg_expr_table_cardinality / maximum_num_groups);
            if ((bits_for_agg_entry - bits_for_agg_table) >= unordered_set_threshold ||
                agg_expr_max_entry_count >= avg_num_unique_entries_per_group) {
              throw std::runtime_error(
                  "Consider using approx_count_distinct operator instead of "
                  "count_distinct operator to lower the memory "
          !(arg_ti.is_array() || arg_ti.is_geometry())) {
        const size_t too_many_entries{100000000};
            worst_case_num_groups > too_many_entries &&
            "Detected too many input entries for set-based count distinct operator under "
      count_distinct_descriptors.emplace_back(
          arg_range_info.bucket,
  return count_distinct_descriptors;
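
// Illustrative sketch (hypothetical helper, not part of the original file):
// the per-group footprint of a bitmap COUNT DISTINCT descriptor follows the
// compute_bytes_per_group arithmetic above.
static size_t count_distinct_bitmap_bytes_sketch(int64_t min_val,
                                                 int64_t max_val,
                                                 int64_t bucket,
                                                 size_t sub_bitmap_count) {
  const size_t bitmap_sz_bits =
      (max_val - min_val) / std::max(bucket, int64_t(1)) + 1;  // one bit per bucket
  const size_t bytes = (bitmap_sz_bits + 7) / 8;  // round bits up to whole bytes
  return bytes * sub_bitmap_count;  // GPU may keep several sub-bitmaps per group
}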
    const bool allow_multifrag,
    const size_t max_groups_buffer_entry_count,
    const int8_t crt_min_byte_width,
    const bool output_columnar_hint) {
  bool sort_on_gpu_hint =
  bool must_use_baseline_sort = shard_count;
      max_groups_buffer_entry_count,
      must_use_baseline_sort,
      output_columnar_hint);
  CHECK(query_mem_desc);
  if (query_mem_desc->sortOnGpu() &&
          align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
      2 * 1024 * 1024 * 1024LL) {
    must_use_baseline_sort = true;
    sort_on_gpu_hint = false;
  size_t target_count = 0u;
  if (target_count == 0u) {
  std::vector<ApproxQuantileDescriptor> descriptors;
  descriptors.reserve(target_count);
    const bool allow_multifrag,
    const size_t max_groups_buffer_entry_count,
    const int8_t crt_min_byte_width,
    const bool sort_on_gpu_hint,
    const bool must_use_baseline_sort,
    const bool output_columnar_hint) {
  const bool threads_can_reuse_group_by_buffers =
  const auto col_range_info =
      col_range_info_nosharding.min,
      col_range_info_nosharding.max,
      col_range_info_nosharding.has_nulls};
  const auto keyless_info =
      (col_range_info.max - col_range_info.min) /
              std::max(col_range_info.bucket, int64_t(1)) >
          max_groups_buffer_entry_count,
          approx_quantile_descriptors,
          count_distinct_descriptors,
          must_use_baseline_sort,
          output_columnar_hint,
          threads_can_reuse_group_by_buffers);
      LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
      max_groups_buffer_entry_count,
      approx_quantile_descriptors,
      count_distinct_descriptors,
      must_use_baseline_sort,
      output_columnar_hint,
      threads_can_reuse_group_by_buffers);
    const std::list<Analyzer::OrderEntry>& order_entries) {
  if (order_entries.size() > 1) {
  for (const auto& order_entry : order_entries) {
    if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
    if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
        agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
    if (agg_expr->get_arg()) {
      if (arg_ti.is_fp()) {
    auto expr_range_info =
        expr_range_info.has_nulls) &&
        order_entry.is_desc == order_entry.nulls_first) {
    const auto& target_ti = target_expr->get_type_info();
    CHECK(!target_ti.is_buffer());
    if (!target_ti.is_integer()) {
                                 llvm::BasicBlock* sc_false,
  CHECK(filter_result);
  bool can_return_error = false;
  llvm::BasicBlock* filter_false{nullptr};
      llvm::Value* old_total_matched_val{nullptr};
        old_total_matched_val =
            LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
#if LLVM_VERSION_MAJOR > 12
                                       llvm::AtomicOrdering::Monotonic);
        old_total_matched_val = LL_BUILDER.CreateLoad(
            total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
      LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
    auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
      filter_cfg.setChainToNext();
          varlen_output_buffer,
      llvm::Value* nullcheck_cond{nullptr};
        nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
            std::get<0>(agg_out_ptr_w_idx),
            llvm::ConstantPointerNull::get(
          nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
          varlen_output_buffer,
      can_return_error = true;
          code_generator.posArg(nullptr),
    std::stack<llvm::BasicBlock*> array_loops;
    auto arg_it = ROW_FUNC->arg_begin();
    std::vector<llvm::Value*> agg_out_vec;
      agg_out_vec.push_back(&*arg_it++);
  } else if (sc_false) {
    const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
    LL_BUILDER.SetInsertPoint(saved_insert_block);
  return can_return_error;
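
// Illustrative sketch (not part of the original file): the CreateAtomicRMW
// above emits the moral equivalent of an atomic fetch-and-add on the shared
// total-matched counter, so each matching row claims a unique output slot.
// LLVM's Monotonic ordering corresponds to std::memory_order_relaxed; assumes
// <atomic> is available.
static int32_t claim_output_slot_sketch(std::atomic<int32_t>& total_matched) {
  return total_matched.fetch_add(1, std::memory_order_relaxed);
}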
    llvm::Value* groups_buffer,
      : query_mem_desc.getRowSize() / sizeof(int64_t);
    CHECK_GE(only_order_entry.tle_no, int(1));
    const size_t target_idx = only_order_entry.tle_no - 1;
    const auto chosen_bytes =
    auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
        code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
    std::string fname = "get_bin_from_k_heap";
    const auto& oe_ti = order_entry_expr->get_type_info();
    llvm::Value* null_key_lv = nullptr;
    if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
      const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
      switch (bit_width) {
      CHECK(oe_ti.is_fp());
      if (order_entry_lv->getType()->isDoubleTy()) {
      fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
    const auto key_slot_idx =
        LL_BOOL(only_order_entry.is_desc),
        LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
        LL_BOOL(only_order_entry.nulls_first),
    const auto output_buffer_entry_count_lv =
        LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
    const auto group_expr_lv =
        LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
    std::vector<llvm::Value*> args{groups_buffer,
                                   output_buffer_entry_count_lv,
                                   code_generator.posArg(nullptr)};
      const auto columnar_output_offset =
      return columnar_output_offset;
  auto arg_it = ROW_FUNC->arg_begin();
  auto groups_buffer = arg_it++;
    std::stack<llvm::BasicBlock*> array_loops;
      return std::make_tuple(
      return std::make_tuple(
          : query_mem_desc.getRowSize() / sizeof(int64_t);
  llvm::Value* group_key = nullptr;
  llvm::Value* key_size_lv = nullptr;
      col_width_size == sizeof(int32_t)
  int32_t subkey_idx = 0;
    const auto col_range_info =
    const auto translated_null_value = static_cast<int64_t>(
        (col_range_info.bucket ? col_range_info.bucket : 1));
    const bool col_has_nulls =
        : col_range_info.has_nulls)
    const auto group_expr_lvs =
        executor_->groupByColumnCodegen(group_expr.get(),
                                        translated_null_value,
    const auto group_expr_lv = group_expr_lvs.translated_value;
          group_expr_lvs.original_value,
          group_key->getType()->getScalarType()->getPointerElementType(),
        &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
  return std::make_tuple(nullptr, nullptr);
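
// Illustrative sketch (hypothetical helper, not part of the original file):
// the translated null value computed above maps NULL group keys to a dedicated
// perfect-hash slot one bucket past the column maximum.
static int64_t translated_null_value_sketch(int64_t col_max, int64_t bucket) {
  return col_max + (bucket ? bucket : 1);
}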
  auto arg_it = ROW_FUNC->arg_begin();
  auto varlen_output_buffer = arg_it++;
  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
  return varlen_output_buffer;
std::tuple<llvm::Value*, llvm::Value*>
    llvm::Value* groups_buffer,
    llvm::Value* group_expr_lv_translated,
    llvm::Value* group_expr_lv_original,
    const int32_t row_size_quad) {
      ? "get_columnar_group_bin_offset"
      : "get_group_value_fast"};
    get_group_fn_name += "_keyless";
    get_group_fn_name += "_semiprivate";
  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
                                              &*group_expr_lv_translated};
  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
    get_group_fn_name += "_with_original_key";
    get_group_fn_args.push_back(group_expr_lv_original);
      get_group_fn_args.push_back(LL_INT(row_size_quad));
      get_group_fn_args.push_back(LL_INT(row_size_quad));
      get_group_fn_args.push_back(warp_idx);
  if (get_group_fn_name == "get_columnar_group_bin_offset") {
    return std::make_tuple(&*groups_buffer,
                           emitCall(get_group_fn_name, get_group_fn_args));
  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
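
// Illustrative sketch (hypothetical helper, not part of the original file):
// ignoring the empty-slot key write, get_group_value_fast reduces a
// perfect-hash lookup to pointer arithmetic over a row-wise buffer of
// row_size_quad 64-bit words per entry.
static int64_t* group_value_fast_sketch(int64_t* groups_buffer,
                                        int64_t key,
                                        int64_t min_key,
                                        int64_t bucket,
                                        int32_t row_size_quad) {
  int64_t idx = key - min_key;
  if (bucket) {
    idx /= bucket;  // collapse each bucket onto one slot
  }
  return groups_buffer + idx * row_size_quad;  // start of this group's row
}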
    llvm::Value* groups_buffer,
    llvm::Value* group_key,
    llvm::Value* key_size_lv,
    const int32_t row_size_quad) {
      LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
    const std::string set_matching_func_name{
        "set_matching_group_value_perfect_hash_columnar"};
    const std::vector<llvm::Value*> set_matching_func_arg{
    emitCall(set_matching_func_name, set_matching_func_arg);
    return std::make_tuple(groups_buffer, hash_lv);
    return std::make_tuple(
        emitCall("get_matching_group_value_perfect_hash_keyless",
                 {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
    return std::make_tuple(
        emitCall("get_matching_group_value_perfect_hash",
                 {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
std::tuple<llvm::Value*, llvm::Value*>
    llvm::Value* groups_buffer,
    llvm::Value* group_key,
    llvm::Value* key_size_lv,
    const size_t key_width,
    const int32_t row_size_quad) {
  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
    CHECK(key_width == sizeof(int32_t));
  std::vector<llvm::Value*> func_args{
      LL_INT(static_cast<int32_t>(key_width))};
  std::string func_name{"get_group_value"};
    func_name += "_columnar_slot";
    func_args.push_back(LL_INT(row_size_quad));
    func_name += "_with_watchdog";
    return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
  return std::make_tuple(emitCall(func_name, func_args), nullptr);
  auto ft = llvm::FunctionType::get(
  auto key_hash_func = llvm::Function::Create(ft,
                                              llvm::Function::ExternalLinkage,
  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
  auto& key_buff_arg = *key_hash_func->args().begin();
  llvm::Value* key_buff_lv = &key_buff_arg;
  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
  llvm::IRBuilder<> key_hash_func_builder(bb);
  std::vector<int64_t> cardinalities;
    auto col_range_info =
    auto* gep = key_hash_func_builder.CreateGEP(
        key_buff_lv->getType()->getScalarType()->getPointerElementType(),
        key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
    auto col_range_info =
        key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
    if (col_range_info.bucket) {
          key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
    for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
      crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
                                                    LL_INT(cardinalities[prev_dim_idx]));
    hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
  key_hash_func_builder.CreateRet(
  return key_hash_func;
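
// Illustrative sketch (hypothetical helper, not part of the original file):
// the IR emitted above computes a mixed-radix index over the group-by columns.
// The same arithmetic in plain C++:
static int64_t perfect_hash_sketch(const int64_t* key,
                                   const std::vector<int64_t>& mins,
                                   const std::vector<int64_t>& buckets,
                                   const std::vector<int64_t>& cardinalities) {
  int64_t hash = 0;
  for (size_t dim = 0; dim < cardinalities.size(); ++dim) {
    int64_t term = key[dim] - mins[dim];
    if (buckets[dim]) {
      term /= buckets[dim];
    }
    // Scale by the cardinalities of all previous dimensions (mixed radix).
    for (size_t prev = 0; prev < dim; ++prev) {
      term *= cardinalities[prev];
    }
    hash += term;
  }
  return hash;
}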
                                                   llvm::Value* target) {
  const auto& agg_type = agg_info.sql_type;
  const size_t chosen_bytes = agg_type.get_size();
  bool need_conversion{false};
  llvm::Value* arg_null{nullptr};
  llvm::Value* agg_null{nullptr};
  llvm::Value* target_to_cast{target};
  if (arg_type.is_fp()) {
    arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
    if (agg_type.is_fp()) {
      agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
      if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
              static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
        need_conversion = true;
    arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
    if (agg_type.is_fp()) {
      agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
      need_conversion = true;
      target_to_cast = executor_->castToFP(target, arg_type, agg_type);
      agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
      if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
           static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
          (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
           static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
        need_conversion = true;
  if (need_conversion) {
    auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
        executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
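
// Illustrative sketch (hypothetical helper, not part of the original file):
// the select emitted by convertNullIfAny preserves null-ness across differing
// sentinels. For integer-encoded values with distinct null sentinels on the
// argument and aggregate types, it behaves like:
static int64_t convert_null_sketch(int64_t value, int64_t arg_null, int64_t agg_null) {
  // A null argument must map to the aggregate's null sentinel rather than be
  // reinterpreted as a real value after widening or casting.
  return value == arg_null ? agg_null : value;
}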
  const auto window_func_context =
      : query_mem_desc.getRowSize() / sizeof(int64_t);
    auto arg_it = ROW_FUNC->arg_begin();
    auto groups_buffer = arg_it++;
        window_func_context, code_generator.posArg(nullptr));
    const auto pos_in_window =
    llvm::Value* entry_count_lv =
    std::vector<llvm::Value*> args{
        &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
      const auto columnar_output_offset =
  auto arg_it = ROW_FUNC->arg_begin();
  auto groups_buffer = arg_it++;
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
    llvm::Value* varlen_output_buffer,
    const std::vector<llvm::Value*>& agg_out_vec,
  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
  bool can_return_error = false;
    CHECK(agg_out_vec.empty());
    CHECK(!agg_out_vec.empty());
  llvm::Value* output_buffer_byte_stream{nullptr};
  llvm::Value* out_row_idx{nullptr};
    output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
        std::get<0>(agg_out_ptr_w_idx),
        llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
    output_buffer_byte_stream->setName("out_buff_b_stream");
    CHECK(std::get<1>(agg_out_ptr_w_idx));
    out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
    out_row_idx->setName("out_row_idx");
    target_builder(target_expr, executor_, query_mem_desc, co);
        output_buffer_byte_stream,
        varlen_output_buffer,
  return can_return_error;
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const size_t chosen_bytes,
    const size_t agg_out_off,
    const size_t target_idx) {
  llvm::Value* agg_col_ptr{nullptr};
    CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
    CHECK(output_buffer_byte_stream);
    auto out_per_col_byte_idx =
#ifdef _WIN32
        LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
#else
        LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
#endif
    auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
                                            LL_INT(static_cast<int64_t>(col_off)));
    byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
        output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
        output_buffer_byte_stream,
    agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
      auto const col_off_in_bytes = query_mem_desc.getColOffInBytes(agg_out_off);
      auto const col_off = col_off_in_bytes / chosen_bytes;
      auto const col_rem = col_off_in_bytes % chosen_bytes;
      CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
      CHECK(std::get<1>(agg_out_ptr_w_idx));
          std::get<1>(agg_out_ptr_w_idx),
          std::get<0>(agg_out_ptr_w_idx),
          bit_cast->getType()->getScalarType()->getPointerElementType(),
      auto const col_off = col_off_in_bytes / chosen_bytes;
      auto const col_rem = col_off_in_bytes % chosen_bytes;
      CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
          std::get<0>(agg_out_ptr_w_idx),
          bit_cast->getType()->getScalarType()->getPointerElementType(),
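
// Illustrative sketch (hypothetical helper, not part of the original file):
// in the columnar layout the slot address above is plain byte arithmetic. The
// left shift by __builtin_ffs(chosen_bytes) - 1 is just multiplication by
// chosen_bytes, which is always a power of two here.
static int8_t* columnar_agg_ptr_sketch(int8_t* buffer,
                                       size_t col_off_bytes,  // column start offset
                                       size_t row_idx,
                                       size_t chosen_bytes) {  // 1, 2, 4, or 8
  return buffer + col_off_bytes + row_idx * chosen_bytes;
}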
  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
      estimator_comp_count_lv);
  int32_t subkey_idx = 0;
  for (const auto& estimator_arg_comp : estimator_arg) {
    const auto estimator_arg_comp_lvs =
        executor_->groupByColumnCodegen(estimator_arg_comp.get(),
    CHECK(!estimator_arg_comp_lvs.original_value);
    const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
        estimator_arg_comp_lv,
        estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
  const auto estimator_comp_bytes_lv =
      LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
  const auto bitmap_size_lv =
      {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
                                                  const int64_t skip_val) {
  if (val != skip_val) {
  auto* mode_map = reinterpret_cast<AggMode*>(*agg);
    const size_t target_idx,
    std::vector<llvm::Value*>& agg_args,
  const auto& arg_ti =
    if (arg_ti.is_fp()) {
      agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
  const auto& count_distinct_descriptor =
    agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
      agg_args.push_back(base_dev_addr);
      agg_args.push_back(base_host_addr);
      emitCall("agg_approximate_count_distinct_gpu", agg_args);
      emitCall("agg_approximate_count_distinct", agg_args);
  std::string agg_fname{"agg_count_distinct"};
    agg_fname += "_bitmap";
    agg_args.push_back(LL_INT(count_distinct_descriptor.min_val));
    agg_args.push_back(LL_INT(count_distinct_descriptor.bucket_size));
  if (agg_info.skip_null_val) {
    auto null_lv = executor_->cgen_state_->castToTypeIn(
        ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
        : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
      null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
    agg_fname += "_skip_val";
    agg_args.push_back(null_lv);
      agg_fname += "_gpu";
      agg_args.push_back(base_dev_addr);
      agg_args.push_back(base_host_addr);
      agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
          count_distinct_descriptor.bitmapPaddedSizeBytes() %
          count_distinct_descriptor.sub_bitmap_count);
      agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
                                        count_distinct_descriptor.sub_bitmap_count)));
  executor_->cgen_state_->emitExternalCall(
      agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
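
// Illustrative sketch (hypothetical helper, not part of the original file):
// the bitmap variant of COUNT DISTINCT boils down to setting one bit per
// distinct bucketed value, roughly:
static void count_distinct_bitmap_sketch(int8_t* bitmap,
                                         int64_t val,
                                         int64_t min_val,
                                         int64_t bucket_size) {
  const uint64_t bit = (val - min_val) / (bucket_size > 1 ? bucket_size : 1);
  bitmap[bit >> 3] |= (1 << (bit & 7));  // set bit `bit`, byte-addressed
}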
    const size_t target_idx,
    std::vector<llvm::Value*>& agg_args,
  llvm::BasicBlock *calc, *skip{nullptr};
    auto* cs = executor_->cgen_state_.get();
    auto& irb = cs->ir_builder_;
    auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
    auto* const skip_cond = arg_ti.is_fp()
                                ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
                                : irb.CreateICmpEQ(agg_args.back(), null_value);
    calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
    skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
    irb.CreateCondBr(skip_cond, skip, calc);
    cs->current_func_->getBasicBlockList().push_back(calc);
    irb.SetInsertPoint(calc);
  if (!arg_ti.is_fp()) {
    agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
  cs->emitExternalCall(
      "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
    cs->current_func_->getBasicBlockList().push_back(skip);
    irb.SetInsertPoint(skip);
    std::vector<llvm::Value*>& agg_args,
  llvm::BasicBlock *calc, *skip{nullptr};
    bool const is_fp = arg_ti.is_fp();
    auto* cs = executor_->cgen_state_.get();
    auto& irb = cs->ir_builder_;
    auto* const null_value =
        is_fp ? cs->inlineNull(arg_ti) : cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
    auto* const skip_cond = is_fp ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
                                  : irb.CreateICmpEQ(agg_args.back(), null_value);
    calc = llvm::BasicBlock::Create(cs->context_, "calc_mode");
    skip = llvm::BasicBlock::Create(cs->context_, "skip_mode");
    irb.CreateCondBr(skip_cond, skip, calc);
    cs->current_func_->getBasicBlockList().push_back(calc);
    irb.SetInsertPoint(calc);
    auto* const int_type = get_int_type(8 * arg_ti.get_size(), cs->context_);
    agg_args.back() = irb.CreateBitCast(agg_args.back(), int_type);
  cs->emitExternalCall("agg_mode_func", llvm::Type::getVoidTy(cs->context_), agg_args);
    cs->current_func_->getBasicBlockList().push_back(skip);
    irb.SetInsertPoint(skip);
  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
  if (target_ti.is_buffer() &&
      !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
    const auto target_lvs =
        agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
            target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
    if (!func_expr && !arr_expr) {
      if (target_ti.is_text_encoding_none()) {
        CHECK_EQ(size_t(3), target_lvs.size());
        return {target_lvs[1], target_lvs[2]};
      CHECK(target_ti.is_array());
      CHECK_EQ(size_t(1), target_lvs.size());
      CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
        const auto& elem_ti = target_ti.get_elem_type();
            executor_->cgen_state_->emitExternalCall(
                {target_lvs.front(), code_generator.posArg(target_expr)}),
            executor_->cgen_state_->emitExternalCall(
                {target_lvs.front(),
                 code_generator.posArg(target_expr),
        throw std::runtime_error(
            "Using array[] operator as argument to an aggregate operator is not "
      CHECK(func_expr || arr_expr);
      if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
        CHECK_EQ(size_t(1), target_lvs.size());
        const auto prefix = target_ti.get_buffer_name();
        CHECK(target_ti.is_array() || target_ti.is_text_encoding_none());
        const auto target_lv = LL_BUILDER.CreateLoad(
            target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
        const auto i8p_ty = llvm::PointerType::get(
        const auto ptr = LL_BUILDER.CreatePointerCast(
            LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
        const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
        const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
        const auto nullcheck_ok_bb =
        const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
        const auto nullcheck = LL_BUILDER.CreateICmpEQ(
            null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
        LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
        auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
        result_phi->addIncoming(ptr, nullcheck_ok_bb);
        const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
            executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
        result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
        executor_->cgen_state_->emitExternalCall(
            "register_buffer_with_executor_rsm",
            llvm::Type::getVoidTy(executor_->cgen_state_->context_),
        LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
        return {result_phi, size};
      CHECK_EQ(size_t(2), target_lvs.size());
      return {target_lvs[0], target_lvs[1]};
  if (target_ti.is_geometry() &&
      !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
    auto generate_coord_lvs =
        [&](auto* selected_target_expr,
            bool const fetch_columns) -> std::vector<llvm::Value*> {
      const auto target_lvs =
          code_generator.codegen(selected_target_expr, fetch_columns, co);
      if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
      if (geo_uoper || geo_binoper) {
        CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
        CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
      std::vector<llvm::Value*> coords;
      for (const auto& target_lv : target_lvs) {
        const size_t elem_sz = ctr == 0 ? 1 : 4;
        int32_t fixlen = -1;
        if (target_ti.get_type() == kPOINT) {
            const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
            if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
              fixlen = coords_cd->columnType.get_size();
          coords.push_back(executor_->cgen_state_->emitExternalCall(
              "fast_fixlen_array_buff",
              {target_lv, code_generator.posArg(selected_target_expr)}));
          auto fixed_len_lv = executor_->cgen_state_->emitExternalCall(
              "determine_fixed_array_len",
              {target_lv, executor_->cgen_state_->llInt(int64_t(fixlen))});
          coords.push_back(fixed_len_lv);
        coords.push_back(executor_->cgen_state_->emitExternalCall(
            {target_lv, code_generator.posArg(selected_target_expr)}));
        coords.push_back(executor_->cgen_state_->emitExternalCall(
            code_generator.posArg(selected_target_expr),
      return generate_coord_lvs(agg_expr->get_arg(), true);
      return generate_coord_lvs(target_expr,
                                !executor_->plan_state_->allow_lazy_fetch_);
  bool fetch_column = !executor_->plan_state_->allow_lazy_fetch_;
  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
                  : code_generator.codegen(target_expr, fetch_column, co);
                                           const std::vector<llvm::Value*>& args) {
  return executor_->cgen_state_->emitCall(fname, args);
  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
      llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
    const auto grouped_col_expr =
    if (!grouped_col_expr) {
    const auto& column_key = grouped_col_expr->getColumnKey();
    if (column_key.table_id <= 0) {
        {column_key.db_id, column_key.table_id});
    if (td->shardedColumnId == column_key.column_id) {