21 #include <llvm/IR/Constants.h>
22 #include <llvm/IR/IRBuilder.h>
23 #include <llvm/IR/Instructions.h>
24 #include <llvm/IR/Verifier.h>
// Builds an llvm::AttributeList that attaches every attribute kind in `attrs`
// at attribute index `index`, using the LLVMContext obtained from `mod`.
// NOTE(review): the embedded line numbers in this listing skip (33-34, 37, 41,
// 43), so the head of the signature and the #else/#endif lines are not shown.
32 template <
typename... ATTRS>
35 ATTRS
const... attrs) {
// Compile-time check: every ATTRS must be exactly llvm::Attribute::AttrKind.
36 static_assert((std::is_same_v<llvm::Attribute::AttrKind, ATTRS> && ...));
// Compile-time cap: at most 8 attribute kinds may be passed per call.
38 static_assert(
sizeof...(ATTRS) <= 8,
"Use a llvm::SmallVector with a larger size.");
// For LLVM major version >= 14 the AttrBuilder is constructed from the
// LLVMContext; the argument-less declaration below is presumably the
// alternative for older versions (its #else is not visible here).
39 #if 14 <= LLVM_VERSION_MAJOR
40 llvm::AttrBuilder attr_builder(mod->getContext());
42 llvm::AttrBuilder attr_builder;
// Fold expression: add each requested attribute kind to the builder.
44 (attr_builder.addAttribute(attrs), ...);
45 return llvm::AttributeList::get(mod->getContext(), index, attr_builder);
// Params accumulates, for a function that is about to be created, the
// parameter types (types_), parameter names (names_) and attribute lists
// (attrs_), all tied to the module mod_.
// NOTE(review): the class header and several method signature lines are not
// visible in this listing (the embedded line numbers skip).
49 template <
size_t NTYPES>
51 llvm::Module
const*
const mod_;
52 llvm::SmallVector<llvm::Type*, NTYPES>
types_;
53 llvm::SmallVector<char const*, NTYPES>
names_;
// attrs_ has one more inline slot than types_/names_.
// NOTE(review): presumably the extra slot is for function-level attributes — confirm.
55 llvm::SmallVector<llvm::AttributeList, NTYPES + 1>
attrs_;
// Constructor: only stores the module pointer.
58 Params(llvm::Module
const*
const mod) : mod_(mod) {}
// addAttributes: only its template header and static_assert are visible here.
60 template <
typename... ATTRS>
62 static_assert((std::is_same_v<llvm::Attribute::AttrKind, ATTRS> && ...));
// attributeList: combines the collected attrs_ entries into one AttributeList.
67 return llvm::AttributeList::get(mod_->getContext(), attrs_);
// pushBack: appends one parameter (type and name) and, when attribute kinds
// are supplied, registers them for that parameter.
70 template <
typename... ATTRS>
72 static_assert((std::is_same_v<llvm::Attribute::AttrKind, ATTRS> && ...));
73 types_.push_back(type);
74 names_.push_back(name);
75 if constexpr (0u <
sizeof...(ATTRS)) {
// Argument attribute indices start at 1 (asserted below), so after the
// push_back above types_.size() is the attribute index of the new parameter.
76 static_assert(1u == llvm::AttributeList::AttrIndex::FirstArgIndex);
77 addAttributes(types_.size(), attrs...);
// setNames: walks names_ in order starting from the argument iterator `itr`;
// the loop body is not visible in this listing.
81 void setNames(llvm::Function::arg_iterator itr)
const {
82 for (
char const*
const name : names_) {
// types: mutable access to the collected parameter types.
87 auto&
types() {
return types_; }
// make_params: fills a Params object with the parameters of a query template
// function. IS_GROUP_BY selects the name of the third visible parameter and
// which attributes are attached to the pointer parameters; hoist_literals
// controls whether the "literals" parameter is added.
// NOTE(review): the signature, the construction of `params`, the `else` line
// and the return statement are not visible in this listing.
91 template <
bool IS_GROUP_BY,
size_t NTYPES = 13u>
93 constexpr llvm::Attribute::AttrKind NoCapture = llvm::Attribute::NoCapture;
// Integer types of width 8/32/64 and pointer types built from them
// (second argument 0 = address space 0).
94 auto*
const i8_type = llvm::IntegerType::get(mod->getContext(), 8);
95 auto*
const i32_type = llvm::IntegerType::get(mod->getContext(), 32);
96 auto*
const i64_type = llvm::IntegerType::get(mod->getContext(), 64);
97 auto*
const pi8_type = llvm::PointerType::get(i8_type, 0);
98 auto*
const ppi8_type = llvm::PointerType::get(pi8_type, 0);
99 auto*
const pi32_type = llvm::PointerType::get(i32_type, 0);
100 auto*
const pi64_type = llvm::PointerType::get(i64_type, 0);
101 auto*
const ppi64_type = llvm::PointerType::get(pi64_type, 0);
// These four parameters are pushed without any attributes. The third one is
// named "group_by_buffers" for the group-by variant and "out" otherwise.
107 params.
pushBack(pi32_type,
"error_code");
108 params.
pushBack(pi32_type,
"total_matched");
109 params.
pushBack(ppi64_type, IS_GROUP_BY ?
"group_by_buffers" :
"out");
110 params.
pushBack(i32_type,
"frag_idx");
// Group-by variant: every pointer parameter below is NoCapture + ReadOnly and
// the function itself gets UWTable.
111 if constexpr (IS_GROUP_BY) {
112 constexpr llvm::Attribute::AttrKind ReadOnly = llvm::Attribute::ReadOnly;
113 constexpr llvm::Attribute::AttrKind UWTable = llvm::Attribute::UWTable;
114 params.
pushBack(pi32_type,
"row_index_resume", NoCapture, ReadOnly);
115 params.
pushBack(ppi8_type,
"byte_stream", NoCapture, ReadOnly);
// "literals" is only part of the signature when literals are hoisted.
116 if (hoist_literals) {
117 params.
pushBack(pi8_type,
"literals", NoCapture, ReadOnly);
119 params.
pushBack(pi64_type,
"row_count_ptr", NoCapture, ReadOnly);
120 params.
pushBack(pi64_type,
"frag_row_off_ptr", NoCapture, ReadOnly);
121 params.
pushBack(pi32_type,
"max_matched_ptr", NoCapture, ReadOnly);
122 params.
pushBack(pi64_type,
"agg_init_val", NoCapture, ReadOnly);
123 params.
pushBack(pi64_type,
"join_hash_tables", NoCapture, ReadOnly);
124 params.
pushBack(pi8_type,
"row_func_mgr", NoCapture, ReadOnly);
// Function-level attribute (FunctionIndex rather than a parameter index).
125 params.
addAttributes(llvm::AttributeList::AttrIndex::FunctionIndex, UWTable);
// Same parameter names, types and order as above, but only NoCapture is
// attached. NOTE(review): presumably the non-group-by (else) branch; the
// `else` line itself is not visible here.
130 params.
pushBack(pi32_type,
"row_index_resume", NoCapture);
131 params.
pushBack(ppi8_type,
"byte_stream", NoCapture);
132 if (hoist_literals) {
133 params.
pushBack(pi8_type,
"literals", NoCapture);
135 params.
pushBack(pi64_type,
"row_count_ptr", NoCapture);
136 params.
pushBack(pi64_type,
"frag_row_off_ptr", NoCapture);
137 params.
pushBack(pi32_type,
"max_matched_ptr", NoCapture);
138 params.
pushBack(pi64_type,
"agg_init_val", NoCapture);
139 params.
pushBack(pi64_type,
"join_hash_tables", NoCapture);
140 params.
pushBack(pi8_type,
"row_func_mgr", NoCapture);
// get_pointer_element_type: reads the LLVM type of `value`; the visible return
// yields the pointee type of `pointer_type`.
// NOTE(review): the lines between these two statements (where `pointer_type`
// is introduced) are not visible in this listing.
147 auto type = value->getType();
151 return pointer_type->getPointerElementType();
// default_func_builder: looks up the function `name` in `mod` and creates it
// via Function::Create with external linkage, then sets the C calling
// convention and attributes on it.
// NOTE(review): the remaining FunctionType::get / Function::Create /
// setAttributes arguments and any guard around Function::Create are not
// visible in this listing.
155 using namespace llvm;
157 std::vector<Type*> func_args;
// The function type returns a 32-bit integer.
158 FunctionType* func_type = FunctionType::get(
159 IntegerType::get(mod->getContext(), 32),
163 auto func_ptr = mod->getFunction(name);
165 func_ptr = Function::Create(
167 GlobalValue::ExternalLinkage,
170 func_ptr->setCallingConv(CallingConv::C);
172 func_ptr->setAttributes(
// pos_step: looks up the function "pos_step" in `mod` and creates it via
// Function::Create with external linkage, then sets the C calling convention
// and attributes on it.
// NOTE(review): the remaining call arguments and any guard around
// Function::Create are not visible in this listing.
186 using namespace llvm;
188 std::vector<Type*> func_args;
// The function type returns a 32-bit integer.
189 FunctionType* func_type = FunctionType::get(
190 IntegerType::get(mod->getContext(), 32),
194 auto func_ptr = mod->getFunction(
"pos_step");
196 func_ptr = Function::Create(
198 GlobalValue::ExternalLinkage,
201 func_ptr->setCallingConv(CallingConv::C);
203 func_ptr->setAttributes(
// row_process: assembles the parameter type list for the "row_process"
// function, looks the function up in `mod` and creates it with external
// linkage and the C calling convention.
// NOTE(review): the start of the signature, the `else` line, the
// FunctionType::get / Function::Create arguments and the return are not
// visible in this listing.
209 const size_t aggr_col_count,
210 const bool hoist_literals) {
211 using namespace llvm;
213 std::vector<Type*> func_args;
214 auto i8_type = IntegerType::get(mod->getContext(), 8);
215 auto i32_type = IntegerType::get(mod->getContext(), 32);
216 auto i64_type = IntegerType::get(mod->getContext(), 64);
217 auto pi32_type = PointerType::get(i32_type, 0);
218 auto pi64_type = PointerType::get(i64_type, 0);
// With a non-zero aggr_col_count, one i64* parameter is added per column.
220 if (aggr_col_count) {
221 for (
size_t i = 0; i < aggr_col_count; ++i) {
222 func_args.push_back(pi64_type);
// NOTE(review): the six pushes below (2x i64*, 4x i32*) presumably form the
// else branch (aggr_col_count == 0) — confirm against the full source.
225 func_args.push_back(pi64_type);
226 func_args.push_back(pi64_type);
227 func_args.push_back(pi32_type);
228 func_args.push_back(pi32_type);
229 func_args.push_back(pi32_type);
230 func_args.push_back(pi32_type);
// Further visible parameters: i64*, i64, i64*, i64*.
233 func_args.push_back(pi64_type);
235 func_args.push_back(i64_type);
236 func_args.push_back(pi64_type);
237 func_args.push_back(pi64_type);
// An extra i8* parameter is appended only when hoist_literals is set.
238 if (hoist_literals) {
239 func_args.push_back(PointerType::get(i8_type, 0));
241 FunctionType* func_type = FunctionType::get(
246 std::string func_name{
"row_process"};
247 auto func_ptr = mod->getFunction(func_name);
250 func_ptr = Function::Create(
252 GlobalValue::ExternalLinkage,
255 func_ptr->setCallingConv(CallingConv::C);
256 func_ptr->setAttributes(
// query_template: emits the LLVM function "query_template" into `mod`.
// The function consists of the blocks .entry, .loop.preheader, .for.body,
// ._crit_edge and .exit. The loop variable `pos` starts at the result of
// pos_start, advances by the result of pos_step and calls row_process once per
// iteration while pos < row_count (signed comparison).
// NOTE(review): many original lines are missing from this listing (the
// embedded line numbers skip), so several statements are only partly visible.
268 const size_t aggr_col_count,
269 const bool hoist_literals,
270 const bool is_estimate_query,
272 using namespace llvm;
274 auto*
const i32_type = llvm::IntegerType::get(mod->getContext(), 32);
275 auto*
const i64_type = llvm::IntegerType::get(mod->getContext(), 64);
// Helper functions called by the generated code; each must be available.
277 llvm::Function*
const func_pos_start =
pos_start(mod);
278 CHECK(func_pos_start);
279 llvm::Function*
const func_pos_step =
pos_step(mod);
280 CHECK(func_pos_step);
282 CHECK(func_group_buff_idx);
// For estimate queries row_process is declared with a single aggregate column.
283 llvm::Function*
const func_row_process =
284 row_process(mod, is_estimate_query ? 1 : aggr_col_count, hoist_literals);
285 CHECK(func_row_process);
287 constexpr
bool IS_GROUP_BY =
false;
288 Params query_func_params = make_params<IS_GROUP_BY>(mod, hoist_literals);
// The generated function returns void and takes the collected parameter types.
290 FunctionType* query_func_type = FunctionType::get(
291 Type::getVoidTy(mod->getContext()),
292 query_func_params.types(),
295 std::string query_template_name{
"query_template"};
// The module must not already contain a function with this name.
296 auto query_func_ptr = mod->getFunction(query_template_name);
297 CHECK(!query_func_ptr);
299 query_func_ptr = Function::Create(
301 GlobalValue::ExternalLinkage,
304 query_func_ptr->setCallingConv(CallingConv::C);
305 query_func_ptr->setAttributes(query_func_params.attributeList());
306 query_func_params.setNames(query_func_ptr->arg_begin());
// Basic blocks of the generated function.
308 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
310 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
311 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".for.body", query_func_ptr, 0);
313 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
314 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
// Entry block: one i64 stack slot named "result" per aggregate column
// (skipped for estimate queries).
317 llvm::Value*
const agg_init_val =
get_arg_by_name(query_func_ptr,
"agg_init_val");
318 std::vector<Value*> result_ptr_vec;
319 llvm::CallInst* smem_output_buffer{
nullptr};
320 if (!is_estimate_query) {
321 for (
size_t i = 0; i < aggr_col_count; ++i) {
322 auto result_ptr =
new AllocaInst(i64_type, 0,
"result", bb_entry);
324 result_ptr_vec.push_back(result_ptr);
// Call to "init_shared_mem" with aggr_col_count * sizeof(int64_t) as an i32
// constant. NOTE(review): the condition guarding this call is not visible here.
327 auto init_smem_func = mod->getFunction(
"init_shared_mem");
328 CHECK(init_smem_func);
331 smem_output_buffer = CallInst::Create(
333 std::vector<llvm::Value*>{
335 llvm::ConstantInt::get(i32_type, aggr_col_count *
sizeof(int64_t))},
341 llvm::Value*
const row_count_ptr =
get_arg_by_name(query_func_ptr,
"row_count_ptr");
348 row_count->setName(
"row_count");
// Per column: address element i of the agg_init_val argument, load it and
// store it into the matching result slot.
349 std::vector<Value*> agg_init_val_vec;
350 if (!is_estimate_query) {
351 for (
size_t i = 0; i < aggr_col_count; ++i) {
352 auto idx_lv = ConstantInt::get(i32_type, i);
353 auto agg_init_gep = GetElementPtrInst::CreateInBounds(
354 agg_init_val->getType()->getPointerElementType(),
// NOTE(review): this local shadows the outer `agg_init_val` argument value
// for the remainder of the loop body.
359 auto agg_init_val =
new LoadInst(
362 agg_init_val_vec.push_back(agg_init_val);
363 auto init_val_st =
new StoreInst(agg_init_val, result_ptr_vec[i],
false, bb_entry);
// Calls to pos_start / pos_step in the entry block, C calling convention.
// NOTE(review): the locals `pos_start` and `pos_step` shadow the functions of
// the same names that were called above.
368 CallInst*
pos_start = CallInst::Create(func_pos_start,
"pos_start", bb_entry);
369 pos_start->setCallingConv(CallingConv::C);
371 llvm::AttributeList pos_start_pal;
374 CallInst*
pos_step = CallInst::Create(func_pos_step,
"pos_step", bb_entry);
375 pos_step->setCallingConv(CallingConv::C);
377 llvm::AttributeList pos_step_pal;
378 pos_step->setAttributes(pos_step_pal);
// group_buff_idx is only called for non-estimate queries.
381 if (!is_estimate_query) {
382 group_buff_idx = CallInst::Create(func_group_buff_idx,
"group_buff_idx", bb_entry);
385 llvm::AttributeList group_buff_idx_pal;
// Sign-extend pos_start to i64 and enter the loop only when it is below
// row_count; otherwise branch straight to the exit block.
389 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
390 ICmpInst* enter_or_not =
391 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
392 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
// Preheader: sign-extend pos_step to i64, then fall into the loop body.
395 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
396 BranchInst::Create(bb_forbody, bb_preheader);
// Loop variable: pos_start_i64 when coming from the preheader, otherwise the
// placeholder pos_inc_pre, which is replaced by pos_inc at the end.
400 PHINode* pos = PHINode::Create(i64_type, 2,
"pos", bb_forbody);
401 pos->addIncoming(pos_start_i64, bb_preheader);
402 pos->addIncoming(pos_inc_pre, bb_forbody);
// Arguments for row_process, in order: the result slots, (for estimate
// queries one extra value whose expression is not visible here), agg_init_val,
// pos, frag_row_off_ptr, row_count_ptr and, if hoisted, literals.
404 std::vector<Value*> row_process_params;
406 row_process_params.insert(
407 row_process_params.end(), result_ptr_vec.begin(), result_ptr_vec.end());
408 if (is_estimate_query) {
409 row_process_params.push_back(
412 row_process_params.push_back(agg_init_val);
413 row_process_params.push_back(pos);
414 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"frag_row_off_ptr"));
415 row_process_params.push_back(row_count_ptr);
416 if (hoist_literals) {
417 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"literals"));
420 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
423 llvm::AttributeList row_process_pal;
// Advance: pos + pos_step_i64 (no-signed-wrap add); loop again while the new
// position is below row_count, otherwise leave through the critical edge.
427 BinaryOperator::CreateNSW(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
428 ICmpInst* loop_or_exit =
429 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
430 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
// Critical edge: collects one `result` instruction per column (their creation
// is not visible here), then branches to the exit block.
433 std::vector<Instruction*> result_vec_pre;
434 if (!is_estimate_query) {
435 for (
size_t i = 0; i < aggr_col_count; ++i) {
442 result_vec_pre.push_back(
result);
446 BranchInst::Create(bb_exit, bb_crit_edge);
// Exit block: one PHI per column merging the value from the critical edge with
// the initial value from the entry block. The loop runs from the last column
// down and inserts at the front, so result_vec ends up in ascending order.
460 if (!is_estimate_query) {
461 std::vector<PHINode*> result_vec;
462 for (int64_t i = aggr_col_count - 1; i >= 0; --i) {
464 PHINode::Create(IntegerType::get(mod->getContext(), 64), 2,
"", bb_exit);
465 result->addIncoming(result_vec_pre[i], bb_crit_edge);
466 result->addIncoming(agg_init_val_vec[i], bb_entry);
467 result_vec.insert(result_vec.begin(),
result);
// Per-column write-out. Two paths are visible: one calls "agg_sum_shared" on
// an address inside smem_output_buffer; the other stores into col_buffer at a
// slot whose computation includes frag_idx * pos_step.
// NOTE(review): the conditions selecting between them are not visible here.
470 llvm::Value*
const frag_idx =
get_arg_by_name(query_func_ptr,
"frag_idx");
471 for (
size_t i = 0; i < aggr_col_count; ++i) {
472 auto col_idx = ConstantInt::get(i32_type, i);
474 auto target_addr = GetElementPtrInst::CreateInBounds(
475 smem_output_buffer->getType()->getPointerElementType(),
482 auto agg_func = mod->getFunction(
"agg_sum_shared");
485 agg_func, std::vector<llvm::Value*>{target_addr, result_vec[i]},
"", bb_exit);
487 auto out_gep = GetElementPtrInst::CreateInBounds(
488 out->getType()->getPointerElementType(), out, col_idx,
"", bb_exit);
492 auto slot_idx = BinaryOperator::CreateAdd(
494 BinaryOperator::CreateMul(frag_idx,
pos_step,
"", bb_exit),
497 auto target_addr = GetElementPtrInst::CreateInBounds(
498 col_buffer->getType()->getPointerElementType(),
503 StoreInst* result_st =
new StoreInst(result_vec[i], target_addr,
false, bb_exit);
// Calls "sync_threadblock" once, then "write_back_non_grouped_agg" per column
// with (smem_output_buffer, gmem_output_buffer, column index).
// NOTE(review): the condition guarding this section is not visible here.
509 auto sync_thread_func = mod->getFunction(
"sync_threadblock");
510 CHECK(sync_thread_func);
511 CallInst::Create(sync_thread_func, std::vector<llvm::Value*>{},
"", bb_exit);
512 auto reduce_smem_to_gmem_func = mod->getFunction(
"write_back_non_grouped_agg");
513 CHECK(reduce_smem_to_gmem_func);
517 for (
size_t i = 0; i < aggr_col_count; i++) {
519 GetElementPtrInst::CreateInBounds(out->getType()->getPointerElementType(),
521 ConstantInt::get(i32_type, i),
530 reduce_smem_to_gmem_func,
531 std::vector<llvm::Value*>{
532 smem_output_buffer, gmem_output_buffer, ConstantInt::get(i32_type, i)},
// The generated function returns void.
539 ReturnInst::Create(mod->getContext(), bb_exit);
// Resolve the placeholder used in the `pos` PHI with the real increment.
542 pos_inc_pre->replaceAllUsesWith(pos_inc);
// verifyFunction returns true when the IR is broken; that is treated as fatal.
545 if (verifyFunction(*query_func_ptr, &llvm::errs())) {
546 LOG(
FATAL) <<
"Generated invalid code.";
// query_group_by_template: emits the LLVM function "query_group_by_template"
// into `mod`. The function consists of the blocks .entry, .loop.preheader,
// .forbody, ._crit_edge and .exit (plus filter_match / filter_nomatch when
// check_scan_limit is set) and calls row_process once per loop iteration.
// NOTE(review): many original lines are missing from this listing (the
// embedded line numbers skip), so several statements are only partly visible.
555 const bool hoist_literals,
558 const bool check_scan_limit,
563 using namespace llvm;
565 auto*
const i32_type = llvm::IntegerType::get(mod->getContext(), 32);
566 auto*
const i64_type = llvm::IntegerType::get(mod->getContext(), 64);
// Helper functions called by the generated code; each must be available.
568 llvm::Function*
const func_pos_start =
pos_start(mod);
569 CHECK(func_pos_start);
570 llvm::Function*
const func_pos_step =
pos_step(mod);
571 CHECK(func_pos_step);
573 CHECK(func_group_buff_idx);
// row_process is declared with zero aggregate columns here.
574 llvm::Function*
const func_row_process =
row_process(mod, 0, hoist_literals);
575 CHECK(func_row_process);
// One of two alternatives; only the "init_shared_mem_nop" side and not the
// selecting condition is visible in this listing.
576 llvm::Function*
const func_init_shared_mem =
578 : mod->getFunction(
"init_shared_mem_nop");
579 CHECK(func_init_shared_mem);
581 auto func_write_back = mod->getFunction(
"write_back_nop");
582 CHECK(func_write_back);
584 constexpr
bool IS_GROUP_BY =
true;
585 Params query_func_params = make_params<IS_GROUP_BY>(mod, hoist_literals);
// The generated function returns void and takes the collected parameter types.
587 FunctionType* query_func_type = FunctionType::get(
588 Type::getVoidTy(mod->getContext()),
589 query_func_params.types(),
592 std::string query_name{
"query_group_by_template"};
// The module must not already contain a function with this name.
593 auto query_func_ptr = mod->getFunction(query_name);
594 CHECK(!query_func_ptr);
// NOTE(review): the literal below repeats the value of query_name.
596 query_func_ptr = Function::Create(
598 GlobalValue::ExternalLinkage,
599 "query_group_by_template",
601 query_func_ptr->setCallingConv(CallingConv::C);
602 query_func_ptr->setAttributes(query_func_params.attributeList());
603 query_func_params.setNames(query_func_ptr->arg_begin());
// Basic blocks of the generated function.
605 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
607 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
608 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".forbody", query_func_ptr, 0);
610 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
611 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
// Entry block: loads created for the row_count_ptr and max_matched_ptr
// arguments (the LoadInst operands are not visible here).
614 llvm::Value*
const row_count_ptr =
get_arg_by_name(query_func_ptr,
"row_count_ptr");
615 LoadInst* row_count =
new LoadInst(
618 row_count->setName(
"row_count");
620 llvm::Value*
const max_matched_ptr =
get_arg_by_name(query_func_ptr,
"max_matched_ptr");
621 LoadInst* max_matched =
new LoadInst(
// Two i32 stack slots, later passed to row_process.
625 auto crt_matched_ptr =
new AllocaInst(i32_type, 0,
"crt_matched", bb_entry);
626 auto old_total_matched_ptr =
new AllocaInst(i32_type, 0,
"old_total_matched", bb_entry);
// Calls to pos_start / pos_step / group_buff_idx in the entry block.
// NOTE(review): the locals `pos_start` and `pos_step` shadow the functions of
// the same names that were called above.
627 CallInst*
pos_start = CallInst::Create(func_pos_start,
"", bb_entry);
628 pos_start->setCallingConv(CallingConv::C);
630 llvm::AttributeList pos_start_pal;
633 CallInst*
pos_step = CallInst::Create(func_pos_step,
"", bb_entry);
634 pos_step->setCallingConv(CallingConv::C);
636 llvm::AttributeList pos_step_pal;
637 pos_step->setAttributes(pos_step_pal);
639 CallInst* group_buff_idx_call = CallInst::Create(func_group_buff_idx,
"", bb_entry);
640 group_buff_idx_call->setCallingConv(CallingConv::C);
641 group_buff_idx_call->setTailCall(
true);
642 llvm::AttributeList group_buff_idx_pal;
643 group_buff_idx_call->setAttributes(group_buff_idx_pal);
646 auto*
const group_by_buffers =
get_arg_by_name(query_func_ptr,
"group_by_buffers");
// NOTE(review): dyn_cast yields nullptr when the type is not a PointerType;
// Ty is dereferenced below and no null check is visible in this listing.
647 const PointerType* Ty = dyn_cast<PointerType>(group_by_buffers->getType());
// varlen_output_buffer is either derived from a GEP at index 0 or set to a
// null i64* constant; the selecting condition is not visible here.
650 Value* varlen_output_buffer{
nullptr};
654 auto varlen_output_buffer_gep = GetElementPtrInst::Create(
655 Ty->getPointerElementType(),
657 llvm::ConstantInt::get(llvm::Type::getInt32Ty(mod->getContext()), 0),
660 varlen_output_buffer =
662 varlen_output_buffer_gep,
663 "varlen_output_buffer",
670 llvm::ConstantInt::get(llvm::Type::getInt32Ty(mod->getContext()), 1),
671 "group_buff_idx_varlen_offset",
674 varlen_output_buffer =
675 ConstantPointerNull::get(Type::getInt64PtrTy(mod->getContext()));
677 CHECK(varlen_output_buffer);
// Sign-extend pos_start to i64 and address the group_by_buffers entry at
// group_buff_idx; the value named "col_buffer" comes from that GEP.
679 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
680 GetElementPtrInst* group_by_buffers_gep = GetElementPtrInst::Create(
681 Ty->getPointerElementType(), group_by_buffers,
group_buff_idx,
"", bb_entry);
683 group_by_buffers_gep,
687 col_buffer->setName(
"col_buffer");
// result_buffer is the return value of the init-shared-memory function called
// with (col_buffer, shared_mem_bytes_lv).
690 llvm::ConstantInt* shared_mem_bytes_lv =
693 llvm::CallInst* result_buffer =
694 CallInst::Create(func_init_shared_mem,
695 std::vector<llvm::Value*>{col_buffer, shared_mem_bytes_lv},
// Enter the loop only when pos_start is below row_count (signed comparison);
// otherwise branch straight to the exit block.
699 ICmpInst* enter_or_not =
700 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
701 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
// Preheader: sign-extend pos_step to i64, then fall into the loop body.
704 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
705 BranchInst::Create(bb_forbody, bb_preheader);
// Loop variable. With check_scan_limit it has three predecessors (preheader,
// filter_match, filter_nomatch), otherwise two.
709 PHINode* pos = PHINode::Create(i64_type, check_scan_limit ? 3 : 2,
"pos", bb_forbody);
// Arguments for row_process, in order: result_buffer, varlen_output_buffer,
// crt_matched_ptr, total_matched, old_total_matched_ptr, max_matched_ptr,
// agg_init_val, pos, frag_row_off_ptr, row_count_ptr and, if hoisted, literals.
711 std::vector<Value*> row_process_params;
712 row_process_params.push_back(result_buffer);
713 row_process_params.push_back(varlen_output_buffer);
714 row_process_params.push_back(crt_matched_ptr);
715 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"total_matched"));
716 row_process_params.push_back(old_total_matched_ptr);
717 row_process_params.push_back(max_matched_ptr);
718 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"agg_init_val"));
719 row_process_params.push_back(pos);
720 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"frag_row_off_ptr"));
721 row_process_params.push_back(row_count_ptr);
722 if (hoist_literals) {
723 row_process_params.push_back(
get_arg_by_name(query_func_ptr,
"literals"));
// With check_scan_limit an i32 zero is stored before the row_process call
// (the store destination is not visible here).
725 if (check_scan_limit) {
726 new StoreInst(ConstantInt::get(IntegerType::get(mod->getContext(), 32), 0),
731 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
734 llvm::AttributeList row_process_pal;
// Call to "sync_warp_protected" with (pos, row_count).
// NOTE(review): the condition guarding this call is not visible here.
739 auto func_sync_warp_protected = mod->getFunction(
"sync_warp_protected");
740 CHECK(func_sync_warp_protected);
741 CallInst::Create(func_sync_warp_protected,
742 std::vector<llvm::Value*>{pos, row_count},
// Advance: pos + pos_step_i64; loop_or_exit is true while the new position is
// below row_count (signed comparison).
748 BinaryOperator::Create(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
749 ICmpInst* loop_or_exit =
750 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
// Scan-limit variant: the loop body branches to filter_match when crt_matched
// is non-zero and to filter_nomatch otherwise. filter_match combines
// loop_or_exit with limit_not_reached; filter_nomatch continues the loop on
// loop_or_exit alone. Both new blocks are inserted before bb_crit_edge.
751 if (check_scan_limit) {
757 auto filter_match = BasicBlock::Create(
758 mod->getContext(),
"filter_match", query_func_ptr, bb_crit_edge);
759 llvm::Value* new_total_matched =
761 old_total_matched_ptr,
766 BinaryOperator::CreateAdd(new_total_matched, crt_matched,
"", filter_match);
767 CHECK(new_total_matched);
768 ICmpInst* limit_not_reached =
new ICmpInst(*filter_match,
772 "limit_not_reached");
776 BinaryOperator::Create(
777 BinaryOperator::And, loop_or_exit, limit_not_reached,
"", filter_match),
779 auto filter_nomatch = BasicBlock::Create(
780 mod->getContext(),
"filter_nomatch", query_func_ptr, bb_crit_edge);
781 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, filter_nomatch);
782 ICmpInst* crt_matched_nz =
new ICmpInst(
783 *bb_forbody, ICmpInst::ICMP_NE, crt_matched, ConstantInt::get(i32_type, 0),
"");
784 BranchInst::Create(filter_match, filter_nomatch, crt_matched_nz, bb_forbody);
// pos_pre is a placeholder that is replaced by pos_inc at the end.
785 pos->addIncoming(pos_start_i64, bb_preheader);
786 pos->addIncoming(pos_pre, filter_match);
787 pos->addIncoming(pos_pre, filter_nomatch);
// Variant without scan limit: the loop body branches back to itself or to the
// critical edge directly. NOTE(review): the `else` line is not visible here.
789 pos->addIncoming(pos_start_i64, bb_preheader);
790 pos->addIncoming(pos_pre, bb_forbody);
791 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
795 BranchInst::Create(bb_exit, bb_crit_edge);
// Write-back call with (col_buffer, result_buffer, shared_mem_bytes_lv).
798 CallInst::Create(func_write_back,
799 std::vector<Value*>{col_buffer, result_buffer, shared_mem_bytes_lv},
// The generated function returns void.
803 ReturnInst::Create(mod->getContext(), bb_exit);
// Resolve the placeholder used in the `pos` PHI with the real increment.
806 pos_pre->replaceAllUsesWith(pos_inc);
// verifyFunction returns true when the IR is broken; that is treated as fatal.
809 if (verifyFunction(*query_func_ptr, &llvm::errs())) {
810 LOG(
FATAL) <<
"Generated invalid code. ";
void addAttributes(unsigned const index, ATTRS const ...attrs)
llvm::Function * pos_start(llvm::Module *mod)
llvm::SmallVector< llvm::AttributeList, NTYPES+1 > attrs_
llvm::Function * row_process(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals)
llvm::Function * pos_step(llvm::Module *mod)
bool hasVarlenOutput() const
size_t getSharedMemorySize() const
llvm::Function * group_buff_idx(llvm::Module *mod)
llvm::AttributeList make_attribute_list(llvm::Module const *const mod, unsigned const index, ATTRS const ...attrs)
std::tuple< llvm::Function *, llvm::CallInst * > query_template(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext &gpu_smem_context)
AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs)
Type pointer_type(const Type pointee)
#define LLVM_ALIGN(alignment)
bool isSharedMemoryUsed() const
std::tuple< llvm::Function *, llvm::CallInst * > query_group_by_template(llvm::Module *mod, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext &gpu_smem_context)
llvm::Function * default_func_builder(llvm::Module *mod, const std::string &name)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Params(llvm::Module const *const mod)
Params< NTYPES > make_params(llvm::Module const *const mod, bool const hoist_literals)
llvm::Module const *const mod_
llvm::SmallVector< char const *, NTYPES > names_
bool isWarpSyncRequired(const ExecutorDeviceType) const
void setNames(llvm::Function::arg_iterator itr) const
llvm::SmallVector< llvm::Type *, NTYPES > types_
llvm::Type * get_pointer_element_type(llvm::Value *value)
llvm::AttributeList attributeList() const
void pushBack(llvm::Type *const type, char const *const name, ATTRS const ...attrs)