22 #include <llvm/IR/InstIterator.h>
23 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
24 #include <llvm/Transforms/Utils/Cloning.h>
27 const bool contains_left_deep_outer_join,
29 : executor_id_(executor->getExecutorId())
32 , filter_func_(nullptr)
33 , current_func_(nullptr)
34 , row_func_bb_(nullptr)
35 , filter_func_bb_(nullptr)
36 , row_func_call_(nullptr)
37 , filter_func_call_(nullptr)
38 , context_(executor->getContext())
39 , ir_builder_(context_)
40 , contains_left_deep_outer_join_(contains_left_deep_outer_join)
41 , outer_join_match_found_per_level_(std::max(num_query_infos, size_t(1)) - 1)
42 , needs_error_check_(
false)
44 , query_func_(nullptr)
45 , query_func_entry_ir_builder_(context_){};
48 const bool contains_left_deep_outer_join)
50 contains_left_deep_outer_join,
51 Executor::getExecutor(Executor::UNITARY_EXECUTOR_ID).get()) {}
54 : executor_id_(Executor::INVALID_EXECUTOR_ID)
58 , ir_builder_(context_)
59 , contains_left_deep_outer_join_(
false)
60 , needs_error_check_(
false)
62 , query_func_(nullptr)
63 , query_func_entry_ir_builder_(context_){};
72 return llInt(int64_t(0));
98 return llInt(int64_t(0));
122 const size_t byte_width,
123 const bool is_signed) {
124 int64_t max_int{0}, min_int{0};
128 uint64_t max_uint{0}, min_uint{0};
130 max_int =
static_cast<int64_t
>(max_uint);
133 switch (byte_width) {
135 return std::make_pair(::
ll_int(static_cast<int8_t>(max_int),
context_),
138 return std::make_pair(::
ll_int(static_cast<int16_t>(max_int),
context_),
141 return std::make_pair(::
ll_int(static_cast<int32_t>(max_int),
context_),
151 auto src_bits = val->getType()->getScalarSizeInBits();
152 if (src_bits == dst_bits) {
155 if (val->getType()->isIntegerTy()) {
160 if (val->getType()->isPointerTy()) {
164 CHECK(val->getType()->isFloatTy() || val->getType()->isDoubleTy());
169 dst_type = llvm::Type::getDoubleTy(
context_);
172 dst_type = llvm::Type::getFloatTy(
context_);
183 if (!fn->isDeclaration()) {
188 auto func_impl =
getExecutor()->get_rt_module()->getFunction(fn->getName());
189 CHECK(func_impl) << fn->getName().str();
191 if (func_impl->isDeclaration()) {
195 auto DestI = fn->arg_begin();
196 for (
auto arg_it = func_impl->arg_begin(); arg_it != func_impl->arg_end(); ++arg_it) {
197 DestI->setName(arg_it->getName());
198 vmap_[&*arg_it] = &*DestI++;
201 llvm::SmallVector<llvm::ReturnInst*, 8> Returns;
202 #if LLVM_VERSION_MAJOR > 12
203 llvm::CloneFunctionInto(
204 fn, func_impl,
vmap_, llvm::CloneFunctionChangeType::DifferentModule, Returns);
206 llvm::CloneFunctionInto(fn, func_impl,
vmap_,
true, Returns);
209 for (
auto it = llvm::inst_begin(fn), e = llvm::inst_end(fn); it != e; ++it) {
210 if (llvm::isa<llvm::CallInst>(*it)) {
211 auto& call = llvm::cast<llvm::CallInst>(*it);
218 const std::vector<llvm::Value*>&
args) {
220 auto func =
module_->getFunction(fname);
221 CHECK(func) << fname;
230 const std::vector<llvm::Value*>&
args) {
232 auto func =
module_->getFunction(fname);
242 llvm::Value* errorCode,
247 ir_builder_.CreateCondBr(condition, check_ok, check_fail);
// Generic getTy: resolves the LLVM type for T by stripping one pointer level
// from T (std::remove_pointer_t), looking up the LLVM type of the result via
// getTy, and returning a pointer to that type (getPointerTo()).
// NOTE(review): when T is not a pointer, std::remove_pointer_t<T> is T itself,
// so the inner call names this same instantiation unless an explicit
// specialization for T has been declared -- confirm every non-pointer T in use
// has one.
256 template <
typename T>
257 llvm::Type*
getTy(llvm::LLVMContext& ctx) {
return getTy<std::remove_pointer_t<T>>(ctx)->getPointerTo(); }
// Pure virtual: given a module and an LLVM context, returns an
// llvm::FunctionCallee. Declared const, so implementations may not modify
// the object.
275 virtual llvm::FunctionCallee getFunction(llvm::Module* llvm_module,
276 llvm::LLVMContext& context)
const = 0;
280 template <
typename... TYPES>
285 llvm::LLVMContext& context)
const override {
286 return llvm_module->getOrInsertFunction(name_, getTy<TYPES>(context)...);
290 static const std::unordered_map<std::string, std::shared_ptr<GpuFunctionDefinition>>
292 {
"asin", std::make_shared<GpuFunction<double, double>>(
"Asin")},
293 {
"atanh", std::make_shared<GpuFunction<double, double>>(
"Atanh")},
294 {
"atan", std::make_shared<GpuFunction<double, double>>(
"Atan")},
295 {
"cosh", std::make_shared<GpuFunction<double, double>>(
"Cosh")},
296 {
"cos", std::make_shared<GpuFunction<double, double>>(
"Cos")},
297 {
"exp", std::make_shared<GpuFunction<double, double>>(
"Exp")},
298 {
"log", std::make_shared<GpuFunction<double, double>>(
"ln")},
299 {
"pow", std::make_shared<GpuFunction<double, double, double>>(
"power")},
300 {
"sinh", std::make_shared<GpuFunction<double, double>>(
"Sinh")},
301 {
"sin", std::make_shared<GpuFunction<double, double>>(
"Sin")},
302 {
"sqrt", std::make_shared<GpuFunction<double, double>>(
"Sqrt")},
303 {
"tan", std::make_shared<GpuFunction<double, double>>(
"Tan")}};
307 std::vector<std::string> ret;
310 CHECK(!fn->isDeclaration());
312 for (
auto& basic_block : *fn) {
313 auto& inst_list = basic_block.getInstList();
314 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
315 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
317 CHECK(called_func_name);
321 ret.emplace_back(*called_func_name);
330 llvm::Function* fn) {
332 CHECK(!fn->isDeclaration());
336 throw QueryMustRunOnCpu(
"Codegen failed: Could not find replacement functon for " +
338 " to run on gpu. Query step must run in cpu mode.");
340 const auto& gpu_fcn_obj = map_it->second;
342 VLOG(1) <<
"Replacing " << fcn_to_replace <<
" with " << gpu_fcn_obj->name_
343 <<
" for parent function " << fn->getName().str();
345 for (
auto& basic_block : *fn) {
346 auto& inst_list = basic_block.getInstList();
347 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
348 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
350 if (called_func && called_func->getName().str() == fcn_to_replace) {
351 std::vector<llvm::Value*>
args;
352 std::vector<llvm::Type*> arg_types;
353 for (
auto& arg : call_inst->args()) {
354 arg_types.push_back(arg.get()->getType());
355 args.push_back(arg.get());
359 auto gpu_func_type = gpu_func.getFunctionType();
360 CHECK(gpu_func_type);
361 CHECK_EQ(gpu_func_type->getReturnType(), called_func->getReturnType());
362 llvm::ReplaceInstWithInst(call_inst,
363 llvm::CallInst::Create(gpu_func, args,
""));
383 llvm::CloneModule(*llvm_module,
vmap_, [always_clone](
const llvm::GlobalValue* gv) {
384 auto func = llvm::dyn_cast<llvm::Function>(gv);
388 return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
389 func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
396 const std::string& fname,
398 const std::vector<llvm::Value*>
args,
399 const std::vector<llvm::Attribute::AttrKind>& fnattrs,
400 const bool has_struct_return) {
401 std::vector<llvm::Type*> arg_types;
402 for (
const auto arg : args) {
404 arg_types.push_back(arg->getType());
406 auto func_ty = llvm::FunctionType::get(ret_type, arg_types,
false);
407 llvm::AttributeList attrs;
408 if (!fnattrs.empty()) {
409 std::vector<std::pair<unsigned, llvm::Attribute>> indexedAttrs;
410 indexedAttrs.reserve(fnattrs.size());
411 for (
auto attr : fnattrs) {
412 indexedAttrs.emplace_back(llvm::AttributeList::FunctionIndex,
413 llvm::Attribute::get(
context_, attr));
415 attrs = llvm::AttributeList::get(
context_,
416 {&indexedAttrs.front(), indexedAttrs.size()});
419 auto func_p =
module_->getOrInsertFunction(fname, func_ty, attrs);
421 auto callee = func_p.getCallee();
422 llvm::Function* func{
nullptr};
423 if (
auto callee_cast = llvm::dyn_cast<llvm::ConstantExpr>(callee)) {
426 CHECK(callee_cast->isCast());
427 CHECK_EQ(callee_cast->getNumOperands(), size_t(1));
428 func = llvm::dyn_cast<llvm::Function>(callee_cast->getOperand(0));
430 func = llvm::dyn_cast<llvm::Function>(callee);
433 llvm::FunctionType* func_type = func_p.getFunctionType();
435 if (has_struct_return) {
436 const auto arg_ti = func_type->getParamType(0);
437 CHECK(arg_ti->isPointerTy() && arg_ti->getPointerElementType()->isStructTy());
438 auto attr_list = func->getAttributes();
439 #if 14 <= LLVM_VERSION_MAJOR
440 llvm::AttrBuilder arr_arg_builder(
context_, attr_list.getParamAttrs(0));
442 llvm::AttrBuilder arr_arg_builder(attr_list.getParamAttributes(0));
444 arr_arg_builder.addAttribute(llvm::Attribute::StructRet);
445 func->addParamAttrs(0, arr_arg_builder);
449 CHECK_EQ(result->getType(), ret_type);
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
llvm::Value * emitEntryCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::LLVMContext & getExecutorContext() const
std::shared_ptr< Executor > getExecutor() const
void maybeCloneFunctionRecursive(llvm::Function *fn)
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
llvm::IRBuilder ir_builder_
static constexpr ExecutorId INVALID_EXECUTOR_ID
llvm::Type * getTy(llvm::LLVMContext &ctx)
HOST DEVICE SQLTypes get_type() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
GpuFunctionDefinition(char const *name)
llvm::Type * getTy< double >(llvm::LLVMContext &ctx)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
static const std::unordered_map< std::string, std::shared_ptr< GpuFunctionDefinition > > gpu_replacement_functions
llvm::LLVMContext & context_
llvm::Function * current_func_
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
llvm::FunctionCallee getFunction(llvm::Module *llvm_module, llvm::LLVMContext &context) const override
void replaceFunctionForGpu(const std::string &fcn_to_replace, llvm::Function *fn)
llvm::ConstantFP * llFp(const float v) const
std::vector< std::string > gpuFunctionsToReplace(llvm::Function *fn)
llvm::IRBuilder query_func_entry_ir_builder_
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
std::pair< uint64_t, uint64_t > inline_uint_max_min(const size_t byte_width)
llvm::Constant * inlineNull(const SQLTypeInfo &)
void set_module_shallow_copy(const std::unique_ptr< llvm::Module > &module, bool always_clone=false)
llvm::Function * findCalledFunction(llvm::CallInst &call_inst)
HOST DEVICE EncodingType get_compression() const
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
void emitErrorCheck(llvm::Value *condition, llvm::Value *errorCode, std::string label)
llvm::ConstantInt * llInt(const T v) const
bool g_enable_watchdog false
llvm::ValueToValueMapTy vmap_
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::pair< int64_t, int64_t > inline_int_max_min(const size_t byte_width)
CgenState(const size_t num_query_infos, const bool contains_left_deep_outer_join, Executor *executor)
std::pair< llvm::ConstantInt *, llvm::ConstantInt * > inlineIntMaxMin(const size_t byte_width, const bool is_signed)
GpuFunction(char const *name)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)