33 std::static_pointer_cast<Analyzer::ColumnVar>(column->
deep_copy()));
41 result.insert(next_result.begin(), next_result.end());
63 UsedColumnExpressions visitor;
64 const auto used_columns = visitor.visit(expr);
65 std::list<std::shared_ptr<const InputColDescriptor>> global_col_ids;
66 for (
const auto& used_column : used_columns) {
67 const auto& table_key = used_column.first.getScanDesc().getTableKey();
68 global_col_ids.push_back(std::make_shared<InputColDescriptor>(
69 used_column.first.getColId(),
72 used_column.first.getScanDesc().getNestLevel()));
80 const bool fetch_columns,
85 const auto used_columns =
prepare(expr);
86 std::vector<llvm::Type*> arg_types(plan_state_->global_to_local_col_ids_.size() + 1);
87 std::vector<std::shared_ptr<Analyzer::ColumnVar>> inputs(arg_types.size() - 1);
88 auto& ctx =
module_->getContext();
89 for (
const auto& kv : plan_state_->global_to_local_col_ids_) {
90 size_t arg_idx = kv.second;
92 const auto it = used_columns.find(kv.first);
93 const auto col_expr = it->second;
94 inputs[arg_idx] = col_expr;
95 const auto& ti = col_expr->get_type_info();
100 auto ft = llvm::FunctionType::get(
get_int_type(32, ctx), arg_types,
false);
101 auto scalar_expr_func = llvm::Function::Create(
102 ft, llvm::Function::ExternalLinkage,
"scalar_expr",
module_.get());
103 auto bb_entry = llvm::BasicBlock::Create(ctx,
".entry", scalar_expr_func, 0);
110 const auto expr_lvs =
codegen(expr, fetch_columns, co);
111 CHECK_EQ(expr_lvs.size(), size_t(1));
116 std::vector<llvm::Type*> wrapper_arg_types(arg_types.size() + 1);
117 wrapper_arg_types[0] = llvm::PointerType::get(
get_int_type(32, ctx), 0);
118 wrapper_arg_types[1] = arg_types[0];
119 for (
size_t i = 1; i < arg_types.size(); ++i) {
120 wrapper_arg_types[i + 1] = llvm::PointerType::get(arg_types[i], 0);
123 llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), wrapper_arg_types,
false);
124 auto wrapper_scalar_expr_func =
125 llvm::Function::Create(wrapper_ft,
126 llvm::Function::ExternalLinkage,
127 "wrapper_scalar_expr",
129 auto wrapper_bb_entry =
130 llvm::BasicBlock::Create(ctx,
".entry", wrapper_scalar_expr_func, 0);
131 llvm::IRBuilder<> b(ctx);
132 b.SetInsertPoint(wrapper_bb_entry);
133 std::vector<llvm::Value*> loaded_args = {wrapper_scalar_expr_func->arg_begin() + 1};
134 for (
size_t i = 2; i < wrapper_arg_types.size(); ++i) {
135 auto* value = wrapper_scalar_expr_func->arg_begin() + i;
136 loaded_args.push_back(
137 b.CreateLoad(value->getType()->getPointerElementType(), value));
139 auto error_lv = b.CreateCall(scalar_expr_func, loaded_args);
140 b.CreateStore(error_lv, wrapper_scalar_expr_func->arg_begin());
142 return {scalar_expr_func, wrapper_scalar_expr_func, inputs};
144 return {scalar_expr_func,
nullptr, inputs};
164 LOG(
FATAL) <<
"Invalid device type";
172 const bool fetch_column,
182 llvm::Function* func,
183 llvm::Function* wrapper_func,
190 cuda_mgr_ = std::make_unique<CudaMgr_Namespace::CudaMgr>(0);
201 {func, wrapper_func},
ScalarCodeGenerator::ColumnMap visitColumnVar(const Analyzer::ColumnVar *column) const override
CompiledExpression compile(const Analyzer::Expr *expr, const bool fetch_columns, const CompilationOptions &co)
std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &) override
llvm::Type * llvm_type_from_sql(const SQLTypeInfo &ti, llvm::LLVMContext &ctx)
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
std::unique_ptr< PlanState > own_plan_state_
ExecutionEngineWrapper execution_engine_
llvm::ExecutionEngine * get()
llvm::IRBuilder ir_builder_
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
std::shared_ptr< GpuCompilationContext > gpu_compilation_context_
std::shared_ptr< Analyzer::Expr > deep_copy() const override
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
HOST DEVICE SQLTypes get_type() const
std::unordered_map< InputColDescriptor, std::shared_ptr< Analyzer::ColumnVar >> ColumnMap
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Function * wrapper_func
const CudaMgr_Namespace::CudaMgr * cuda_mgr
llvm::Function * row_func_
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cuda_mgr_
llvm::TargetMachine * nvptx_target_machine
int getLocalColumnId(const Analyzer::ColumnVar *col_var, const bool fetch_column)
std::vector< InputTableInfo > g_table_infos
bool row_func_not_inlined
ScalarCodeGenerator::ColumnMap aggregateResult(const ScalarCodeGenerator::ColumnMap &aggregate, const ScalarCodeGenerator::ColumnMap &next_result) const override
void allocateLocalColumnIds(const std::list< std::shared_ptr< const InputColDescriptor >> &global_col_ids)
const SQLTypeInfo & get_type_info() const
ExecutorDeviceType device_type
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
const shared::ColumnKey & getColumnKey() const
static std::shared_ptr< GpuCompilationContext > generateNativeGPUCode(Executor *executor, llvm::Function *func, llvm::Function *wrapper_func, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co, const GPUTarget &gpu_target)
ColumnMap prepare(const Analyzer::Expr *)
std::unique_ptr< CgenState > own_cgen_state_
std::vector< void * > generateNativeGPUCode(Executor *executor, llvm::Function *func, llvm::Function *wrapper_func, const CompilationOptions &co)
std::unique_ptr< llvm::Module > module_
std::vector< void * > generateNativeCode(Executor *executor, const CompiledExpression &compiled_expression, const CompilationOptions &co)
int32_t get_rte_idx() const
static std::unique_ptr< llvm::TargetMachine > initializeNVPTXBackend(const CudaMgr_Namespace::NvidiaDeviceArch arch)