#include <tbb/parallel_for.h>
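// TreeModelPredictionMgr owns the flattened decision-tree model buffers (a
// contiguous entry table plus a per-tree offsets table), stages them on the
// host and, when requested, on each GPU, and generates the LLVM IR call that
// evaluates the model at query time.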
TreeModelPredictionMgr::TreeModelPredictionMgr(
    const Data_Namespace::MemoryLevel memory_level,
    Executor* executor,
    const std::vector<std::vector<DecisionTreeEntry>>& decision_trees,
    const std::vector<int64_t>& decision_tree_offsets,
    const bool compute_avg)
    : memory_level_(memory_level)
    , device_count_(executor->deviceCount(memory_level == Data_Namespace::GPU_LEVEL
                                              ? ExecutorDeviceType::GPU
                                              : ExecutorDeviceType::CPU))
    , executor_(executor)
    , data_mgr_(executor->getDataMgr())
    , num_trees_(decision_trees.size())
    , compute_avg_(compute_avg) {
  CHECK(memory_level_ == Data_Namespace::CPU_LEVEL ||
        memory_level_ == Data_Namespace::GPU_LEVEL);
  allocateAndPopulateHostBuffers(decision_trees, decision_tree_offsets);
  createKernelBuffers();
}
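// Layout note: the model is stored flattened. decision_tree_offsets[i] is the
// index of tree i's first entry in the flattened table, and the final element,
// decision_tree_offsets[num_trees], is one past the last entry, which is why
// it doubles as the total entry count below.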
void TreeModelPredictionMgr::allocateAndPopulateHostBuffers(
    const std::vector<std::vector<DecisionTreeEntry>>& decision_trees,
    const std::vector<int64_t>& decision_tree_offsets) {
  auto timer = DEBUG_TIMER(__func__);
  const size_t num_trees = decision_trees.size();
  CHECK_EQ(num_trees, decision_tree_offsets.size() - 1);
  const size_t num_tree_entries = decision_tree_offsets[num_trees];
  decision_tree_table_size_bytes_ = num_tree_entries * sizeof(DecisionTreeEntry);
  decision_tree_offsets_size_bytes_ = decision_tree_offsets.size() * sizeof(int64_t);
  host_decision_tree_table_ =
      executor_->getRowSetMemoryOwner()->allocate(decision_tree_table_size_bytes_);
  host_decision_tree_offsets_ =
      executor_->getRowSetMemoryOwner()->allocate(decision_tree_offsets_size_bytes_);
  std::memcpy(host_decision_tree_offsets_,
              reinterpret_cast<const int8_t*>(decision_tree_offsets.data()),
              decision_tree_offsets_size_bytes_);
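  // Trees are independent, so the per-tree copies into the flattened host
  // table can proceed in parallel.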
  tbb::parallel_for(
      tbb::blocked_range<size_t>(0, num_trees),
      [&](const tbb::blocked_range<size_t>& r) {
        const auto start_tree_idx = r.begin();
        const auto end_tree_idx = r.end();
        for (size_t tree_idx = start_tree_idx; tree_idx < end_tree_idx; ++tree_idx) {
          std::memcpy(
              host_decision_tree_table_ +
                  decision_tree_offsets[tree_idx] * sizeof(DecisionTreeEntry),
              reinterpret_cast<const int8_t*>(decision_trees[tree_idx].data()),
              decision_trees[tree_idx].size() * sizeof(DecisionTreeEntry));
        }
      });
}
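// Stage the host buffers per device. For GPU execution every device gets its
// own copy of both tables; for CPU execution the kernel reads the host
// buffers directly.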
void TreeModelPredictionMgr::createKernelBuffers() {
  auto timer = DEBUG_TIMER(__func__);
#ifdef HAVE_CUDA
  if (memory_level_ == Data_Namespace::GPU_LEVEL) {
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      decision_tree_table_device_buffers_.emplace_back(
          CudaAllocator::allocGpuAbstractBuffer(
              data_mgr_, decision_tree_table_size_bytes_, device_id));
      decision_tree_offsets_device_buffers_.emplace_back(
          CudaAllocator::allocGpuAbstractBuffer(
              data_mgr_, decision_tree_offsets_size_bytes_, device_id));
      auto decision_tree_table_device_buffer = reinterpret_cast<const int8_t*>(
          decision_tree_table_device_buffers_.back()->getMemoryPtr());
      auto decision_tree_offsets_device_buffer = reinterpret_cast<const int8_t*>(
          decision_tree_offsets_device_buffers_.back()->getMemoryPtr());
      copy_to_nvidia_gpu(
          data_mgr_,
          reinterpret_cast<CUdeviceptr>(decision_tree_table_device_buffer),
          host_decision_tree_table_,
          decision_tree_table_size_bytes_,
          device_id);
      copy_to_nvidia_gpu(
          data_mgr_,
          reinterpret_cast<CUdeviceptr>(decision_tree_offsets_device_buffer),
          host_decision_tree_offsets_,
          decision_tree_offsets_size_bytes_,
          device_id);
      kernel_decision_tree_tables_.push_back(decision_tree_table_device_buffer);
      kernel_decision_tree_offsets_.push_back(decision_tree_offsets_device_buffer);
    }
  }
#endif
  if (memory_level_ == Data_Namespace::CPU_LEVEL) {
    kernel_decision_tree_tables_.push_back(host_decision_tree_table_);
    kernel_decision_tree_offsets_.push_back(host_decision_tree_offsets_);
  }
}
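// The generated kernel receives the buffer locations as plain int64 handles.
// Wrapping each raw pointer in an Analyzer::Constant lets the usual
// literal-codegen machinery (including literal hoisting) materialize the
// handle inside the compiled query code.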
std::pair<std::vector<std::shared_ptr<const Analyzer::Constant>>,
          std::vector<const Analyzer::Constant*>>
generate_kernel_buffer_constants(CgenState* cgen_state_ptr,
                                 const std::vector<const int8_t*>& kernel_buffers,
                                 const bool hoist_literals) {
  std::vector<std::shared_ptr<const Analyzer::Constant>> kernel_buffer_constants_owned;
  std::vector<const Analyzer::Constant*> kernel_buffer_constants;
  for (const auto kernel_buffer : kernel_buffers) {
    const int64_t kernel_buffer_handle = reinterpret_cast<int64_t>(kernel_buffer);
    const auto kernel_buffer_handle_literal =
        std::dynamic_pointer_cast<const Analyzer::Constant>(
            Parser::IntLiteral::analyzeValue(kernel_buffer_handle));
    CHECK_EQ(kENCODING_NONE,
             kernel_buffer_handle_literal->get_type_info().get_compression());
    kernel_buffer_constants_owned.push_back(kernel_buffer_handle_literal);
    kernel_buffer_constants.push_back(kernel_buffer_handle_literal.get());
  }
  CHECK_GE(kernel_buffer_constants.size(), 1UL);
  CHECK(hoist_literals || kernel_buffer_constants.size() == 1UL);
  return std::make_pair(kernel_buffer_constants_owned, kernel_buffer_constants);
}
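// codegen() runs at query-compilation time: it turns the staged buffer
// handles into literals, spills the regressor inputs to a local array, and
// emits the call into the prediction runtime. Sketch of the intended call
// sequence (names per this listing; the calling context is hypothetical):
//   TreeModelPredictionMgr mgr(Data_Namespace::GPU_LEVEL, executor, trees,
//                              offsets, /*compute_avg=*/true);
//   llvm::Value* prediction_lv = mgr.codegen(regressor_input_lvs, co);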
llvm::Value* TreeModelPredictionMgr::codegen(
    const std::vector<llvm::Value*>& regressor_inputs,
    const CompilationOptions& co) const {
  auto cgen_state_ptr = executor_->getCgenStatePtr();
  AUTOMATIC_IR_METADATA(cgen_state_ptr);
  const auto [decision_tree_table_constants_owned, decision_tree_table_constants] =
      generate_kernel_buffer_constants(
          cgen_state_ptr, kernel_decision_tree_tables_, co.hoist_literals);
  const auto [decision_tree_offsets_constants_owned, decision_tree_offsets_constants] =
      generate_kernel_buffer_constants(
          cgen_state_ptr, kernel_decision_tree_offsets_, co.hoist_literals);

  CodeGenerator code_generator(executor_);

  const auto decision_tree_table_handle_lvs =
      co.hoist_literals
          ? code_generator.codegenHoistedConstants(
                decision_tree_table_constants, kENCODING_NONE, {})
          : code_generator.codegen(decision_tree_table_constants[0], false, co);
  const auto decision_tree_offsets_handle_lvs =
      co.hoist_literals
          ? code_generator.codegenHoistedConstants(
                decision_tree_offsets_constants, kENCODING_NONE, {})
          : code_generator.codegen(decision_tree_offsets_constants[0], false, co);
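  // Spill the regressor inputs into a stack-local double array so the runtime
  // function can receive them through a single pointer.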
  auto& builder = cgen_state_ptr->ir_builder_;
  const int32_t num_regressors = static_cast<int32_t>(regressor_inputs.size());
  auto regressor_ty = llvm::Type::getDoubleTy(cgen_state_ptr->context_);
  llvm::ArrayType* regressor_arr_type =
      llvm::ArrayType::get(regressor_ty, num_regressors);
  auto regressor_local_storage_lv =
      builder.CreateAlloca(regressor_arr_type, nullptr, "Regressor_Local_Storage");
  auto idx_lv = cgen_state_ptr->llInt(0);
  auto regressor_local_storage_gep = llvm::GetElementPtrInst::CreateInBounds(
      regressor_local_storage_lv->getType()->getScalarType()->getPointerElementType(),
      regressor_local_storage_lv,
      {idx_lv, idx_lv},
      "",
      builder.GetInsertBlock());
  for (int32_t reg_idx = 0; reg_idx < num_regressors; ++reg_idx) {
    auto reg_ptr = builder.CreateGEP(
        regressor_local_storage_lv->getType()->getScalarType()->getPointerElementType(),
        regressor_local_storage_lv,
        {cgen_state_ptr->llInt(0), cgen_state_ptr->llInt(reg_idx)},
        "");
    builder.CreateStore(regressor_inputs[reg_idx], reg_ptr);
  }
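  // The call below targets the runtime helper tree_model_reg_predict, whose
  // definition is not part of this listing. Judging from the arguments
  // marshaled here, its signature is assumed to be:
  //   double tree_model_reg_predict(const double* regressor_inputs,
  //                                 const int64_t tree_table_handle,
  //                                 const int64_t tree_offsets_handle,
  //                                 const int32_t num_regressors,
  //                                 const int32_t num_trees,
  //                                 const bool compute_avg,
  //                                 const double null_value);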
  const double translated_null_value = inline_fp_null_value<double>();
  return cgen_state_ptr->emitCall(
      "tree_model_reg_predict",
      {regressor_local_storage_gep,
       cgen_state_ptr->castToTypeIn(decision_tree_table_handle_lvs.front(), 64),
       cgen_state_ptr->castToTypeIn(decision_tree_offsets_handle_lvs.front(), 64),
       cgen_state_ptr->llInt(num_regressors),
       cgen_state_ptr->llInt(static_cast<int32_t>(num_trees_)),
       cgen_state_ptr->llBool(compute_avg_),
       cgen_state_ptr->llFp(translated_null_value)});
}