17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
22 #include <condition_variable>
32 #include <unordered_map>
33 #include <unordered_set>
35 #include <llvm/IR/Function.h>
36 #include <llvm/IR/Value.h>
37 #include <llvm/Linker/Linker.h>
38 #include <llvm/Transforms/Utils/ValueMapper.h>
39 #include <rapidjson/document.h>
104 const std::string& query_str,
105 const std::string& submitted_time)
112 const size_t executor_id,
113 const std::string& query_str,
114 const std::string& submitted_time)
121 const size_t executor_id,
122 const std::string& query_str,
123 const std::string& submitted_time,
155 std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
169 for (
auto& arg : func->args()) {
170 if (arg.getName() ==
name) {
179 #if 10 <= LLVM_VERSION_MAJOR
180 return index < func->arg_size() ? func->getArg(index) :
nullptr;
182 return index < func->arg_size() ? func->arg_begin() + index :
nullptr;
189 for (
auto& arg : func->args()) {
190 if (arg.getName() ==
name) {
224 if (!cast_expr || cast_expr->get_optype() !=
kCAST) {
227 return cast_expr->get_operand();
247 const int table_id) {
249 const auto it = temporary_tables->find(table_id);
250 CHECK(it != temporary_tables->end());
258 CHECK(cd || temporary_tables);
265 return temp->getColType(col_id);
271 : std::runtime_error(
"Retry query compilation with no GPU lazy fetch.") {}
277 : std::runtime_error(
"Retry query compilation with new scan limit.")
291 : std::runtime_error(
"Retry query compilation with no compaction.") {}
322 ParseIRError(
const std::string message) : std::runtime_error(message) {}
328 : std::runtime_error(
329 "NONE ENCODED String types are not supported as input result set.") {}
349 const std::shared_ptr<ResultSet>& rs);
351 std::vector<TargetValue>
getEntryAt(
const size_t index)
const override;
356 return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
370 using Callback = std::function<void(const UpdateLogForFragment&, TableUpdateMetadata&)>;
377 std::shared_ptr<ResultSet>
rs_;
386 namespace ExecutorResourceMgr_Namespace {
410 return cache_key.
hash();
// Compile-time platform checks: compilation fails with the messages below
// unless float is 4 bytes, double is 8 bytes, and time_t is 8 bytes.
416 static_assert(
sizeof(
float) == 4 &&
sizeof(
double) == 8,
417 "Host hardware not supported, unexpected size of float / double.");
418 static_assert(
sizeof(time_t) == 8,
419 "Host hardware not supported, 64-bit time support is required.");
428 const size_t block_size_x,
429 const size_t grid_size_x,
430 const size_t max_gpu_slab_size,
431 const std::string& debug_dir,
432 const std::string& debug_file);
440 const int current_db_id) {
441 bool clearEntireCache =
true;
444 if (!table_chunk_key_prefix.empty()) {
453 clearEntireCache =
false;
456 if (clearEntireCache) {
467 void reset(
bool discard_runtime_modules_only =
false);
469 template <
typename F>
481 std::lock_guard<std::mutex> register_lock(
486 executor_item.second->reset(
true);
493 register_extension_functions();
501 const std::string& debug_dir =
"",
502 const std::string& debug_file =
"",
516 static void addUdfIrToModule(
const std::string& udf_ir_filename,
const bool is_cuda_ir);
540 std::lock_guard<std::mutex> lock(
579 const bool with_generation)
const {
586 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
587 const bool with_generation)
const;
593 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
594 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
595 const bool with_generation)
const;
600 const std::vector<StringOps_Namespace::StringOpInfo>& source_string_op_infos,
601 const std::vector<StringOps_Namespace::StringOpInfo>& dest_source_string_op_infos,
602 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
const;
606 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
607 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
608 const bool with_generation)
const;
615 return cgen_state_->contains_left_deep_outer_join_;
639 const std::set<shared::TableKey>& table_keys_to_fetch)
const;
642 const std::set<shared::TableKey>& table_ids_to_fetch,
643 const bool include_lazy_fetched_cols)
const;
675 const std::vector<InputDescriptor>& input_descs,
676 const std::vector<InputTableInfo>& query_infos,
677 const std::vector<std::pair<int32_t, FragmentsList>>& device_fragment_lists)
const;
681 const std::vector<Analyzer::Expr*>& target_exprs)
const;
691 const unsigned pending_query_check_freq)
const;
705 const std::vector<InputTableInfo>&,
710 const bool has_cardinality_estimation,
714 const std::vector<InputTableInfo>& table_infos,
719 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
725 const std::shared_ptr<RowSetMemoryOwner>& row_set_mem_owner);
743 llvm::Value* cond_lv,
760 llvm::Value* aggregate_state);
775 llvm::Value* current_row_pos_lv);
786 bool for_window_frame_naviation,
788 bool is_timestamp_type_frame,
789 llvm::Value* order_key_null_val,
802 llvm::Value* partition_index_lv)
const;
809 llvm::Value* partition_index_lv)
const;
816 llvm::Value* order_key_col_null_val_lv,
828 bool for_start_bound,
832 bool for_start_bound,
834 bool is_timestamp_type_frame,
835 llvm::Value* order_key_null_val,
846 const std::string& order_col_type,
847 const std::string& op_type,
848 bool for_timestamp_type)
const;
854 llvm::Value* crt_val,
855 llvm::Value* window_func_null_val);
860 llvm::Value* aggregate_state);
890 std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
903 const std::set<size_t>& fragment_indexes_param);
913 const std::vector<InputTableInfo>& table_infos,
926 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
944 const std::vector<InputTableInfo>& table_infos,
947 const bool allow_single_frag_table_opt,
948 const size_t context_count,
952 std::unordered_set<int>& available_gpus,
953 int& available_cpus);
960 std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
962 const size_t requested_num_threads);
965 std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
994 std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
996 const std::vector<InputDescriptor>& input_descs,
1002 const size_t table_idx,
1003 const size_t outer_frag_idx,
1004 std::map<shared::TableKey, const TableFragments*>& selected_tables_fragments,
1005 const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
1006 inner_table_id_to_join_condition);
1011 const int inner_table_id,
1012 const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
1013 inner_table_id_to_join_condition,
1019 const int device_id,
1021 const std::map<shared::TableKey, const TableFragments*>&,
1023 std::list<ChunkIter>&,
1024 std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
1026 const size_t thread_idx,
1027 const bool allow_runtime_interrupt);
1031 const int device_id,
1033 const std::map<shared::TableKey, const TableFragments*>&,
1035 std::list<ChunkIter>&,
1036 std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
1038 const size_t thread_idx,
1039 const bool allow_runtime_interrupt);
1041 std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
1044 const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
1045 const std::vector<InputDescriptor>& input_descs,
1046 const std::map<shared::TableKey, const TableFragments*>& all_tables_fragments);
1049 std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
1050 std::vector<size_t>& local_col_to_frag_pos,
1051 const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
1056 std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
1061 const size_t scan_idx,
1067 const bool hoist_literals,
1070 std::vector<std::vector<const int8_t*>>& col_buffers,
1071 const std::vector<size_t> outer_tab_frag_ids,
1073 const std::vector<std::vector<int64_t>>& num_rows,
1074 const std::vector<std::vector<uint64_t>>& frag_offsets,
1076 const int device_id,
1078 const int64_t limit,
1079 const uint32_t start_rowid,
1080 const uint32_t num_tables,
1081 const bool allow_runtime_interrupt,
1083 const bool optimize_cuda_block_and_grid_sizes,
1084 const int64_t rows_to_process = -1);
1089 const bool hoist_literals,
1091 const std::vector<Analyzer::Expr*>& target_exprs,
1093 std::vector<std::vector<const int8_t*>>& col_buffers,
1095 const std::vector<std::vector<int64_t>>& num_rows,
1096 const std::vector<std::vector<uint64_t>>& frag_offsets,
1098 const int device_id,
1099 const uint32_t start_rowid,
1100 const uint32_t num_tables,
1101 const bool allow_runtime_interrupt,
1103 const bool optimize_cuda_block_and_grid_sizes,
1104 const int64_t rows_to_process = -1);
1109 const int64_t agg_init_val,
1110 const int8_t out_byte_width,
1111 const int64_t* out_vec,
1112 const size_t out_vec_sz,
1113 const bool is_group_by,
1114 const bool float_argument_input);
1120 const int device_id);
1123 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1124 std::shared_ptr<RowSetMemoryOwner>,
1126 std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>
1128 const std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& results_per_device)
1131 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1132 std::shared_ptr<RowSetMemoryOwner>,
1136 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1137 std::shared_ptr<RowSetMemoryOwner>,
1142 const bool allow_single_frag_table_opt,
1143 const std::vector<InputTableInfo>&,
1147 std::shared_ptr<RowSetMemoryOwner>,
1149 const bool has_cardinality_estimation,
1156 std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>>
compileWorkUnit(
1157 const std::vector<InputTableInfo>& query_infos,
1163 const bool allow_lazy_fetch,
1164 std::shared_ptr<RowSetMemoryOwner>,
1165 const size_t max_groups_buffer_entry_count,
1166 const int8_t crt_min_byte_width,
1167 const bool has_cardinality_estimation,
1177 const std::vector<InputTableInfo>& query_infos,
1184 const size_t level_idx,
1189 std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
1191 const size_t level_idx,
1200 const std::vector<InputTableInfo>& query_infos,
1202 std::vector<std::string>& fail_reasons);
1205 const size_t level_idx);
1209 llvm::Function* query_func,
1210 llvm::BasicBlock* entry_bb,
1221 bool run_with_dynamic_watchdog,
1222 bool run_with_allowing_runtime_interrupt,
1223 const std::vector<JoinLoop>& join_loops,
1225 const std::vector<InputTableInfo>& input_table_infos);
1228 unsigned const error_code_idx,
1229 bool hoist_literals,
1230 bool allow_runtime_query_interrupt);
1233 const std::vector<InputTableInfo>& query_infos);
1241 const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
1242 const std::vector<InputTableInfo>& query_infos,
1245 const HashType preferred_hash_type,
1251 const std::vector<InputTableInfo>& query_infos,
1258 const std::unordered_set<llvm::Function*>&,
1263 std::unordered_set<llvm::Function*>&,
1264 const bool no_inline,
1266 const bool is_gpu_smem_used,
1268 std::string
generatePTX(
const std::string&)
const;
1279 const size_t col_width,
1281 const bool translate_null_val,
1282 const int64_t translated_null_val,
1284 std::stack<llvm::BasicBlock*>&,
1285 const bool thread_mem_shared);
1287 llvm::Value*
castToFP(llvm::Value*,
1292 std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap>
addDeletedColumn(
1308 const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
1309 const std::vector<uint64_t>& frag_offsets,
1310 const size_t frag_idx);
1316 const std::vector<uint64_t>& frag_offsets,
1317 const size_t frag_idx);
1320 const std::unordered_set<PhysicalInput>& phys_inputs);
1322 const std::unordered_set<PhysicalInput>& phys_inputs);
1324 const std::unordered_set<shared::TableKey>& phys_table_keys);
1327 void setupCaching(
const std::unordered_set<PhysicalInput>& phys_inputs,
1328 const std::unordered_set<shared::TableKey>& phys_table_keys);
1343 const std::string& query_str,
1344 const std::string& submitted,
1345 const size_t executor_id,
1350 const std::string& submitted_time_str,
1356 const std::string& query_session,
1363 const std::string& submitted_time_str,
1368 const std::string& submitted_time_str,
1369 const size_t executor_id,
1378 const std::string& query_str,
1379 const std::string& query_submitted_time);
1382 const std::string& submitted_time_str);
1384 const std::string& submitted_time_str,
1387 const std::string& query_str,
1388 const std::string& submitted_time_str,
1389 const size_t executor_id,
1405 const size_t cache_value);
1422 executor_item.second->update_extension_modules(update_runtime_modules_only);
1426 const size_t num_gpu_slots,
1427 const size_t cpu_result_mem,
1428 const size_t cpu_buffer_pool_mem,
1429 const size_t gpu_buffer_pool_mem,
1430 const double per_query_max_cpu_slots_ratio,
1431 const double per_query_max_cpu_result_mem_ratio,
1432 const bool allow_cpu_kernel_concurrency,
1433 const bool allow_cpu_gpu_kernel_concurrency,
1434 const bool allow_cpu_slot_oversubscription_concurrency,
1435 const bool allow_cpu_result_mem_oversubscription,
1436 const double max_available_resource_use_ratio);
1446 const size_t resource_quantity);
1461 concurrent_resource_grant_policy);
1465 const std::unordered_map<int, CgenState::LiteralValues>& literals,
1466 const int device_id);
1468 static size_t align(
const size_t off_in,
const size_t alignment) {
1469 size_t off = off_in;
1470 if (off % alignment != 0) {
1471 off += (alignment - off % alignment);
1488 const bool allow_lazy_fetch,
1489 const std::vector<InputTableInfo>& query_infos,
1509 static const std::unique_ptr<llvm::Module> empty;
1627 static std::shared_ptr<ExecutorResourceMgr_Namespace::ExecutorResourceMgr>
1666 std::string null_check_suffix{
"_nullable"};
1669 null_check_suffix +=
"_rhs";
1672 null_check_suffix +=
"_lhs";
1674 return null_check_suffix;
1684 auto oper = (uoper && uoper->get_optype() ==
kCAST) ? uoper->
get_operand() : expr;
1686 return (arr && arr->isLocalAlloc() && arr->get_type_info().is_fixlen_array());
1695 const size_t cpu_count,
1696 const size_t gpu_count);
1706 return "template_module";
1708 return "rt_geos_module";
1710 return "rt_libdevice_module";
1712 return "udf_cpu_module";
1714 return "udf_gpu_module";
1716 return "rt_udf_cpu_module";
1718 return "rt_udf_gpu_module";
1720 LOG(
FATAL) <<
"Invalid LLVM module kind.";
1724 namespace foreign_storage {
1728 #endif // QUERYENGINE_EXECUTE_H
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
void logSystemGPUMemoryStatus(std::string const &tag, size_t const thread_idx) const
A container for various stats about the current state of the ExecutorResourcePool. Note that ExecutorResourcePool does not persist a struct of this type, but rather builds one on the fly when ExecutorResourcePool::get_resource_info() is called.
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
const std::string debug_dir_
llvm::Value * translated_value
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
Compiles and dispatches a work unit per fragment, processing results with the per-fragment callback...
bool is_agg(const Analyzer::Expr *expr)
static void invalidateCachesByTable(size_t table_key)
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
const QueryPlanDAG getLatestQueryPlanDagExtracted() const
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
static heavyai::shared_mutex execute_mutex_
const std::unique_ptr< llvm::Module > & get_udf_module(bool is_gpu=false) const
static QuerySessionMap queries_session_map_
CudaMgr_Namespace::CudaMgr * cudaMgr() const
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
bool has_libdevice_module() const
int64_t kernel_queue_time_ms_
size_t maxGpuSlabSize() const
ExecutorMutexHolder acquireExecuteMutex()
Data_Namespace::DataMgr * data_mgr_
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
int64_t compilation_queue_time_ms_
const std::string & getQuerySubmittedTime()
CompilationRetryNoLazyFetch()
std::map< const ColumnDescriptor *, std::set< int32_t >> ColumnToFragmentsMap
const std::unique_ptr< llvm::Module > & get_geos_module() const
static void initialize_extension_module_sources()
void codegenWindowFunctionStateInit(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
void checkPendingQueryStatus(const QuerySessionId &query_session)
const StringDictionaryProxy::IdMap * getJoinIntersectionStringProxyTranslationMap(const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
static void registerActiveModule(void *module, const int device_id)
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Class for a per-database catalog. Also includes metadata for the current database and the current use...
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
FetchCacheAnchor(CgenState *cgen_state)
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
StringDictionaryProxy * getLiteralDictionary() const override
std::atomic< bool > interrupted_
static ResultSetRecyclerHolder resultset_recycler_holder_
Fragmenter_Namespace::RowDataProvider RowDataProvider
static const int max_gpu_count
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
static ExecutorResourceMgr_Namespace::ResourcePoolInfo get_executor_resource_pool_info()
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
size_t const getFragmentIndex() const
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
void AutoTrackBuffersInRuntimeIR()
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Cache for physical column ranges. Set by the aggregator on the leaves.
std::pair< QuerySessionId, std::string > CurrentQueryStatus
bool is_constructed_point(const Analyzer::Expr *expr)
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
heavyai::unique_lock< heavyai::shared_mutex > unique_lock
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
std::function< llvm::BasicBlock *(llvm::BasicBlock *, llvm::BasicBlock *, const std::string &, llvm::Function *, CgenState *)> HoistedFiltersCallback
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
static const size_t baseline_threshold
void updateQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
const std::unique_ptr< llvm::Module > & get_rt_udf_module(bool is_gpu=false) const
std::unordered_set< int > get_available_gpus(const Data_Namespace::DataMgr *data_mgr)
static constexpr ExecutorId INVALID_EXECUTOR_ID
std::unordered_map< size_t, std::vector< llvm::Value * > > saved_fetch_cache
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
bool isArchPascalOrLater() const
bool hasLazyFetchColumns(const std::vector< Analyzer::Expr * > &target_exprs) const
void launchKernelsImpl(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
llvm::Value * aggregateWindowStatePtr(CodeGenerator *code_generator, const CompilationOptions &co)
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
const ColumnDescriptor * get_metadata_for_column(const ::shared::ColumnKey &column_key)
ResourceType
Stores the resource type for an ExecutorResourcePool request.
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) const
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
void insertErrorCodeChecker(llvm::Function *query_func, unsigned const error_code_idx, bool hoist_literals, bool allow_runtime_query_interrupt)
Macros and functions for groupby buffer compaction.
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
QuerySessionId current_query_session_
ResultSetRecyclerHolder & getResultSetRecyclerHolder()
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
heavyai::shared_mutex & getSessionLock()
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unordered_set< shared::TableKey > table_keys
const QuerySessionStatus::QueryStatus getQueryStatus()
const std::string query_str_
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time)
ExecutorResourceMgr_Namespace::ChunkRequestInfo getChunkRequestInfo(const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos, const std::vector< std::pair< int32_t, FragmentsList >> &device_fragment_lists) const
Determines a unique list of chunks and their associated byte sizes for a given query plan...
AggregatedColRange agg_col_range_cache_
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
std::unique_ptr< CgenState > cgen_state_
llvm::Value * original_value
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
static void init_resource_mgr(const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
ParseIRError(const std::string message)
Specifies the policies for resource grants in the presence of other requests, both under situations o...
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
static uint32_t gpu_active_modules_device_mask_
TableUpdateMetadata executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
HOST DEVICE SQLTypes get_type() const
FragmentSkipStatus canSkipFragmentForFpQual(const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
static void invalidateCaches()
int deviceCount(const ExecutorDeviceType) const
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
void reset(bool discard_runtime_modules_only=false)
CompilationRetryNoCompaction()
static std::mutex kernel_mutex_
unsigned numBlocksPerMP() const
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
std::unique_ptr< CgenState > cgen_state_
Container for compilation results and assorted options for a single execution unit.
static const size_t auto_cpu_mem_bytes
bool checkCurrentQuerySession(const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
void clearCaches(bool runtime_only=false)
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
void addTransientStringLiterals(const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
std::vector< FragmentsPerTable > FragmentsList
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
const QuerySessionId query_session_
std::shared_ptr< HashJoin > hash_table
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
static void clearCardinalityCache()
bool checkNonKernelTimeInterrupted() const
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time)
std::function< void(const UpdateLogForFragment &, TableUpdateMetadata &)> Callback
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
static void resume_executor_queue()
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
CardinalityCacheKey(const RelAlgExecutionUnit &ra_exe_unit)
std::unordered_map< shared::TableKey, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
const std::unique_ptr< llvm::Module > & get_libdevice_module() const
QueryMustRunOnCpu(const std::string &err)
std::shared_lock< T > shared_lock
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
const ExecutorId executor_id_
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
std::map< QuerySessionId, bool > InterruptFlagMap
const size_t max_gpu_slab_size_
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
std::lock_guard< std::mutex > lock_
static void unregisterActiveModule(const int device_id)
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
TableIdToNodeMap table_id_to_node_map_
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
static void set_executor_resource_pool_resource(const ExecutorResourceMgr_Namespace::ResourceType resource_type, const size_t resource_quantity)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
std::vector< llvm::Value * > LLVMValueVector
void logSystemCPUMemoryStatus(std::string const &tag, size_t const thread_idx) const
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
int64_t deviceCycles(int milliseconds) const
std::string generatePTX(const std::string &) const
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
std::mutex str_dict_mutex_
friend class PendingExecutionClosure
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key) const
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
const SQLTypeInfo getFirstOrderColTypeInfo(WindowFunctionContext *window_func_context) const
static void set_concurrent_resource_grant_policy(const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy &concurrent_resource_grant_policy)
const std::string debug_file_
void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id)
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
bool containsLeftDeepOuterJoin() const
static QueryPlanDAG latest_query_plan_extracted_
void addToCardinalityCache(const CardinalityCacheKey &cache_key, const size_t cache_value)
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
size_t getNumCurentSessionsEnrolled() const
bool has_rt_module() const
unsigned get_index_by_name(llvm::Function *func, const std::string &name)
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
bool is_timeinterval() const
CachedCardinality getCachedCardinality(const CardinalityCacheKey &cache_key)
static InterruptFlagMap queries_interrupt_flag_
FragmentInfoType const & getFragmentInfo() const
std::unique_lock< T > unique_lock
std::unique_ptr< PlanState > plan_state_
void initializeNVPTXBackend() const
static const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy get_concurrent_resource_grant_policy(const ExecutorResourceMgr_Namespace::ResourceType resource_type)
std::map< int32_t, std::set< int32_t >> TableToFragmentIds
const std::string submitted_time_
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
std::pair< bool, size_t > CachedCardinality
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
static void invalidateCardinalityCacheForTable(const shared::TableKey &table_key)
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
bool containsTableKey(const shared::TableKey &table_key) const
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
specifies the content in-memory of a row in the column metadata table
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static std::map< int, std::shared_ptr< Executor > > executors_
std::pair< llvm::BasicBlock *, llvm::Value * > codegenWindowResetStateControlFlow(CodeGenerator *code_generator, const CompilationOptions &co)
QuerySessionStatus::QueryStatus getQuerySessionStatus(const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
std::string toString(const Executor::ExtModuleKinds &kind)
static const size_t auto_num_threads
const TemporaryTables * getTemporaryTables()
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
const std::unique_ptr< llvm::Module > & get_rt_module() const
void launchKernelsLocked(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
std::string dumpCache() const
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
const std::vector< size_t > getExecutorIdsRunningQuery(const QuerySessionId &interrupt_session) const
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
void registerExtractedQueryPlanDag(const QueryPlanDAG &query_plan_dag)
std::shared_ptr< ResultSet > rs_
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
void setGridSize(unsigned grid_size)
HOST DEVICE EncodingType get_compression() const
void codegenWindowAvgEpilogue(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *crt_val, llvm::Value *window_func_null_val)
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
static heavyai::shared_mutex recycler_mutex_
static void update_after_registration(bool update_runtime_modules_only=false)
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
InputTableInfoCache input_table_info_cache_
size_t getNumBytesForFetchedRow(const std::set< shared::TableKey > &table_keys_to_fetch) const
void setBlockSize(unsigned block_size)
const Expr * get_operand() const
std::chrono::steady_clock::time_point lock_queue_clock_
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
unsigned gridSize() const
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
TableGenerations computeTableGenerations(const std::unordered_set< shared::TableKey > &phys_table_keys)
friend class KernelSubtask
PlanState * getPlanStatePtr() const
static std::map< ExtModuleKinds, std::string > extension_module_sources
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
bool has_udf_module(bool is_gpu=false) const
Data_Namespace::DataMgr * getDataMgr() const
bool needLinearizeAllFragments(const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
void setExecutorId(const size_t executor_id)
llvm::Value * codegenWindowFunctionAggregate(CodeGenerator *code_generator, const CompilationOptions &co)
std::unique_ptr< llvm::LLVMContext > context_
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
CgenState * getCgenStatePtr() const
FragmentInfoType const & fragment_info_
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
static heavyai::shared_mutex executor_session_mutex_
const std::string getQueryStr()
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
TableGenerations table_generations_
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
const size_t getExecutorId()
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
static void registerExtensionFunctions(F register_extension_functions)
std::string QuerySessionId
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > getUniqueThreadSharedResultSets(const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device) const
QueryPlanDagCache & getQueryPlanDagCache()
bool has_extension_module(ExtModuleKinds kind) const
std::map< shared::ColumnKey, size_t > getColumnByteWidthMap(const std::set< shared::TableKey > &table_ids_to_fetch, const bool include_lazy_fetched_cols) const
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
heavyai::shared_lock< heavyai::shared_mutex > shared_lock
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
CgenStateManager(Executor &executor)
std::mutex compilation_mutex_
void interrupt(const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
heavyai::shared_mutex & getDataRecyclerLock()
bool has_rt_udf_module(bool is_gpu=false) const
static void pause_executor_queue()
std::vector< llvm::Value * > inlineHoistedLiterals()
static size_t getBaselineThreshold(bool for_count_distinct, ExecutorDeviceType device_type)
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
ExecutorResourceMgr is the central manager for resources available to all executors in the system...
uint32_t log2_bytes(const uint32_t bytes)
ExpressionRange getColRange(const PhysicalInput &) const
std::string numeric_type_name(const SQLTypeInfo &ti)
CurrentQueryStatus attachExecutorToQuerySession(const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
void redeclareFilterFunction()
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time, const QuerySessionStatus::QueryStatus &query_status)
void launchKernelsViaResourceMgr(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const QueryMemoryDescriptor &query_mem_desc)
Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr.
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
static constexpr ExecutorId UNITARY_EXECUTOR_ID
const TableGeneration & getTableGeneration(const shared::TableKey &table_key) const
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
auto getResultSet() const
unsigned blockSize() const
std::shared_timed_mutex shared_mutex
static std::mutex register_runtime_extension_functions_mutex_
Specifies all DataMgr chunks needed for a query step/request, along with their sizes in bytes...
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
ExecutorId getExecutorId() const
static size_t align(const size_t off_in, const size_t alignment)
static heavyai::shared_mutex executors_cache_mutex_
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
size_t const getRowCount() const override
bool operator==(const CardinalityCacheKey &other) const
const QuerySessionId getQuerySession()
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str)
llvm::Value * codegenAggregateWindowState(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
QuerySessionStatus::QueryStatus query_status_
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
static size_t get_executor_resource_pool_total_resource_quantity(const ExecutorResourceMgr_Namespace::ResourceType resource_type)
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Descriptor for the fragments required for an execution kernel.
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
static size_t getArenaBlockSize()
const StringDictionaryProxy::TranslationMap< Datum > * getStringProxyNumericTranslationMap(const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
std::mutex gpu_exec_mutex_[max_gpu_count]
llvm::LLVMContext & getContext()
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
std::vector< int > getTableChunkKey(const int getCurrentDBId) const
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
SQLOps get_optype() const
static QueryPlanDagCache query_plan_dag_cache_
size_t operator()(const CardinalityCacheKey &cache_key) const
WindowFunctionContext * active_window_function_
llvm::Value * get_arg_by_index(llvm::Function *func, unsigned const index)
static std::mutex gpu_active_modules_mutex_
static void nukeCacheOfExecutors()
void clearMetaInfoCache()
size_t const getEntryCount() const override
const TemporaryTables * temporary_tables_
CompilationRetryNewScanLimit(const size_t new_scan_limit)
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
WatchdogException(const std::string &cause)
bool has_geos_module() const
void update_extension_modules(bool update_runtime_modules_only=false)
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
bool isArchMaxwell(const ExecutorDeviceType dt) const
size_t get_loop_join_size(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
bool isFragmentFullyDeleted(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)