OmniSciDB  a5dc49c757
Executor Class Reference

#include <Execute.h>


Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr< llvm::Module > & get_rt_module () const
 
const std::unique_ptr< llvm::Module > & get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr< llvm::Module > & get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr< llvm::Module > & get_geos_module () const
 
const std::unique_ptr< llvm::Module > & get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap (const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const StringDictionaryProxy::IdMap * getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
const StringDictionaryProxy::TranslationMap< Datum > * getStringProxyNumericTranslationMap (const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const shared::TableKey &table_key) const
 
const TableGeneration & getTableGeneration (const shared::TableKey &table_key) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< shared::TableKey > &table_keys_to_fetch) const
 
std::map< shared::ColumnKey, size_t > getColumnByteWidthMap (const std::set< shared::TableKey > &table_ids_to_fetch, const bool include_lazy_fetched_cols) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
ExecutorResourceMgr_Namespace::ChunkRequestInfo getChunkRequestInfo (const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos, const std::vector< std::pair< int32_t, FragmentsList >> &device_fragment_lists) const
 Determines a unique list of chunks and their associated byte sizes for a given query plan. More...
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
void setGridSize (unsigned grid_size)
 
void resetGridSize ()
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
void setBlockSize (unsigned block_size)
 
void resetBlockSize ()
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void logSystemCPUMemoryStatus (std::string const &tag, size_t const thread_idx) const
 
void logSystemGPUMemoryStatus (std::string const &tag, size_t const thread_idx) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const CardinalityCacheKey &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const CardinalityCacheKey &cache_key)
 
heavyai::shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getResultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void clearCardinalityCache ()
 
static void invalidateCardinalityCacheForTable (const shared::TableKey &table_key)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 
static void init_resource_mgr (const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
 
static void pause_executor_queue ()
 
static void resume_executor_queue ()
 
static size_t get_executor_resource_pool_total_resource_quantity (const ExecutorResourceMgr_Namespace::ResourceType resource_type)
 
static ExecutorResourceMgr_Namespace::ResourcePoolInfo get_executor_resource_pool_info ()
 
static void set_executor_resource_pool_resource (const ExecutorResourceMgr_Namespace::ResourceType resource_type, const size_t resource_quantity)
 
static size_t getBaselineThreshold (bool for_count_distinct, ExecutorDeviceType device_type)
 
static const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy get_concurrent_resource_grant_policy (const ExecutorResourceMgr_Namespace::ResourceType resource_type)
 
static void set_concurrent_resource_grant_policy (const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy &concurrent_resource_grant_policy)
 

Public Attributes

std::mutex compilation_mutex_
 

Static Public Attributes

static constexpr ExecutorId UNITARY_EXECUTOR_ID = 0
 
static constexpr ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map< ExtModuleKinds, std::string > extension_module_sources
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 
static const size_t auto_cpu_mem_bytes {size_t(0)}
 
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_ = nullptr
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenConditionalAggregateCondValSelector (llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
 
llvm::Value * codegenWindowFunctionAggregate (CodeGenerator *code_generator, const CompilationOptions &co)
 
std::pair< llvm::BasicBlock *, llvm::Value * > codegenWindowResetStateControlFlow (CodeGenerator *code_generator, const CompilationOptions &co)
 
void codegenWindowFunctionStateInit (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowNavigationFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
llvm::Value * codegenFrameBound (bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
 
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
 
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
 
WindowPartitionBufferPtrs codegenLoadPartitionBuffers (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
 
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
 
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange (const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::vector< llvm::Value * > prepareRowModeFuncArgs (bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
 
std::vector< llvm::Value * > prepareRangeModeFuncArgs (bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
 
const std::string getOrderKeyTypeName (WindowFunctionContext *window_func_context) const
 
llvm::Value * codegenLoadCurrentValueFromColBuf (WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
 
size_t getOrderKeySize (WindowFunctionContext *window_func_context) const
 
const SQLTypeInfo getFirstOrderColTypeInfo (WindowFunctionContext *window_func_context) const
 
std::string getFramingFuncName (const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
 
void codegenWindowAvgEpilogue (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *crt_val, llvm::Value *window_func_null_val)
 
llvm::Value * codegenAggregateWindowState (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
 
llvm::Value * aggregateWindowStatePtr (CodeGenerator *code_generator, const CompilationOptions &co)
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) const
 
std::unordered_map< shared::TableKey, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr< ExecutionKernel > > createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernelsImpl (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
 
void launchKernelsLocked (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
void launchKernelsViaResourceMgr (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const QueryMemoryDescriptor &query_mem_desc)
 Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr. More...
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > getUniqueThreadSharedResultSets (const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, unsigned const error_code_idx, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (const std::unordered_set< shared::TableKey > &phys_table_keys)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr< llvm::Module > & get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr< llvm::LLVMContext > context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_ {false}
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
unsigned block_size_x_
 
unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count
 
static const size_t auto_num_threads {size_t(0)}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int, std::shared_ptr< Executor > > executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class BoundingBoxIntersectJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 415 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1403 of file Execute.h.
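
The bool flag indicates whether a cached cardinality was found; the size_t carries the cardinality itself. A minimal caller-side sketch, assuming an existing CardinalityCacheKey named key and an Executor instance named executor (use_cardinality and estimate_cardinality are placeholder helpers, not part of this API):

 Executor::CachedCardinality cached = executor->getCachedCardinality(key);
 if (cached.first) {
   use_cardinality(cached.second);                    // cache hit: reuse the stored estimate
 } else {
   const size_t estimated = estimate_cardinality();   // cache miss: recompute
   executor->addToCardinalityCache(key, estimated);   // and store it for later queries
 }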

using Executor::ExecutorId = size_t

Definition at line 422 of file Execute.h.

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>

Definition at line 890 of file Execute.h.

Member Enumeration Documentation

enum Executor::ExtModuleKinds

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 518 of file Execute.h.

518  {
519  template_module, // RuntimeFunctions.bc
520  udf_cpu_module, // Load-time UDFs for CPU execution
521  udf_gpu_module, // Load-time UDFs for GPU execution
522  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
523  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
524  rt_geos_module, // geos functions
525  rt_libdevice_module // math library functions for GPU execution
526  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module
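
A minimal sketch of selecting between the CPU and GPU run-time UDF modules through the public accessors listed above; the linking step is only indicated by a comment, and the surrounding setup (an initialized Executor named executor) is assumed:

 const bool is_gpu = true;
 if (executor->has_rt_udf_module(is_gpu)) {
   const std::unique_ptr<llvm::Module>& rt_udf_module =
       executor->get_rt_udf_module(is_gpu);
   // ... clone or link rt_udf_module into the query's LLVM module ...
 }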

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 276 of file Execute.cpp.

283  : executor_id_(executor_id)
284  , context_(new llvm::LLVMContext())
285  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const ExecutorId executor_id_
Definition: Execute.h:1476
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1591 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1591  {
1592  ExecutorMutexHolder ret;
1593  if (executor_id_ == Executor::UNITARY_EXECUTOR_ID) {
1594  // Only one unitary executor can run at a time
1595  ret.unique_lock = heavyai::unique_lock<heavyai::shared_mutex>(execute_mutex_);
1596  } else {
1597  ret.shared_lock = heavyai::shared_lock<heavyai::shared_mutex>(execute_mutex_);
1598  }
1599  return ret;
1600  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1476
std::unique_lock< T > unique_lock
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 4475 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

4477  {
4478  if (!co.filter_on_deleted_column) {
4479  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
4480  }
4481  auto ra_exe_unit_with_deleted = ra_exe_unit;
4482  PlanState::DeletedColumnsMap deleted_cols_map;
4483  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
4484  if (input_table.getSourceType() != InputSourceType::TABLE) {
4485  continue;
4486  }
4487  const auto& table_key = input_table.getTableKey();
4488  const auto catalog =
4489  Catalog_Namespace::SysCatalog::instance().getCatalog(table_key.db_id);
4490  CHECK(catalog);
4491  const auto td = catalog->getMetadataForTable(table_key.table_id);
4492  CHECK(td);
4493  const auto deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
4494  if (!deleted_cd) {
4495  continue;
4496  }
4497  CHECK(deleted_cd->columnType.is_boolean());
4498  // check deleted column is not already present
4499  bool found = false;
4500  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
4501  if (input_col.get()->getColId() == deleted_cd->columnId &&
4502  input_col.get()->getScanDesc().getTableKey() == table_key &&
4503  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
4504  found = true;
4505  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
4506  break;
4507  }
4508  }
4509  if (!found) {
4510  // add deleted column
4511  ra_exe_unit_with_deleted.input_col_descs.emplace_back(
4512  new InputColDescriptor(deleted_cd->columnId,
4513  deleted_cd->tableId,
4514  table_key.db_id,
4515  input_table.getNestLevel()));
4516  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
4517  }
4518  }
4519  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
4520 }
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
static SysCatalog & instance()
Definition: SysCatalog.h:343
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd, const shared::TableKey &table_key)
Definition: Execute.cpp:4463
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291


llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1186 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1187  {
1188  AUTOMATIC_IR_METADATA(cgen_state_.get());
1189  // Iterators are added for loop-outer joins when the head of the loop is generated,
1190  // then once again when the body if generated. Allow this instead of special handling
1191  // of call sites.
1192  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1193  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1194  return it->second;
1195  }
1196  CHECK(!prev_iters.empty());
1197  llvm::Value* matching_row_index = prev_iters.back();
1198  const auto it_ok =
1199  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1200  CHECK(it_ok.second);
1201  return matching_row_index;
1202 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
void Executor::addToCardinalityCache ( const CardinalityCacheKey cache_key,
const size_t  cache_value 
)

Definition at line 5289 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

5290  {
5291  if (g_use_estimator_result_cache) {
5292  heavyai::unique_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5293  cardinality_cache_[cache_key] = cache_value;
5294  VLOG(1) << "Put estimated cardinality to the cache";
5295  }
5296 }
std::unique_lock< T > unique_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139
#define VLOG(n)
Definition: Logger.h:388
bool Executor::addToQuerySessionList ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5120 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

5126  {
5127  // an internal API that enrolls the query session into the Executor's session map
5128  if (queries_session_map_.count(query_session)) {
5129  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
5130  queries_session_map_.at(query_session).erase(submitted_time_str);
5131  queries_session_map_.at(query_session)
5132  .emplace(submitted_time_str,
5133  QuerySessionStatus(query_session,
5134  executor_id,
5135  query_str,
5136  submitted_time_str,
5137  query_status));
5138  } else {
5139  queries_session_map_.at(query_session)
5140  .emplace(submitted_time_str,
5141  QuerySessionStatus(query_session,
5142  executor_id,
5143  query_str,
5144  submitted_time_str,
5145  query_status));
5146  }
5147  } else {
5148  std::map<std::string, QuerySessionStatus> executor_per_query_map;
5149  executor_per_query_map.emplace(
5150  submitted_time_str,
5151  QuerySessionStatus(
5152  query_session, executor_id, query_str, submitted_time_str, query_status));
5153  queries_session_map_.emplace(query_session, executor_per_query_map);
5154  }
5155  return queries_interrupt_flag_.emplace(query_session, false).second;
5156 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578


void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2523 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kMODE, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2525  {
2526  TransientDictIdVisitor dict_id_visitor;
2527 
2528  auto visit_expr =
2529  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2530  if (!expr) {
2531  return;
2532  }
2533  const auto& dict_key = dict_id_visitor.visit(expr);
2534  if (dict_key.dict_id >= 0) {
2535  auto sdp = getStringDictionaryProxy(dict_key, row_set_mem_owner, true);
2536  CHECK(sdp);
2537  TransientStringLiteralsVisitor visitor(sdp, this);
2538  visitor.visit(expr);
2539  }
2540  };
2541 
2542  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2543  visit_expr(group_expr.get());
2544  }
2545 
2546  for (const auto& group_expr : ra_exe_unit.quals) {
2547  visit_expr(group_expr.get());
2548  }
2549 
2550  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2551  visit_expr(group_expr.get());
2552  }
2553 
2554  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2555  const auto& target_type = target_expr->get_type_info();
2556  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2557  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2558  if (agg_expr) {
2559  // The following agg types require taking into account transient string values
2560  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kSINGLE_VALUE ||
2561  agg_expr->get_aggtype() == kSAMPLE || agg_expr->get_aggtype() == kMODE) {
2562  visit_expr(agg_expr->get_arg());
2563  }
2564  } else {
2565  visit_expr(target_expr);
2566  }
2567  }
2568  };
2569  const auto& target_exprs = ra_exe_unit.target_exprs;
2570  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2571  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2572  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2573 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:578
std::vector< Analyzer::Expr * > target_exprs_union
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:86
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals


void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1956 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1957  {
1958  Executor::extension_module_sources[is_cuda_ir
1959  ? Executor::ExtModuleKinds::udf_gpu_module
1960  : Executor::ExtModuleKinds::udf_cpu_module] =
1961  udf_ir_filename;
1962 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528


llvm::Value * Executor::aggregateWindowStatePtr ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 242 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

243  {
244  AUTOMATIC_IR_METADATA(cgen_state_.get());
245  const auto window_func_context =
246  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
247  const auto window_func = window_func_context->getWindowFunction();
248  const auto arg_ti = get_adjusted_window_type_info(window_func);
249  llvm::Type* aggregate_state_type =
250  arg_ti.get_type() == kFLOAT
251  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
252  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
253  const auto aggregate_state_i64 = cgen_state_->llInt(
254  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
255  return CodegenUtil::createPtrWithHoistedMemoryAddr(
256  cgen_state_.get(),
257  code_generator,
258  co,
259  aggregate_state_i64,
260  aggregate_state_type,
261  WindowFunctionContext::NUM_EXECUTION_DEVICES)
262  .front();
263 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1468 of file Execute.h.

Referenced by serializeLiterals().

1468  {
1469  size_t off = off_in;
1470  if (off % alignment != 0) {
1471  off += (alignment - off % alignment);
1472  }
1473  return off;
1474  }
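
A free-standing restatement of the same rounding-up computation, for illustration only (align itself is a private static member and not callable from outside the class):

 size_t align_up(const size_t off_in, const size_t alignment) {
   size_t off = off_in;
   if (off % alignment != 0) {
     off += (alignment - off % alignment);  // round up to the next multiple
   }
   return off;
 }
 // align_up(13, 8) == 16, align_up(16, 8) == 16, align_up(0, 4) == 0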


CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 5018 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

5021  {
5022  if (!query_session_id.empty()) {
5023  // if session is valid, do update 1) the exact executor id and 2) query status
5024  heavyai::unique_lock<heavyai::shared_mutex> write_lock(executor_session_mutex_);
5025  updateQuerySessionExecutorAssignment(
5026  query_session_id, query_submitted_time, executor_id_, write_lock);
5027  updateQuerySessionStatusWithLock(query_session_id,
5028  query_submitted_time,
5029  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
5030  write_lock);
5031  }
5032  return {query_session_id, query_str};
5033 }
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5158
const ExecutorId executor_id_
Definition: Execute.h:1476
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5184
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574


void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2303 of file NativeCodegen.cpp.

2303  {
2304  llvm::Module* M = cgen_state_->module_;
2305  if (M->getFunction("allocate_varlen_buffer") == nullptr) {
2306  return;
2307  }
2308 
2309  // read metadata
2310  bool should_track = false;
2311  auto* flag = M->getModuleFlag("manage_memory_buffer");
2312  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2313  if (cnt->getZExtValue() == 1) {
2314  should_track = true;
2315  }
2316  }
2317 
2318  if (!should_track) {
2319  // metadata is not present
2320  return;
2321  }
2322 
2323  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2324  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2325 
2326  for (llvm::Function& F : *M) {
2327  for (llvm::BasicBlock& BB : F) {
2328  for (llvm::Instruction& I : BB) {
2329  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2330  // Keep track of calls to "allocate_varlen_buffer" for later processing
2331  auto const called_func_name = CodegenUtil::getCalledFunctionName(*CI);
2332  if (called_func_name && *called_func_name == "allocate_varlen_buffer") {
2333  calls_to_analyze.push_back(CI);
2334  }
2335  }
2336  }
2337  }
2338  }
2339 
2340  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2341  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2342  llvm::IRBuilder<> Builder(cgen_state_->context_);
2343  auto i64 = get_int_type(64, cgen_state_->context_);
2344  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2345  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2346  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2347  llvm::FunctionCallee register_buffer_fn =
2348  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2349 
2350  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2351  for (llvm::CallInst* CI : calls_to_analyze) {
2352  bool found = false;
2353  // for each user of the function, check if its a callinst
2354  // and if the callinst is calling "register_buffer_with_executor_rsm"
2355  // if no such instruction exist, add one registering the buffer
2356  for (llvm::User* U : CI->users()) {
2357  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2358  auto const func_name = CodegenUtil::getCalledFunctionName(*call);
2359  if (func_name && *func_name == "register_buffer_with_executor_rsm") {
2360  found = true;
2361  break;
2362  }
2363  }
2364  }
2365  if (!found) {
2366  Builder.SetInsertPoint(CI->getNextNode());
2367  Builder.CreateCall(register_buffer_fn,
2368  {ll_int(executor_addr, cgen_state_->context_), CI});
2369  }
2370  }
2371 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
#define LOG(tag)
Definition: Logger.h:285
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 4366 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

4366  {
4367  CHECK(data_mgr_);
4368  const auto cuda_mgr = data_mgr_->getCudaMgr();
4369  if (!cuda_mgr) {
4370  return 0;
4371  }
4372  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
4373  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
4374 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
unsigned block_size_x_
Definition: Execute.h:1552
#define CHECK(condition)
Definition: Logger.h:291
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:134
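
A hypothetical caller-side helper showing how blockSize() and gridSize() might combine into a launch size; it is only a sketch of the documented fallback behavior (0 is returned when no CUDA manager is available), not code from the repository:

 unsigned total_gpu_threads(const Executor& executor) {
   const unsigned threads_per_block = executor.blockSize();  // 0 if CPU-only
   const unsigned blocks = executor.gridSize();              // 0 if CPU-only
   return threads_per_block * blocks;
 }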


std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 1027 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

1034  {
1035  AUTOMATIC_IR_METADATA(cgen_state_.get());
1036  std::shared_ptr<HashJoin> current_level_hash_table;
1037  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
1038  JoinType join_type,
1039  std::shared_ptr<Analyzer::Expr> qual) {
1040  if (join_type == JoinType::LEFT) {
1041  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
1042  } else {
1043  add_qualifier_to_execution_unit(ra_exe_unit, qual);
1044  }
1045  };
1046  for (const auto& join_qual : current_level_join_conditions.quals) {
1047  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
1048  if (current_level_hash_table || !qual_bin_oper ||
1049  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
1050  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
1051  if (!current_level_hash_table) {
1052  fail_reasons.emplace_back("No equijoin expression found");
1053  }
1054  continue;
1055  }
1056  check_valid_join_qual(qual_bin_oper);
1057  JoinHashTableOrError hash_table_or_error;
1058  if (!current_level_hash_table) {
1059  hash_table_or_error = buildHashTableForQualifier(
1060  qual_bin_oper,
1061  query_infos,
1064  current_level_join_conditions.type,
1066  column_cache,
1067  ra_exe_unit.hash_table_build_plan_dag,
1068  ra_exe_unit.query_hint,
1069  ra_exe_unit.table_id_to_node_map);
1070  current_level_hash_table = hash_table_or_error.hash_table;
1071  }
1072  if (hash_table_or_error.hash_table) {
1073  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
1074  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
1075  } else {
1076  fail_reasons.push_back(hash_table_or_error.fail_reason);
1077  if (!current_level_hash_table) {
1078  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
1079  << " fails: " << hash_table_or_error.fail_reason;
1080  }
1081  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
1082  }
1083  }
1084  return current_level_hash_table;
1085 }
JoinType
Definition: sqldefs.h:238
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:72
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:535
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:4309
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:388
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:586

+ Here is the call graph for this function:

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap column_cache,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
private

Definition at line 4309 of file Execute.cpp.

References deviceCountForMemoryLevel(), g_enable_bbox_intersect_hashjoin, g_enable_dynamic_watchdog, HashJoin::getInstance(), and interrupted_.

4318  {
4319  if (!g_enable_bbox_intersect_hashjoin && qual_bin_oper->is_bbox_intersect_oper()) {
4320  return {nullptr,
4321  "Bounding box intersection disabled, attempting to fall back to loop join"};
4322  }
4323  if (g_enable_dynamic_watchdog && interrupted_.load()) {
4324  throw QueryExecutionError(ErrorCode::INTERRUPTED);
4325  }
4326  try {
4327  auto tbl = HashJoin::getInstance(qual_bin_oper,
4328  query_infos,
4329  memory_level,
4330  join_type,
4331  preferred_hash_type,
4332  deviceCountForMemoryLevel(memory_level),
4333  column_cache,
4334  this,
4335  hashtable_build_dag_map,
4336  query_hint,
4337  table_id_to_node_map);
4338  return {tbl, ""};
4339  } catch (const HashJoinFail& e) {
4340  return {nullptr, e.what()};
4341  }
4342 }
std::atomic< bool > interrupted_
Definition: Execute.h:1543
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:109
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1330
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:285

+ Here is the call graph for this function:

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const shared::TableKey inner_table_key,
const CompilationOptions co 
)
private

Definition at line 859 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

863  {
865  return nullptr;
866  }
867 
868  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
869  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
870  const auto& condition = current_level_join_conditions.quals.front();
871  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
872  CHECK(bin_oper) << condition->toString();
873  const auto rhs =
874  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
875  const auto lhs =
876  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
877  if (lhs && rhs && lhs->getTableKey() != rhs->getTableKey()) {
878  const Analyzer::ColumnVar* selected_lhs{nullptr};
879  // grab the left hand side column -- this is somewhat similar to normalize column
880  // pair, and a better solution may be to hoist that function out of the join
881  // framework and normalize columns at the top of build join loops
882  if (lhs->getTableKey() == inner_table_id) {
883  selected_lhs = rhs;
884  } else if (rhs->getTableKey() == inner_table_id) {
885  selected_lhs = lhs;
886  }
887  if (selected_lhs) {
888  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
889  // get all LHS-only filters
890  auto should_hoist_qual = [&hoisted_quals](const auto& qual,
891  const shared::TableKey& table_key) {
892  CHECK(qual);
893 
894  ExprTableIdVisitor visitor;
895  const auto table_keys = visitor.visit(qual.get());
896  if (table_keys.size() == 1 && table_keys.find(table_key) != table_keys.end()) {
897  hoisted_quals.push_back(qual);
898  }
899  };
900  for (const auto& qual : ra_exe_unit.simple_quals) {
901  should_hoist_qual(qual, selected_lhs->getTableKey());
902  }
903  for (const auto& qual : ra_exe_unit.quals) {
904  should_hoist_qual(qual, selected_lhs->getTableKey());
905  }
906 
907  // build the filters callback and return it
908  if (!hoisted_quals.empty()) {
909  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
910  llvm::BasicBlock* exit_bb,
911  const std::string& loop_name,
912  llvm::Function* parent_func,
913  CgenState* cgen_state) -> llvm::BasicBlock* {
914  // make sure we have quals to hoist
915  bool has_quals_to_hoist = false;
916  for (const auto& qual : hoisted_quals) {
917  // check to see if the filter was previously hoisted. if all filters were
918  // previously hoisted, this callback becomes a noop
919  if (plan_state_->hoisted_filters_.count(qual) == 0) {
920  has_quals_to_hoist = true;
921  break;
922  }
923  }
924 
925  if (!has_quals_to_hoist) {
926  return nullptr;
927  }
928 
929  AUTOMATIC_IR_METADATA(cgen_state);
930 
931  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
932  auto& context = builder.getContext();
933 
934  const auto filter_bb =
935  llvm::BasicBlock::Create(context,
936  "hoisted_left_join_filters_" + loop_name,
937  parent_func,
938  /*insert_before=*/true_bb);
939  builder.SetInsertPoint(filter_bb);
940 
941  llvm::Value* filter_lv = cgen_state_->llBool(true);
942  CodeGenerator code_generator(this);
944  for (const auto& qual : hoisted_quals) {
945  if (plan_state_->hoisted_filters_.insert(qual).second) {
946  // qual was inserted into the hoisted filters map, which means we have not
947  // seen this qual before. Generate filter.
948  VLOG(1) << "Generating code for hoisted left hand side qualifier "
949  << qual->toString();
950  auto cond = code_generator.toBool(
951  code_generator.codegen(qual.get(), true, co).front());
952  filter_lv = builder.CreateAnd(filter_lv, cond);
953  }
954  }
955  CHECK(filter_lv->getType()->isIntegerTy(1));
956 
957  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
958  return filter_bb;
959  };
960  }
961  }
962  }
963  }
964  return nullptr;
965 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:107
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:388
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const CompilationOptions co 
)
private

Definition at line 968 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

970  {
972  if (!co.filter_on_deleted_column) {
973  return nullptr;
974  }
975  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
976  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
977  if (input_desc.getSourceType() != InputSourceType::TABLE) {
978  return nullptr;
979  }
980 
981  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableKey());
982  if (!deleted_cd) {
983  return nullptr;
984  }
985  CHECK(deleted_cd->columnType.is_boolean());
986  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(
987  deleted_cd->columnType,
988  shared::ColumnKey{input_desc.getTableKey(), deleted_cd->columnId},
989  input_desc.getNestLevel());
990  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
991  llvm::Value* have_more_inner_rows) {
992  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
993  // Avoid fetching the deleted column from a position which is not valid.
994  // An invalid position can be returned by a one to one hash lookup (negative)
995  // or at the end of iteration over a set of matching values.
996  llvm::Value* is_valid_it{nullptr};
997  if (have_more_inner_rows) {
998  is_valid_it = have_more_inner_rows;
999  } else {
1000  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
1001  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
1002  }
1003  const auto it_valid_bb = llvm::BasicBlock::Create(
1004  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
1005  const auto it_not_valid_bb = llvm::BasicBlock::Create(
1006  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
1007  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
1008  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
1009  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
1010  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
1011  CodeGenerator code_generator(this);
1012  const auto row_is_deleted = code_generator.toBool(
1013  code_generator.codegen(deleted_expr.get(), true, co).front());
1014  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1015  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
1016  const auto row_is_deleted_default = cgen_state_->llBool(false);
1017  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1018  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
1019  auto row_is_deleted_or_default =
1020  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
1021  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
1022  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
1023  return row_is_deleted_or_default;
1024  };
1025 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 610 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

615  {
618  std::vector<JoinLoop> join_loops;
619  for (size_t level_idx = 0, current_hash_table_idx = 0;
620  level_idx < ra_exe_unit.join_quals.size();
621  ++level_idx) {
622  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
623  std::vector<std::string> fail_reasons;
624  const auto current_level_hash_table =
625  buildCurrentLevelHashTable(current_level_join_conditions,
626  level_idx,
627  ra_exe_unit,
628  co,
629  query_infos,
630  column_cache,
631  fail_reasons);
632  const auto found_outer_join_matches_cb =
633  [this, level_idx](llvm::Value* found_outer_join_matches) {
634  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
635  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
636  cgen_state_->outer_join_match_found_per_level_[level_idx] =
637  found_outer_join_matches;
638  };
639  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
640  auto rem_left_join_quals_it =
641  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
642  bool has_remaining_left_join_quals =
643  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
644  !rem_left_join_quals_it->second.empty();
645  const auto outer_join_condition_remaining_quals_cb =
646  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
647  // when we have multiple quals for the left join in the current join level,
648  // we first try to build a hashtable using one of the candidate quals,
649  // and handle the remaining quals as extra join conditions
650  FetchCacheAnchor anchor(cgen_state_.get());
651  addJoinLoopIterator(prev_iters, level_idx + 1);
652  llvm::Value* left_join_cond = cgen_state_->llBool(true);
653  CodeGenerator code_generator(this);
654  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
655  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
656  for (auto expr : it->second) {
657  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
658  left_join_cond,
659  code_generator.toBool(
660  code_generator.codegen(expr.get(), true, co).front()));
661  }
662  }
663  return left_join_cond;
664  };
665  if (current_level_hash_table) {
666  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
667  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
668  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
669  join_loops.emplace_back(
670  /*kind=*/JoinLoopKind::Singleton,
671  /*type=*/current_level_join_conditions.type,
672  /*iteration_domain_codegen=*/
673  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
674  const std::vector<llvm::Value*>& prev_iters) {
675  addJoinLoopIterator(prev_iters, level_idx);
676  JoinLoopDomain domain{{0}};
677  domain.slot_lookup_result =
678  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
679  return domain;
680  },
681  /*outer_condition_match=*/
682  current_level_join_conditions.type == JoinType::LEFT &&
683  has_remaining_left_join_quals
684  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
685  outer_join_condition_remaining_quals_cb)
686  : nullptr,
687  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
688  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
689  : nullptr,
690  /*hoisted_filters=*/hoisted_filters_cb,
691  /*is_deleted=*/is_deleted_cb,
692  /*nested_loop_join=*/false);
693  } else if (auto range_join_table =
694  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
695  join_loops.emplace_back(
696  /* kind= */ JoinLoopKind::MultiSet,
697  /* type= */ current_level_join_conditions.type,
698  /* iteration_domain_codegen= */
699  [this,
700  range_join_table,
701  current_hash_table_idx,
702  level_idx,
703  current_level_hash_table,
704  &co](const std::vector<llvm::Value*>& prev_iters) {
705  addJoinLoopIterator(prev_iters, level_idx);
706  JoinLoopDomain domain{{0}};
707  CHECK(!prev_iters.empty());
708  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
709  co, current_hash_table_idx, prev_iters.back());
710  domain.values_buffer = matching_set.elements;
711  domain.element_count = matching_set.count;
712  return domain;
713  },
714  /* outer_condition_match= */
715  current_level_join_conditions.type == JoinType::LEFT
716  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
717  outer_join_condition_remaining_quals_cb)
718  : nullptr,
719  /* found_outer_matches= */
720  current_level_join_conditions.type == JoinType::LEFT
721  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
722  : nullptr,
723  /* hoisted_filters= */ nullptr, // <<! TODO
724  /* is_deleted= */ is_deleted_cb,
725  /*nested_loop_join=*/false);
726  } else {
727  join_loops.emplace_back(
728  /*kind=*/JoinLoopKind::Set,
729  /*type=*/current_level_join_conditions.type,
730  /*iteration_domain_codegen=*/
731  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
732  const std::vector<llvm::Value*>& prev_iters) {
733  addJoinLoopIterator(prev_iters, level_idx);
734  JoinLoopDomain domain{{0}};
735  const auto matching_set = current_level_hash_table->codegenMatchingSet(
736  co, current_hash_table_idx);
737  domain.values_buffer = matching_set.elements;
738  domain.element_count = matching_set.count;
739  domain.error_code = matching_set.error_code;
740  return domain;
741  },
742  /*outer_condition_match=*/
743  current_level_join_conditions.type == JoinType::LEFT
744  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
745  outer_join_condition_remaining_quals_cb)
746  : nullptr,
747  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
748  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
749  : nullptr,
750  /*hoisted_filters=*/hoisted_filters_cb,
751  /*is_deleted=*/is_deleted_cb,
752  /*nested_loop_join=*/false);
753  }
754  ++current_hash_table_idx;
755  } else {
756  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
757  ? "No equijoin expression found"
758  : boost::algorithm::join(fail_reasons, " | ");
760  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
761  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
762  // condition.
763  VLOG(1) << "Unable to build hash table, falling back to loop join: "
764  << fail_reasons_str;
765  const auto outer_join_condition_cb =
766  [this, level_idx, &co, &current_level_join_conditions](
767  const std::vector<llvm::Value*>& prev_iters) {
768  // The values generated for the match path don't dominate all uses
769  // since on the non-match path nulls are generated. Reset the cache
770  // once the condition is generated to avoid incorrect reuse.
771  FetchCacheAnchor anchor(cgen_state_.get());
772  addJoinLoopIterator(prev_iters, level_idx + 1);
773  llvm::Value* left_join_cond = cgen_state_->llBool(true);
774  CodeGenerator code_generator(this);
775  for (auto expr : current_level_join_conditions.quals) {
776  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
777  left_join_cond,
778  code_generator.toBool(
779  code_generator.codegen(expr.get(), true, co).front()));
780  }
781  return left_join_cond;
782  };
783  join_loops.emplace_back(
784  /*kind=*/JoinLoopKind::UpperBound,
785  /*type=*/current_level_join_conditions.type,
786  /*iteration_domain_codegen=*/
787  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
788  addJoinLoopIterator(prev_iters, level_idx);
789  JoinLoopDomain domain{{0}};
790  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
791  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
792  arg->getType()->getScalarType()->getPointerElementType(),
793  arg,
794  cgen_state_->llInt(int32_t(level_idx + 1)));
795  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
796  rows_per_scan_ptr->getType()->getPointerElementType(),
797  rows_per_scan_ptr,
798  "num_rows_per_scan");
799  return domain;
800  },
801  /*outer_condition_match=*/
802  current_level_join_conditions.type == JoinType::LEFT
803  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
804  outer_join_condition_cb)
805  : nullptr,
806  /*found_outer_matches=*/
807  current_level_join_conditions.type == JoinType::LEFT
808  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
809  : nullptr,
810  /*hoisted_filters=*/nullptr,
811  /*is_deleted=*/is_deleted_cb,
812  /*nested_loop_join=*/true);
813  }
814  }
815  return join_loops;
816 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
#define INJECT_TIMER(DESC)
Definition: measure.h:122
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:1027
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:545
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:610
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:968
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Definition: IRCodegen.cpp:859
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3774 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

3779  {
3780  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3781  size_t frag_pos{0};
3782  const auto& input_descs = ra_exe_unit.input_descs;
3783  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3784  const auto& table_key = input_descs[scan_idx].getTableKey();
3785  CHECK_EQ(selected_fragments[scan_idx].table_key, table_key);
3786  selected_fragments_crossjoin.push_back(
3787  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3788  for (const auto& col_id : col_global_ids) {
3789  CHECK(col_id);
3790  const auto& input_desc = col_id->getScanDesc();
3791  if (input_desc.getTableKey() != table_key ||
3792  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3793  continue;
3794  }
3795  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3796  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3797  CHECK_LT(static_cast<size_t>(it->second),
3798  plan_state_->global_to_local_col_ids_.size());
3799  local_col_to_frag_pos[it->second] = frag_pos;
3800  }
3801  ++frag_pos;
3802  }
3803 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3760
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3805 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

3808  {
3809  const auto& input_descs = ra_exe_unit.input_descs;
3810  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3811  // selected_fragments is set in assignFragsToKernelDispatch (execution_kernel.fragments)
3812  if (selected_fragments[0].table_key == input_descs[scan_idx].getTableKey()) {
3813  selected_fragments_crossjoin.push_back({size_t(1)});
3814  }
3815  }
3816 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4598 of file Execute.cpp.

References CHECK, shared::ColumnKey::column_id, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), Analyzer::ColumnVar::getColumnKey(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

4602  {
4603  auto col_id = lhs_col->getColumnKey().column_id;
4604  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4605  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4607  }
4608  double chunk_min{0.};
4609  double chunk_max{0.};
4610  const auto& chunk_type = lhs_col->get_type_info();
4611  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4612  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4613  if (chunk_min > chunk_max) {
4615  }
4616 
4617  const auto datum_fp = rhs_const->get_constval();
4618  const auto rhs_type = rhs_const->get_type_info().get_type();
4619  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4620 
4621  // Do we need to codegen the constant like the integer path does?
4622  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4623 
4624  // Todo: dedup the following comparison code with the integer/timestamp path; it is
4625  // slightly tricky to do cleanly as we do not have rowid on this path
4626  switch (comp_expr->get_optype()) {
4627  case kGE:
4628  if (chunk_max < rhs_val) {
4630  }
4631  break;
4632  case kGT:
4633  if (chunk_max <= rhs_val) {
4635  }
4636  break;
4637  case kLE:
4638  if (chunk_min > rhs_val) {
4640  }
4641  break;
4642  case kLT:
4643  if (chunk_min >= rhs_val) {
4645  }
4646  break;
4647  case kEQ:
4648  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4650  }
4651  break;
4652  default:
4653  break;
4654  }
4656 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:37
Definition: sqldefs.h:38
Definition: sqldefs.h:32
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
SQLOps get_optype() const
Definition: Analyzer.h:452
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
Definition: sqldefs.h:36
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
Datum get_constval() const
Definition: Analyzer.h:348
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:35
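
The switch above compares the fragment's per-chunk min/max statistics against the constant; a fragment is skippable only when the statistics prove no row can satisfy the predicate. A standalone sketch of that decision (the placement of the listing's elided INVALID/NOT_SKIPPABLE return statements is an assumption):

enum class FragmentSkipStatus { SKIPPABLE, NOT_SKIPPABLE, INVALID };
enum class CompareOp { GE, GT, LE, LT, EQ };

FragmentSkipStatus can_skip_fragment(CompareOp op,
                                     double chunk_min,
                                     double chunk_max,
                                     double rhs_val) {
  if (chunk_min > chunk_max) {
    return FragmentSkipStatus::INVALID;  // stats are unusable
  }
  switch (op) {
    case CompareOp::GE:
      if (chunk_max < rhs_val) return FragmentSkipStatus::SKIPPABLE;
      break;
    case CompareOp::GT:
      if (chunk_max <= rhs_val) return FragmentSkipStatus::SKIPPABLE;
      break;
    case CompareOp::LE:
      if (chunk_min > rhs_val) return FragmentSkipStatus::SKIPPABLE;
      break;
    case CompareOp::LT:
      if (chunk_min >= rhs_val) return FragmentSkipStatus::SKIPPABLE;
      break;
    case CompareOp::EQ:
      if (chunk_min > rhs_val || chunk_max < rhs_val) {
        return FragmentSkipStatus::SKIPPABLE;
      }
      break;
  }
  return FragmentSkipStatus::NOT_SKIPPABLE;
}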

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 4401 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

4403  {
4405  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
4406  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
4407  llvm::Type* fp_type{nullptr};
4408  switch (to_ti.get_size()) {
4409  case 4:
4410  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
4411  break;
4412  case 8:
4413  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
4414  break;
4415  default:
4416  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
4417  }
4418  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
4419  if (from_ti.get_scale()) {
4420  value = cgen_state_->ir_builder_.CreateFDiv(
4421  value,
4422  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
4423  }
4424  }
4425  return value;
4426 }
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)
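
For decimal inputs the cast above emits a signed-integer-to-FP conversion followed by a division by 10^scale (via exp_to_scale). A host-side sketch of the equivalent arithmetic, assuming the usual scaled-integer decimal representation:

#include <cstdint>

// Compute 10^scale, mirroring what exp_to_scale provides for the FDiv above.
static uint64_t pow10_scale(unsigned scale) {
  uint64_t result = 1;
  while (scale--) {
    result *= 10;
  }
  return result;
}

// Convert a scaled integer (e.g. DECIMAL(10,2) stored as int64_t) to double.
double decimal_to_double(int64_t stored_value, unsigned scale) {
  return static_cast<double>(stored_value) / static_cast<double>(pow10_scale(scale));
}

// Example: DECIMAL(10,2) value 12345 represents 123.45,
// so decimal_to_double(12345, 2) yields 123.45.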

+ Here is the call graph for this function:

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 4428 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

4428  {
4430  CHECK(val->getType()->isPointerTy());
4431 
4432  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
4433  const auto val_type = val_ptr_type->getPointerElementType();
4434  size_t val_width = 0;
4435  if (val_type->isIntegerTy()) {
4436  val_width = val_type->getIntegerBitWidth();
4437  } else {
4438  if (val_type->isFloatTy()) {
4439  val_width = 32;
4440  } else {
4441  CHECK(val_type->isDoubleTy());
4442  val_width = 64;
4443  }
4444  }
4445  CHECK_LT(size_t(0), val_width);
4446  if (bitWidth == val_width) {
4447  return val;
4448  }
4449  return cgen_state_->ir_builder_.CreateBitCast(
4450  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
4451 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4991 of file Execute.cpp.

References current_query_session_.

4993  {
4994  // the candidate session matches only when it is non-empty and equal to
4995  // the currently running query session
4996  return !candidate_query_session.empty() &&
4997  (current_query_session_ == candidate_query_session);
4998 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5265 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

5267  {
5268  if (query_session.empty()) {
5269  return false;
5270  }
5271  return !query_session.empty() && queries_session_map_.count(query_session);
5272 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5254 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

5256  {
5257  if (query_session.empty()) {
5258  return false;
5259  }
5260  auto flag_it = queries_interrupt_flag_.find(query_session);
5261  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
5262  flag_it->second;
5263 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578

+ Here is the caller graph for this function:

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 5363 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

5363  {
5364  // this function should be called within an executor which is assigned
5365  // to a specific query thread (which indicates the session is already enrolled);
5366  // first check whether this is called from a non-unitary executor
5368  return false;
5369  };
5371  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
5372  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
5373  flag_it->second;
5374 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1476
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 5035 of file Execute.cpp.

References executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

5035  {
5036  // check whether we are okay to execute the "pending" query,
5037  // i.e., before running the query, check whether this query session has ALREADY been interrupted
5039  if (query_session.empty()) {
5040  return;
5041  }
5042  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
5043  // something goes wrong since we assume this is caller's responsibility
5044  // (call this function only for enrolled query session)
5045  if (!queries_session_map_.count(query_session)) {
5046  VLOG(1) << "Interrupting pending query is not available since the query session is "
5047  "not enrolled";
5048  } else {
5049  // here the query session is enrolled but the interrupt flag is not registered
5050  VLOG(1)
5051  << "Interrupting pending query is not available since its interrupt flag is "
5052  "not registered";
5053  }
5054  return;
5055  }
5056  if (queries_interrupt_flag_[query_session]) {
5057  throw QueryExecutionError(ErrorCode::INTERRUPTED);
5058  }
5059 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
#define VLOG(n)
Definition: Logger.h:388
void Executor::clearCaches ( bool  runtime_only = false)
void Executor::clearCardinalityCache ( )
static

Definition at line 5309 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, and recycler_mutex_.

Referenced by clearExternalCaches().

5309  {
5312  cardinality_cache_.clear();
5313  }
5314 }
std::unique_lock< T > unique_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139

+ Here is the caller graph for this function:

static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 438 of file Execute.h.

References clearCardinalityCache(), TableDescriptor::getTableChunkKey(), hash_value(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable(), invalidateCardinalityCacheForTable(), and TableDescriptor::tableId.

Referenced by AlterTableAlterColumnCommand::clearInMemoryData(), clearMemory(), DropForeignTableCommand::execute(), Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::OptimizeTableStmt::execute(), Parser::AddColumnStmt::execute(), Parser::DropColumnStmt::execute(), Parser::AlterTableParamStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), RelAlgExecutor::executeSimpleInsert(), RelAlgExecutor::executeUpdate(), Catalog_Namespace::Catalog::invalidateCachesForTable(), foreign_storage::refresh_foreign_table_unlocked(), DBHandler::set_table_epochs(), Catalog_Namespace::Catalog::setUncappedTableEpoch(), and DBHandler::shutdown().

440  {
441  bool clearEntireCache = true;
442  if (td) {
443  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
444  if (!table_chunk_key_prefix.empty()) {
445  auto table_key = boost::hash_value(table_chunk_key_prefix);
447  if (for_update) {
449  } else {
451  }
453  clearEntireCache = false;
454  }
455  }
456  if (clearEntireCache) {
458  if (for_update) {
460  } else {
462  }
464  }
465  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
static void clearCardinalityCache()
Definition: Execute.cpp:5309
static void invalidateCardinalityCacheForTable(const shared::TableKey &table_key)
Definition: Execute.cpp:5316
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
std::vector< int > getTableChunkKey(const int getCurrentDBId) const
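
clearExternalCaches() above either invalidates only the cache entries keyed by the table's hashed chunk-key prefix or, when no table descriptor is given (or its chunk key is empty), clears the caches entirely. A simplified sketch of that choice over a hypothetical cache map:

#include <cstddef>
#include <optional>
#include <unordered_map>

using CacheKey = std::size_t;               // stands in for the hashed chunk-key prefix
std::unordered_map<CacheKey, int> g_cache;  // stands in for the external caches

void clear_external_caches(std::optional<CacheKey> table_key) {
  if (table_key) {
    g_cache.erase(*table_key);  // invalidate only entries for this table
  } else {
    g_cache.clear();            // no table given: clear the entire cache
  }
}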

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 535 of file Execute.cpp.

References clearExternalCaches(), Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

535  {
536  switch (memory_level) {
540  execute_mutex_); // Don't flush memory while queries are running
541 
542  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
543  // The hash table cache uses CPU memory not managed by the buffer manager. In the
544  // future, we should manage these allocations with the buffer manager directly.
545  // For now, assume the user wants to purge the hash table cache when they clear
546  // CPU memory (currently used in ExecuteTest to lower memory pressure)
547  // TODO: Move JoinHashTableCacheInvalidator to Executor::clearExternalCaches();
549  }
550  Executor::clearExternalCaches(true, nullptr, 0);
552  break;
553  }
554  default: {
555  throw std::runtime_error(
556  "Clearing memory levels other than the CPU level or GPU level is not "
557  "supported.");
558  }
559  }
560 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:515
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:234
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::unique_lock< T > unique_lock
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 1054 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

1054  {
1058 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1571
TableGenerations table_generations_
Definition: Execute.h:1573

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 5061 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

5062  {
5064  // clear the interrupt-related info for a finished query
5065  if (query_session.empty()) {
5066  return;
5067  }
5068  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
5069  if (query_session.compare(current_query_session_) == 0) {
5070  invalidateRunningQuerySession(session_write_lock);
5071  resetInterrupt();
5072  }
5073 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5209
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5013

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  aggregate_state 
)
private

Definition at line 1510 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

1512  {
1514  const auto pi32_type =
1515  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1516  const auto pi64_type =
1517  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1518  const auto window_func_context =
1520  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1521  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1522  const auto aggregate_state_type =
1523  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1524  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1525  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1526  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1527  auto aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
1528  cgen_state_.get(),
1529  code_generator,
1530  co,
1531  aggregate_state_count_i64,
1532  aggregate_state_type,
1534  .front();
1535  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1536  switch (window_func_ti.get_type()) {
1537  case kFLOAT: {
1538  return cgen_state_->emitCall(
1539  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1540  }
1541  case kDOUBLE: {
1542  return cgen_state_->emitCall(
1543  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1544  }
1545  case kDECIMAL: {
1546  return cgen_state_->emitCall(
1547  "load_avg_decimal",
1548  {aggregate_state,
1549  aggregate_state_count,
1550  double_null_lv,
1551  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1552  }
1553  default: {
1554  return cgen_state_->emitCall(
1555  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1556  }
1557  }
1558  }
1559  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1560  return cgen_state_->ir_builder_.CreateLoad(
1561  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1562  }
1563  switch (window_func_ti.get_type()) {
1564  case kFLOAT: {
1565  return cgen_state_->emitCall("load_float", {aggregate_state});
1566  }
1567  case kDOUBLE: {
1568  return cgen_state_->emitCall("load_double", {aggregate_state});
1569  }
1570  default: {
1571  return cgen_state_->ir_builder_.CreateLoad(
1572  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1573  }
1574  }
1575 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2925
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)
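
For AVG the code above loads the aggregate sum and count pointers and calls a load_avg_* runtime helper. A host-side sketch of the assumed helper semantics (sum divided by count, with a null sentinel for an empty state); the real helpers are runtime functions and may differ in detail:

#include <cstdint>

double load_avg_double_sketch(double sum, int64_t count, double null_val) {
  // Assumed semantics: average of the accumulated state, null when no rows.
  return count != 0 ? sum / static_cast<double>(count) : null_val;
}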

+ Here is the call graph for this function:

llvm::Value * Executor::codegenConditionalAggregateCondValSelector ( llvm::Value *  cond_lv,
SQLAgg const  aggKind,
CompilationOptions const &  co 
) const
private

Definition at line 1577 of file WindowFunctionIR.cpp.

References CHECK, and kSUM_IF.

1580  {
1581  llvm::Value* res_cond_lv{nullptr};
1582  switch (aggKind) {
1583  case kSUM_IF:
1584  if (cond_lv->getType()->isIntegerTy(1)) {
1585  // cond_expr returns an i1-typed value; we just need to cast it to i8
1586  // (e.g., when cond_expr is of the form `cond_expr IS NULL`)
1587  res_cond_lv = cgen_state_->castToTypeIn(cond_lv, 8);
1588  } else {
1589  CHECK(cond_lv->getType()->isIntegerTy(8));
1590  // cond_expr may produce a null value instead of an upcast bool (i1-type) value,
1591  // so we have to set the true condition correctly;
1592  // e.g., i8 @gt_int32_t_nullable_lhs(..., i64 -2147483648, i8 -128)
1593  // yields one of the following i8-type values: 1, 0, -128
1594  auto true_cond_lv =
1595  cgen_state_->ir_builder_.CreateICmpEQ(cond_lv, cgen_state_->llInt((int8_t)1));
1596  res_cond_lv = cgen_state_->ir_builder_.CreateSelect(
1597  true_cond_lv, cgen_state_->llInt((int8_t)1), cgen_state_->llInt((int8_t)0));
1598  }
1599  break;
1600  default:
1601  break;
1602  }
1603  return res_cond_lv;
1604 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define CHECK(condition)
Definition: Logger.h:291
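
For SUM_IF the selector above normalizes a nullable boolean condition: an i8 value that may be 1, 0, or the null sentinel -128 is folded so that only an exact 1 counts as true. A scalar sketch of the same mapping:

#include <cstdint>

// Nullable boolean encoded in int8_t (1 = true, 0 = false, -128 = null):
// only an exact 1 is treated as a satisfied condition.
int8_t normalize_sum_if_cond(int8_t nullable_cond) {
  return nullable_cond == 1 ? 1 : 0;
}
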
llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 781 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFunctionContext::elementCount(), get_int_type(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), WindowFunctionContext::NUM_EXECUTION_DEVICES, WindowFunctionContext::partitionCount(), WindowFunctionContext::partitionNumCountBuf(), and WindowFunctionContext::payload().

785  {
786  const auto pi64_type =
787  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
788  const auto pi32_type =
789  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
790  auto row_pos_lv = current_row_pos_lv;
791  if (window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
792  // `current_row_pos_lv` indicates the index of the current row, but to figure out
793  // the index of the window partition it belongs to, we need a special approach,
794  // especially for window frame navigation functions. For instance, when we have
795  // five rows having two columns pc and val such as (2,1), (2,2), (2,3), (1,1),
796  // (1,2), we build a OneToMany Perfect Hash Table as: offset: 0 2 / count: 2 3 /
797  // payload: i1, i2, i3, i4, i5 where i1 ~ i3 and i4 ~ i5 are rows for partition 1
798  // (i.e., pc = 1) and partition 2 (i.e., pc = 2), respectively. But when processing the first
799  // row (2, 1), the original `current_row_pos_lv` stands for zero, so computing which
800  // partition it belongs to is hard unless we hash the value at runtime. Even if we
801  // do hash, we cannot know the exact hash slot unless we do binary + linear searches
802  // multiple times (via the payload buffer and the ordered payload buffer); e.g., when the
803  // row (1,2) is assigned to partition[4], we cannot find the hash slot index '4'
804  // by using `current_row_pos_lv` unless doing a costly operation like a linear
805  // search over the entire window partition. Instead, we collect the hash slot that each
806  // row is assigned to and keep this info in the payload buffer
807  // `hash_slot_idx_ptr_lv`, and use it for computing window frame navigation functions
808  auto* const hash_slot_idx_ptr =
809  window_func_context->payload() + window_func_context->elementCount();
810  auto hash_slot_idx_buf_lv =
811  cgen_state_->llInt(reinterpret_cast<int64_t>(hash_slot_idx_ptr));
812  auto hash_slot_idx_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
813  cgen_state_.get(),
814  code_generator,
815  co,
816  hash_slot_idx_buf_lv,
817  pi32_type,
819  .front();
820  auto hash_slot_idx_load_lv = cgen_state_->ir_builder_.CreateGEP(
821  hash_slot_idx_ptr_lv->getType()->getPointerElementType(),
822  hash_slot_idx_ptr_lv,
823  current_row_pos_lv);
824  row_pos_lv = cgen_state_->castToTypeIn(
825  cgen_state_->ir_builder_.CreateLoad(
826  hash_slot_idx_load_lv->getType()->getPointerElementType(),
827  hash_slot_idx_load_lv,
828  "cur_row_hash_slot_idx"),
829  64);
830  }
831  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
832  auto partition_num_count_buf_lv = cgen_state_->llInt(
833  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
834  auto partition_num_count_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
835  cgen_state_.get(),
836  code_generator,
837  co,
838  partition_num_count_buf_lv,
839  pi64_type,
841  .front();
842  return cgen_state_->emitCall(
843  "compute_int64_t_lower_bound",
844  {partition_count_lv, row_pos_lv, partition_num_count_ptr_lv});
845 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2979
size_t elementCount() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
static const int NUM_EXECUTION_DEVICES
const int64_t * partitionNumCountBuf() const
const Analyzer::WindowFunction * getWindowFunction() const
const int32_t * payload() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
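
The emitted call to compute_int64_t_lower_bound above maps a row position to the index of its window partition. A standalone sketch of the same lookup, assuming partitionNumCountBuf() holds cumulative per-partition row counts (that layout is an assumption, not confirmed by the listing):

#include <algorithm>
#include <cstdint>
#include <vector>

// Map a row position to its partition index via binary search over cumulative
// per-partition row counts; the real work is done by the runtime function.
int64_t partition_index_for_row(int64_t row_pos,
                                const std::vector<int64_t>& cumulative_counts) {
  // cumulative_counts[i] = number of rows in partitions 0..i; the first entry
  // whose cumulative count exceeds row_pos identifies the owning partition.
  auto it = std::upper_bound(cumulative_counts.begin(), cumulative_counts.end(), row_pos);
  return static_cast<int64_t>(it - cumulative_counts.begin());
}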

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBound ( bool  for_start_bound,
bool  for_range_mode,
bool  for_window_frame_naviation,
const Analyzer::WindowFrame frame_bound,
bool  is_timestamp_type_frame,
llvm::Value *  order_key_null_val,
const WindowFrameBoundFuncArgs args 
)
private

Definition at line 649 of file WindowFunctionIR.cpp.

References CHECK, CURRENT_ROW, WindowFrameBoundFuncArgs::current_row_pos_lv, EXPR_FOLLOWING, EXPR_PRECEDING, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, Analyzer::WindowFrame::getBoundType(), WindowFrameBoundFuncArgs::int64_t_one_val_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, WindowFrameBoundFuncArgs::num_elem_current_partition_lv, WindowFrameBoundFuncArgs::order_type_col_name, UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

655  {
656  const auto bound_type = frame_bound->getBoundType();
657  auto adjust_frame_end_bound = [&](llvm::Value* target_bound_lv) {
658  return cgen_state_->ir_builder_.CreateSub(target_bound_lv, args.int64_t_one_val_lv);
659  };
661  CHECK(for_start_bound) << "frame end cannot be UNBOUNDED PRECEDING";
662  return args.int64_t_zero_val_lv;
663  } else if (bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
664  CHECK(!for_start_bound) << "frame start cannot be UNBOUNDED FOLLOWING";
665  // adjust frame bound w.r.t the open frame interval if necessary
666  return for_window_frame_naviation
667  ? adjust_frame_end_bound(args.num_elem_current_partition_lv)
668  : args.num_elem_current_partition_lv;
669  }
670  std::vector<llvm::Value*> func_args;
671  std::string op_name =
672  bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING ? "add" : "sub";
673  if (!for_range_mode) {
674  llvm::Value* current_row_bound_expr_lv{nullptr};
675  if (for_window_frame_naviation) {
676  // we already know the current row's index in the (ordered) window frame in this case
677  auto bound_expr =
678  for_start_bound ? args.frame_start_bound_expr_lv : args.frame_end_bound_expr_lv;
679  if (bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
680  current_row_bound_expr_lv =
681  cgen_state_->ir_builder_.CreateAdd(args.current_row_pos_lv, bound_expr);
682  } else if (bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING) {
683  current_row_bound_expr_lv =
684  cgen_state_->ir_builder_.CreateSub(args.current_row_pos_lv, bound_expr);
685  } else {
686  CHECK(bound_type == SqlWindowFrameBoundType::CURRENT_ROW);
687  current_row_bound_expr_lv = args.current_row_pos_lv;
688  }
689  // adjust frame bound w.r.t the open frame interval
690  if (for_start_bound) {
691  return cgen_state_->ir_builder_.CreateSelect(
692  cgen_state_->ir_builder_.CreateICmpSLT(current_row_bound_expr_lv,
693  args.int64_t_zero_val_lv),
694  args.int64_t_zero_val_lv,
695  current_row_bound_expr_lv);
696  } else {
697  return cgen_state_->ir_builder_.CreateSelect(
698  cgen_state_->ir_builder_.CreateICmpSGE(current_row_bound_expr_lv,
699  args.num_elem_current_partition_lv),
700  adjust_frame_end_bound(args.num_elem_current_partition_lv),
701  current_row_bound_expr_lv);
702  }
703  } else {
704  std::string func_class = for_start_bound ? "start" : "end";
705  auto const func_name = "compute_row_mode_" + func_class + "_index_" + op_name;
706  func_args = prepareRowModeFuncArgs(for_start_bound, bound_type, args);
707  current_row_bound_expr_lv = cgen_state_->emitCall(func_name, func_args);
708  }
709  return current_row_bound_expr_lv;
710  } else {
711  std::string func_class = for_start_bound ? "lower" : "upper";
712  auto const func_name = getFramingFuncName(
713  func_class,
714  args.order_type_col_name,
715  op_name,
716  bound_type != SqlWindowFrameBoundType::CURRENT_ROW && is_timestamp_type_frame);
717  func_args = prepareRangeModeFuncArgs(
718  for_start_bound, frame_bound, is_timestamp_type_frame, order_key_null_val, args);
719  auto frame_bound_lv = cgen_state_->emitCall(func_name, func_args);
720  if (!for_start_bound && for_window_frame_naviation) {
721  // adjust frame end bound w.r.t the open frame interval
722  frame_bound_lv = cgen_state_->ir_builder_.CreateSelect(
723  cgen_state_->ir_builder_.CreateICmpSGE(frame_bound_lv,
724  args.num_elem_current_partition_lv),
725  adjust_frame_end_bound(args.num_elem_current_partition_lv),
726  frame_bound_lv);
727  }
728  return frame_bound_lv;
729  }
730 }
llvm::Value * num_elem_current_partition_lv
llvm::Value * current_row_pos_lv
llvm::Value * frame_end_bound_expr_lv
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2826
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * int64_t_zero_val_lv
llvm::Value * int64_t_one_val_lv
llvm::Value * frame_start_bound_expr_lv
std::string order_type_col_name
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const

+ Here is the call graph for this function:
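
For ROWS-mode bounds used in window frame navigation, the IR above computes current_row_pos plus or minus the bound expression and then clamps the result to the current partition with select instructions. A plain C++ sketch of that arithmetic; row_mode_bound is a hypothetical helper, whereas the generated code actually calls the compute_row_mode_{start,end}_index_{add,sub} runtime functions:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Hypothetical mirror of the ROWS-mode arithmetic emitted above: the bound is
// current_row_pos +/- offset, clamped to the current partition
// [0, num_elem_current_partition) as in the frame-navigation select paths.
int64_t row_mode_bound(int64_t current_row_pos,
                       int64_t offset,  // value of the frame bound expression
                       bool preceding,  // EXPR_PRECEDING vs. EXPR_FOLLOWING
                       bool for_start_bound,
                       int64_t num_elem_current_partition) {
  const int64_t bound =
      preceding ? current_row_pos - offset : current_row_pos + offset;
  if (for_start_bound) {
    return std::max<int64_t>(bound, 0);  // frame start never precedes the partition
  }
  return std::min<int64_t>(bound, num_elem_current_partition - 1);  // adjusted end
}

int main() {
  // ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING evaluated at row 1 of a 4-row partition
  assert(row_mode_bound(1, 2, true, true, 4) == 0);    // start clamped to 0
  assert(row_mode_bound(1, 3, false, false, 4) == 3);  // end clamped to the last row
  return 0;
}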

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 598 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

601  {
602  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
603  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
604  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
605  };
606  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
607  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
608  if (order_col_ti.get_comp_param() == 16) {
609  return cgen_state_->emitCall(
610  "fixed_width_date_encode_noinline",
611  {bound_expr_lv,
612  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
613  32),
614  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
615  } else {
616  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
617  {bound_expr_lv,
618  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
619  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
620  }
621  };
622  llvm::Value* bound_expr_lv{nullptr};
623  if (needs_bound_expr_codegen(frame_bound)) {
624  auto bound_expr = frame_bound->getBoundExpr();
625  if (auto dateadd_expr = dynamic_cast<const Analyzer::DateaddExpr*>(bound_expr)) {
626  if (dateadd_expr->get_datetime_expr()->get_type_info().is_encoded_timestamp()) {
627  dateadd_expr->set_fixed_encoding_null_val();
628  }
629  }
630  auto bound_expr_lvs = code_generator.codegen(bound_expr, true, co);
631  bound_expr_lv = bound_expr_lvs.front();
632  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
633  if (g_cluster) {
634  throw std::runtime_error(
635  "Range mode with date type ordering column is not supported yet.");
636  }
637  bound_expr_lv = encode_date_col_val(bound_expr_lv);
638  }
639  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
640  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
641  }
642  } else {
643  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
644  }
645  CHECK(bound_expr_lv);
646  return bound_expr_lv;
647 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2959
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2828
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:291
bool g_cluster
Definition: sqltypes.h:72

+ Here is the call graph for this function:
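
Only <expr> PRECEDING and <expr> FOLLOWING bounds carry an expression that needs code generation; every other bound kind is represented by the -1 sentinel, and values narrower than 8 bytes are widened to 64 bit. A small illustrative sketch of that decision; BoundKind and frame_bound_value are made-up names, not types used by the executor:

#include <cassert>
#include <cstdint>
#include <optional>

// Illustrative-only mirror of the decision above: only <expr> PRECEDING and
// <expr> FOLLOWING bounds have an expression to evaluate; all other bound
// kinds are encoded as the sentinel -1.
enum class BoundKind {
  UnboundedPreceding,
  ExprPreceding,
  CurrentRow,
  ExprFollowing,
  UnboundedFollowing
};

int64_t frame_bound_value(BoundKind kind, std::optional<int32_t> bound_expr) {
  const bool needs_expr =
      kind == BoundKind::ExprPreceding || kind == BoundKind::ExprFollowing;
  if (!needs_expr) {
    return -1;  // mirrors cgen_state_->llInt((int64_t)-1)
  }
  // values narrower than 8 bytes are widened to 64 bit before use
  return static_cast<int64_t>(bound_expr.value());
}

int main() {
  // e.g. ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
  assert(frame_bound_value(BoundKind::ExprPreceding, 3) == 3);
  assert(frame_bound_value(BoundKind::CurrentRow, std::nullopt) == -1);
  return 0;
}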

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameBoundRange ( const Analyzer::WindowFunction window_func,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 1065 of file WindowFunctionIR.cpp.

References CHECK, Analyzer::WindowFunction::getFrameEndBound(), and Analyzer::WindowFunction::getFrameStartBound().

1068  {
1069  const auto frame_start_bound = window_func->getFrameStartBound();
1070  const auto frame_end_bound = window_func->getFrameEndBound();
1071  auto frame_start_bound_expr_lv =
1072  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
1073  auto frame_end_bound_expr_lv =
1074  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
1075  CHECK(frame_start_bound_expr_lv);
1076  CHECK(frame_end_bound_expr_lv);
1077  return std::make_pair(frame_start_bound_expr_lv, frame_end_bound_expr_lv);
1078 }
const Analyzer::WindowFrame * getFrameStartBound() const
Definition: Analyzer.h:2937
const Analyzer::WindowFrame * getFrameEndBound() const
Definition: Analyzer.h:2944
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameNullRange ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 904 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), get_int_type(), WindowFunctionContext::getNullValueEndPos(), WindowFunctionContext::getNullValueStartPos(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

908  {
909  const auto pi64_type =
910  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
911  const auto null_start_pos_buf = cgen_state_->llInt(
912  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
913  const auto null_start_pos_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
914  cgen_state_.get(),
915  code_generator,
916  co,
917  null_start_pos_buf,
918  pi64_type,
919  WindowFunctionContext::NUM_EXECUTION_DEVICES)
920  .front();
921  const auto null_start_pos_ptr =
922  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
923  null_start_pos_buf_ptr,
924  partition_index_lv);
925  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
926  null_start_pos_ptr->getType()->getPointerElementType(),
927  null_start_pos_ptr,
928  "null_start_pos");
929  const auto null_end_pos_buf = cgen_state_->llInt(
930  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
931  const auto null_end_pos_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
932  cgen_state_.get(),
933  code_generator,
934  co,
935  null_end_pos_buf,
936  pi64_type,
937  WindowFunctionContext::NUM_EXECUTION_DEVICES)
938  .front();
939  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
940  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
941  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
942  null_end_pos_ptr->getType()->getPointerElementType(),
943  null_end_pos_ptr,
944  "null_end_pos");
945  return std::make_pair(null_start_pos_lv, null_end_pos_lv);
946 }
int64_t * getNullValueEndPos() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
int64_t * getNullValueStartPos() const
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:
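
At run time the two GEP/load pairs above simply index the per-partition null-range buffers by the partition index. A standalone sketch of that lookup, assuming getNullValueStartPos()/getNullValueEndPos() expose one entry per partition; null_range_for_partition is an illustrative helper:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Illustrative lookup of the null-value range [start, end) of one partition.
std::pair<int64_t, int64_t> null_range_for_partition(const int64_t* null_start_pos,
                                                     const int64_t* null_end_pos,
                                                     int64_t partition_index) {
  return {null_start_pos[partition_index], null_end_pos[partition_index]};
}

int main() {
  // Two partitions; nulls sorted to the front of each partition in this example.
  const std::vector<int64_t> starts{0, 0};
  const std::vector<int64_t> ends{2, 1};
  const auto [null_start, null_end] =
      null_range_for_partition(starts.data(), ends.data(), 0);
  assert(null_start == 0 && null_end == 2);
  return 0;
}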

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1204 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1211  {
1212  AUTOMATIC_IR_METADATA(cgen_state_.get());
1213  const auto exit_bb =
1214  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1215  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1216  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1217  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1218  CodeGenerator code_generator(this);
1219 
1220  llvm::BasicBlock* loops_entry_bb{nullptr};
1221  auto has_range_join =
1222  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1223  return join_loop.kind() == JoinLoopKind::MultiSet;
1224  });
1225  if (has_range_join) {
1226  CHECK_EQ(join_loops.size(), size_t(1));
1227  const auto element_count =
1228  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1229 
1230  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1231  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1232  return y_shifted | static_cast<uint32_t>(x);
1233  };
1234 
1235  const auto values_arr = std::vector<llvm::Constant*>{
1236  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1237  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1238  compute_packed_offset(0, 1)),
1239  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1240  compute_packed_offset(0, -1)),
1241  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1242  compute_packed_offset(1, 0)),
1243  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1244  compute_packed_offset(1, 1)),
1245  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1246  compute_packed_offset(1, -1)),
1247  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1248  compute_packed_offset(-1, 0)),
1249  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1250  compute_packed_offset(-1, 1)),
1251  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1252  compute_packed_offset(-1, -1))};
1253 
1254  const auto constant_values_array = llvm::ConstantArray::get(
1255  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1256  CHECK(cgen_state_->module_);
1257  const auto values =
1258  new llvm::GlobalVariable(*cgen_state_->module_,
1259  get_int_array_type(64, 9, cgen_state_->context_),
1260  true,
1261  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1262  constant_values_array);
1263  JoinLoop join_loop(
1264  JoinLoopKind::Set,
1265  JoinType::INNER,
1266  [element_count, values](const std::vector<llvm::Value*>& v) {
1267  JoinLoopDomain domain{{0}};
1268  domain.element_count = element_count;
1269  domain.values_buffer = values;
1270  return domain;
1271  },
1272  nullptr,
1273  nullptr,
1274  nullptr,
1275  nullptr,
1276  "range_key_loop");
1277 
1278  loops_entry_bb = JoinLoop::codegen(
1279  {join_loop},
1280  [this,
1281  query_func,
1282  &query_mem_desc,
1283  &co,
1284  &eo,
1285  &group_by_and_aggregate,
1286  &join_loops,
1287  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1288  auto& builder = cgen_state_->ir_builder_;
1289 
1290  auto body_exit_bb =
1291  llvm::BasicBlock::Create(cgen_state_->context_,
1292  "range_key_inner_body_exit",
1293  builder.GetInsertBlock()->getParent());
1294 
1295  auto range_key_body_bb =
1296  llvm::BasicBlock::Create(cgen_state_->context_,
1297  "range_key_loop_body",
1298  builder.GetInsertBlock()->getParent());
1299  builder.SetInsertPoint(range_key_body_bb);
1300 
1301  const auto body_loops_entry_bb = JoinLoop::codegen(
1302  join_loops,
1303  [this,
1304  query_func,
1305  &query_mem_desc,
1306  &co,
1307  &eo,
1308  &group_by_and_aggregate,
1309  &join_loops,
1310  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1311  addJoinLoopIterator(prev_iters, join_loops.size());
1312  auto& builder = cgen_state_->ir_builder_;
1313  const auto loop_body_bb =
1314  llvm::BasicBlock::Create(builder.getContext(),
1315  "loop_body",
1316  builder.GetInsertBlock()->getParent());
1317  builder.SetInsertPoint(loop_body_bb);
1318  const bool can_return_error =
1319  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1320  if (can_return_error || cgen_state_->needs_error_check_ ||
1321  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1322  createErrorCheckControlFlow(query_func,
1323  eo.with_dynamic_watchdog,
1324  eo.allow_runtime_query_interrupt,
1325  join_loops,
1326  co.device_type,
1327  group_by_and_aggregate.query_infos_);
1328  }
1329  return loop_body_bb;
1330  },
1331  prev_iters.back(),
1332  body_exit_bb,
1333  cgen_state_.get());
1334 
1335  builder.SetInsertPoint(range_key_body_bb);
1336  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1337 
1338  builder.SetInsertPoint(body_exit_bb);
1339  return range_key_body_bb;
1340  },
1341  code_generator.posArg(nullptr),
1342  exit_bb,
1343  cgen_state_.get());
1344  } else {
1345  loops_entry_bb = JoinLoop::codegen(
1346  join_loops,
1347  /*body_codegen=*/
1348  [this,
1349  query_func,
1350  &query_mem_desc,
1351  &co,
1352  &eo,
1353  &group_by_and_aggregate,
1354  &join_loops,
1355  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1356  AUTOMATIC_IR_METADATA(cgen_state_.get());
1357  addJoinLoopIterator(prev_iters, join_loops.size());
1358  auto& builder = cgen_state_->ir_builder_;
1359  const auto loop_body_bb = llvm::BasicBlock::Create(
1360  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1361  builder.SetInsertPoint(loop_body_bb);
1362  const bool can_return_error =
1363  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1364  if (can_return_error || cgen_state_->needs_error_check_ ||
1365  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1366  createErrorCheckControlFlow(query_func,
1367  eo.with_dynamic_watchdog,
1368  eo.allow_runtime_query_interrupt,
1369  join_loops,
1370  co.device_type,
1371  group_by_and_aggregate.query_infos_);
1372  }
1373  return loop_body_bb;
1374  },
1375  /*outer_iter=*/code_generator.posArg(nullptr),
1376  exit_bb,
1377  cgen_state_.get());
1378  }
1379  CHECK(loops_entry_bb);
1380  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1381  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1382 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:
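
The constant array built for the range-join case packs the nine (x, y) offsets of a 3x3 cell neighborhood into single 64-bit keys using the same shift-and-or scheme as the compute_packed_offset lambda above. A standalone sketch that reproduces and round-trips that encoding; unpack_offset is an illustrative helper, not part of the executor:

#include <cassert>
#include <cstdint>
#include <utility>

// Same packing as the compute_packed_offset lambda above: y in the upper
// 32 bits, x (as unsigned) in the lower 32 bits.
uint64_t compute_packed_offset(int32_t x, int32_t y) {
  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
  return y_shifted | static_cast<uint32_t>(x);
}

// Illustrative inverse, useful for checking that the encoding round-trips.
std::pair<int32_t, int32_t> unpack_offset(uint64_t packed) {
  const auto x = static_cast<int32_t>(packed & 0xffffffffu);
  const auto y = static_cast<int32_t>(packed >> 32);
  return {x, y};
}

int main() {
  const int32_t offsets[][2] = {{0, 0}, {0, 1}, {0, -1}, {1, 0}, {1, 1},
                                {1, -1}, {-1, 0}, {-1, 1}, {-1, -1}};
  for (const auto& xy : offsets) {
    const auto packed = compute_packed_offset(xy[0], xy[1]);
    const auto [x, y] = unpack_offset(packed);
    assert(x == xy[0] && y == xy[1]);
  }
  return 0;
}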

llvm::Value * Executor::codegenLoadCurrentValueFromColBuf ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
WindowFrameBoundFuncArgs args 
) const
private

Definition at line 753 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), WindowFrameBoundFuncArgs::current_row_pos_lv, get_fp_type(), get_int_type(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), and WindowFrameBoundFuncArgs::order_key_buf_ptr_lv.

756  {
757  llvm::Value* current_col_value_ptr_lv{nullptr};
758  const auto order_key_size_in_byte = getOrderKeySize(window_func_context) * 8;
759  auto const order_key_ptr =
760  window_func_context->getWindowFunction()->getOrderKeys().front();
761  CHECK(order_key_ptr);
762  auto const order_col_ti = order_key_ptr->get_type_info();
763  auto const order_col_llvm_type =
764  order_col_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
765  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
766  if (!window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
767  auto rowid_in_partition_lv = code_generator.codegenWindowPosition(
768  window_func_context, args.current_row_pos_lv);
769  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
770  order_col_llvm_type, args.order_key_buf_ptr_lv, rowid_in_partition_lv);
771  } else {
772  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
773  order_col_llvm_type, args.order_key_buf_ptr_lv, args.current_row_pos_lv);
774  }
775  return cgen_state_->ir_builder_.CreateLoad(
776  current_col_value_ptr_lv->getType()->getPointerElementType(),
777  current_col_value_ptr_lv,
778  "current_col_value");
779 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2979
llvm::Value * current_row_pos_lv
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:235
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
llvm::Value * order_key_buf_ptr_lv
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const

+ Here is the call graph for this function:
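
The load above picks an index into the ordering-key buffer and reads the current row's ordering value: for frame-navigation functions the current row position is already the index, otherwise the row's position within the ordered partition (from codegenWindowPosition) is used. A plain C++ sketch of that selection; load_current_order_key and its arguments are illustrative stand-ins:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative version of the load above: choose the index into the order-key
// buffer, then read the current row's ordering value. row_position_in_partition
// stands in for what codegenWindowPosition() produces at run time.
int64_t load_current_order_key(const int64_t* order_key_buf,
                               int64_t current_row_pos,
                               int64_t row_position_in_partition,
                               bool is_frame_navigation_func) {
  const int64_t idx =
      is_frame_navigation_func ? current_row_pos : row_position_in_partition;
  return order_key_buf[idx];
}

int main() {
  const std::vector<int64_t> order_keys{10, 20, 30, 40};
  assert(load_current_order_key(order_keys.data(), 3, 1, false) == 20);
  assert(load_current_order_key(order_keys.data(), 3, 1, true) == 40);
  return 0;
}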

std::pair< std::string, llvm::Value * > Executor::codegenLoadOrderKeyBufPtr ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co 
) const
private

Definition at line 948 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

951  {
952  auto const order_key_ti =
953  window_func_context->getWindowFunction()->getOrderKeys().front()->get_type_info();
954  auto const order_key_size = order_key_ti.get_size();
955  auto const order_col_type_name = get_col_type_name_by_size(
956  order_key_size,
957  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
958  size_t order_key_size_in_byte = order_key_size * 8;
959  auto const order_key_type =
960  order_key_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
961  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
962  auto const order_key_buf_type = llvm::PointerType::get(order_key_type, 0);
963  auto const order_key_buf = cgen_state_->llInt(
964  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
965  auto const order_key_buf_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
966  cgen_state_.get(),
967  code_generator,
968  co,
969  order_key_buf,
970  order_key_buf_type,
971  WindowFunctionContext::NUM_EXECUTION_DEVICES)
972  .front();
973  return std::make_pair(order_col_type_name, order_key_buf_ptr_lv);
974 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
static const int NUM_EXECUTION_DEVICES
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
const Analyzer::WindowFunction * getWindowFunction() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:

WindowPartitionBufferPtrs Executor::codegenLoadPartitionBuffers ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 976 of file WindowFunctionIR.cpp.

References WindowFunctionContext::counts(), CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowPartitionBufferPtrs::current_partition_start_offset_lv, get_int_type(), WindowPartitionBufferPtrs::num_elem_current_partition_lv, WindowFunctionContext::NUM_EXECUTION_DEVICES, WindowFunctionContext::partitionStartOffset(), WindowFunctionContext::payload(), WindowFunctionContext::sortedPartition(), WindowPartitionBufferPtrs::target_partition_rowid_ptr_lv, and WindowPartitionBufferPtrs::target_partition_sorted_rowid_ptr_lv.

980  {
981  WindowPartitionBufferPtrs bufferPtrs;
982  const auto pi64_type =
983  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
984  const auto pi32_type =
985  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
986 
987  // partial sum of # elems of partitions
988  auto partition_start_offset_buf_lv = cgen_state_->llInt(
989  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
990  auto partition_start_offset_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
991  cgen_state_.get(),
992  code_generator,
993  co,
994  partition_start_offset_buf_lv,
995  pi64_type,
996  WindowFunctionContext::NUM_EXECUTION_DEVICES)
997  .front();
998 
999  // get start offset of the current partition
1000  auto current_partition_start_offset_ptr_lv =
1001  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1002  partition_start_offset_ptr_lv,
1003  partition_index_lv);
1004  bufferPtrs.current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
1005  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
1006  current_partition_start_offset_ptr_lv);
1007 
1008  // row_id buf of the current partition
1009  const auto partition_rowid_buf_lv =
1010  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
1011  const auto partition_rowid_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1012  cgen_state_.get(),
1013  code_generator,
1014  co,
1015  partition_rowid_buf_lv,
1016  pi32_type,
1017  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1018  .front();
1019  bufferPtrs.target_partition_rowid_ptr_lv =
1020  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1021  partition_rowid_ptr_lv,
1022  bufferPtrs.current_partition_start_offset_lv);
1023 
1024  // row_id buf of ordered current partition
1025  const auto sorted_rowid_lv = cgen_state_->llInt(
1026  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
1027  const auto sorted_rowid_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1028  cgen_state_.get(),
1029  code_generator,
1030  co,
1031  sorted_rowid_lv,
1032  pi64_type,
1033  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1034  .front();
1035  bufferPtrs.target_partition_sorted_rowid_ptr_lv =
1036  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1037  sorted_rowid_ptr_lv,
1038  bufferPtrs.current_partition_start_offset_lv);
1039 
1040  // # elems per partition
1041  const auto partition_count_buf =
1042  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
1043  auto partition_count_buf_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1044  cgen_state_.get(),
1045  code_generator,
1046  co,
1047  partition_count_buf,
1048  pi32_type,
1049  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1050  .front();
1051 
1052  // # elems of the given partition
1053  const auto num_elem_current_partition_ptr =
1054  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1055  partition_count_buf_ptr_lv,
1056  partition_index_lv);
1057  bufferPtrs.num_elem_current_partition_lv = cgen_state_->castToTypeIn(
1058  cgen_state_->ir_builder_.CreateLoad(
1059  num_elem_current_partition_ptr->getType()->getPointerElementType(),
1060  num_elem_current_partition_ptr),
1061  64);
1062  return bufferPtrs;
1063 }
llvm::Value * current_partition_start_offset_lv
llvm::Value * num_elem_current_partition_lv
const int32_t * counts() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * target_partition_sorted_rowid_ptr_lv
llvm::Value * target_partition_rowid_ptr_lv
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int64_t * partitionStartOffset() const
static const int NUM_EXECUTION_DEVICES
const int64_t * sortedPartition() const
const int32_t * payload() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:
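
The loads above resolve, for one partition index, the partition's start offset, its row-id and sorted-row-id buffers (both offset by that start), and its element count widened to 64 bit. A standalone sketch under the assumption that the window context exposes one start offset and one count per partition; PartitionView and load_partition_buffers are illustrative names, not executor types:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative mirror of the loads above: global row-id buffers are shared by
// all partitions and each partition is described by a start offset and a count.
struct PartitionView {
  int64_t start_offset;          // current_partition_start_offset_lv
  const int32_t* rowids;         // target_partition_rowid_ptr_lv
  const int64_t* sorted_rowids;  // target_partition_sorted_rowid_ptr_lv
  int64_t num_elems;             // num_elem_current_partition_lv
};

PartitionView load_partition_buffers(const int64_t* partition_start_offsets,
                                     const int32_t* payload,
                                     const int64_t* sorted_partition,
                                     const int32_t* counts,
                                     int64_t partition_index) {
  const int64_t start = partition_start_offsets[partition_index];
  return {start,
          payload + start,
          sorted_partition + start,
          static_cast<int64_t>(counts[partition_index])};
}

int main() {
  // Two partitions: rows {0,1,2} and rows {3,4}.
  const std::vector<int64_t> starts{0, 3};
  const std::vector<int32_t> counts{3, 2};
  const std::vector<int32_t> payload{0, 1, 2, 3, 4};
  const std::vector<int64_t> sorted{0, 1, 2, 3, 4};
  const auto view = load_partition_buffers(
      starts.data(), payload.data(), sorted.data(), counts.data(), 1);
  assert(view.start_offset == 3 && view.num_elems == 2 && view.rowids[0] == 3);
  return 0;
}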

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3311 of file NativeCodegen.cpp.

3313  {
3314  AUTOMATIC_IR_METADATA(cgen_state_.get());
3315  if (!co.filter_on_deleted_column) {
3316  return nullptr;
3317  }
3318  CHECK(!ra_exe_unit.input_descs.empty());
3319  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3320  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3321  return nullptr;
3322  }
3323  const auto& table_key = outer_input_desc.getTableKey();
3324  const auto deleted_cd = plan_state_->getDeletedColForTable(table_key);
3325  if (!deleted_cd) {
3326  return nullptr;
3327  }
3328  CHECK(deleted_cd->columnType.is_boolean());
3329  const auto deleted_expr =
3330  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3331  shared::ColumnKey{table_key, deleted_cd->columnId},
3332  outer_input_desc.getNestLevel());
3333  CodeGenerator code_generator(this);
3334  const auto is_deleted =
3335  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3336  const auto is_deleted_bb = llvm::BasicBlock::Create(
3337  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3338  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3339  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3340  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3341  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3342  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3343  cgen_state_->ir_builder_.SetInsertPoint(bb);
3344  return bb;
3345 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
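
The emitted control flow makes the row function return early for rows whose delete-flag column is set and continue with the remaining row code otherwise. A trivial C++ sketch of that branch; row_func_sketch is an illustrative stand-in for the generated row function:

#include <cassert>

// Illustrative equivalent of the emitted blocks: the "is_deleted" block
// returns immediately, the "is_not_deleted" block runs the rest of the row
// logic (represented here by setting a flag).
int row_func_sketch(bool is_deleted, bool& processed) {
  if (is_deleted) {  // "is_deleted" basic block
    return 0;
  }
  processed = true;  // "is_not_deleted" basic block
  return 0;
}

int main() {
  bool processed = false;
  row_func_sketch(true, processed);
  assert(!processed);
  row_func_sketch(false, processed);
  assert(processed);
  return 0;
}
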
void Executor::codegenWindowAvgEpilogue ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  crt_val,
llvm::Value *  window_func_null_val 
)
private

Definition at line 1466 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

1469  {
1470  AUTOMATIC_IR_METADATA(cgen_state_.get());
1471  const auto window_func_context =
1472  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1473  const auto window_func = window_func_context->getWindowFunction();
1474  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1475  const auto pi32_type =
1476  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1477  const auto pi64_type =
1478  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1479  const auto aggregate_state_type =
1480  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1481  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1482  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1483  auto aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
1484  cgen_state_.get(),
1485  code_generator,
1486  co,
1487  aggregate_state_count_i64,
1488  aggregate_state_type,
1489  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1490  .front();
1491  std::string agg_count_func_name = "agg_count";
1492  switch (window_func_ti.get_type()) {
1493  case kFLOAT: {
1494  agg_count_func_name += "_float";
1495  break;
1496  }
1497  case kDOUBLE: {
1498  agg_count_func_name += "_double";
1499  break;
1500  }
1501  default: {
1502  break;
1503  }
1504  }
1505  agg_count_func_name += "_skip_val";
1506  cgen_state_->emitCall(agg_count_func_name,
1507  {aggregate_state_count, crt_val, window_func_null_val});
1508 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
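
The epilogue bumps AVG's denominator with an agg_count_{float,double}_skip_val call, so null inputs do not contribute to the count. A sketch of that skip-val counting behavior; agg_count_double_skip_val_sketch and the sentinel in main() are illustrative assumptions, not the actual runtime aggregate:

#include <cassert>
#include <cstdint>

// Illustrative behavior of the *_skip_val count aggregate used for AVG's
// denominator: the count state is only incremented when the current value
// is not the null sentinel.
void agg_count_double_skip_val_sketch(int64_t* count_state,
                                      double crt_val,
                                      double skip_val) {
  if (crt_val != skip_val) {
    ++*count_state;
  }
}

int main() {
  const double null_sentinel = -1.7976931348623157e+308;  // stand-in null value
  int64_t count = 0;
  agg_count_double_skip_val_sketch(&count, 2.5, null_sentinel);
  agg_count_double_skip_val_sketch(&count, null_sentinel, null_sentinel);
  assert(count == 1);
  return 0;
}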

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBounds ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  order_key_col_null_val_lv,
WindowFrameBoundFuncArgs args,
CodeGenerator code_generator 
)
private

Definition at line 1080 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, CHECK, WindowFrameBoundFuncArgs::current_col_value_lv, WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFrame::hasTimestampTypeFrameBound(), and WindowFrameBoundFuncArgs::order_type_col_name.

1086  {
1087  const auto window_func = window_func_context->getWindowFunction();
1088  CHECK(window_func);
1089  const auto is_timestamp_type_frame = frame_start_bound->hasTimestampTypeFrameBound() ||
1090  frame_end_bound->hasTimestampTypeFrameBound();
1091 
1092  if (window_func->hasRangeModeFraming()) {
1093  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
1094  CHECK(window_func->getOrderKeys().size() == 1UL);
1095  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
1096  args.order_type_col_name = getOrderKeyTypeName(window_func_context);
1097  args.current_col_value_lv =
1098  codegenLoadCurrentValueFromColBuf(window_func_context, code_generator, args);
1099  }
1100 
1101  auto get_order_key_null_val = [is_timestamp_type_frame,
1102  &order_key_col_null_val_lv,
1103  this](const Analyzer::WindowFrame* frame_bound) {
1104  return is_timestamp_type_frame && !frame_bound->isCurrentRowBound()
1105  ? cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64)
1106  : order_key_col_null_val_lv;
1107  };
1108  auto frame_start_bound_lv =
1109  codegenFrameBound(true,
1110  window_func->hasRangeModeFraming(),
1111  window_func->isFrameNavigateWindowFunction(),
1112  frame_start_bound,
1113  is_timestamp_type_frame,
1114  get_order_key_null_val(frame_start_bound),
1115  args);
1116  auto frame_end_bound_lv =
1117  codegenFrameBound(false,
1118  window_func->hasRangeModeFraming(),
1119  window_func->isFrameNavigateWindowFunction(),
1120  frame_end_bound,
1121  is_timestamp_type_frame,
1122  get_order_key_null_val(frame_end_bound),
1123  args);
1124  CHECK(frame_start_bound_lv);
1125  CHECK(frame_end_bound_lv);
1126  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
1127 }
bool hasTimestampTypeFrameBound() const
Definition: Analyzer.h:2833
llvm::Value * current_col_value_lv
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
std::string order_type_col_name

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 22 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, BACKWARD_FILL, CHECK, CHECK_EQ, CONDITIONAL_CHANGE_EVENT, COUNT, COUNT_IF, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, FIRST_VALUE_IN_FRAME, FORWARD_FILL, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LAST_VALUE_IN_FRAME, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTH_VALUE, NTH_VALUE_IN_FRAME, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, SUM, and SUM_IF.

23  {
24  AUTOMATIC_IR_METADATA(cgen_state_.get());
25  CodeGenerator code_generator(this);
26 
27  const auto window_func_context =
28  WindowProjectNodeContext::get(this)->activateWindowFunctionContext(this,
29  target_index);
30  const auto window_func = window_func_context->getWindowFunction();
31  switch (window_func->getKind()) {
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
40  return cgen_state_->emitCall("percent_window_func",
41  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
42  window_func_context->output())),
43  code_generator.posArg(nullptr)});
49  // they are always evaluated on the current frame
51  const auto& args = window_func->getArgs();
52  CHECK(!args.empty());
53  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
54  CHECK_EQ(arg_lvs.size(), size_t(1));
55  return arg_lvs.front();
56  }
65  return codegenWindowFunctionAggregate(&code_generator, co);
74  default:
75  LOG(FATAL) << "Invalid window function kind";
76  }
77  return nullptr;
78 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowFunctionAggregate(CodeGenerator *code_generator, const CompilationOptions &co)
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 265 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowProjectNodeContext::get(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

266  {
267  AUTOMATIC_IR_METADATA(cgen_state_.get());
268  auto [reset_state_false_bb, aggregate_state] =
269  codegenWindowResetStateControlFlow(code_generator, co);
270  llvm::Value* aggregate_state_count = nullptr;
271  const auto window_func_context =
272  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
273  const auto window_func = window_func_context->getWindowFunction();
274  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
275  const auto aggregate_state_count_i64 = cgen_state_->llInt(
276  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
277  const auto pi64_type =
278  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
279  aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
280  cgen_state_.get(),
281  code_generator,
282  co,
283  aggregate_state_count_i64,
284  pi64_type,
285  WindowFunctionContext::NUM_EXECUTION_DEVICES)
286  .front();
287  }
288  codegenWindowFunctionStateInit(code_generator, co, aggregate_state);
289  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
290  const auto count_zero = cgen_state_->llInt(int64_t(0));
291  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
292  }
293  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
294  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
296  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
297 }
void codegenWindowFunctionStateInit(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
std::pair< llvm::BasicBlock *, llvm::Value * > codegenWindowResetStateControlFlow(CodeGenerator *code_generator, const CompilationOptions &co)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
#define CHECK(condition)
Definition: Logger.h:291
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 1129 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, COUNT_IF, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), inline_fixed_encoding_null_val(), kDATE, kDOUBLE, kENCODING_DATE_IN_DAYS, kENCODING_FIXED, kFLOAT, kSUM_IF, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, WindowFunctionContext::NUM_EXECUTION_DEVICES, CodeGenerator::posArg(), SUM, SUM_IF, and window_function_conditional_aggregate().

1130  {
1131  AUTOMATIC_IR_METADATA(cgen_state_.get());
1132  const auto window_func_context =
1133  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1134  const auto window_func = window_func_context->getWindowFunction();
1135  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1136  const auto window_func_null_val =
1137  window_func_ti.is_fp()
1138  ? cgen_state_->inlineFpNull(window_func_ti)
1139  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
1140  if (window_func_context->elementCount() == 0) {
1141  // we do not need to generate a code for an empty input table
1142  return window_func->getKind() == SqlWindowFunctionKind::AVG
1143  ? cgen_state_->inlineFpNull(SQLTypeInfo(SQLTypes::kDOUBLE))
1144  : window_func_null_val;
1145  }
1146  const auto& args = window_func->getArgs();
1147  CodeGenerator code_generator(this);
1148  if (window_func_context->needsToBuildAggregateTree()) {
1149  // compute an aggregated value for each row of the window frame by using segment
1150  // tree when constructing a window context, we build a necessary segment tree (so
1151  // called `aggregate tree`) to query the aggregated value of the specific window
1152  // frame
1153  const auto pi64_type =
1154  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1155  const auto ppi64_type = llvm::PointerType::get(
1156  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0), 0);
1157 
1158  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
1159  codegenFrameBoundRange(window_func, code_generator, co);
1160 
1161  // compute aggregated value over the computed frame range
1162  auto current_row_pos_lv = code_generator.posArg(nullptr);
1163  auto partition_index_lv = codegenCurrentPartitionIndex(
1164  window_func_context, &code_generator, co, current_row_pos_lv);
1165 
1166  // ordering column buffer
1167  const auto target_col_ti = args.front()->get_type_info();
1168  const auto target_col_size = target_col_ti.get_size();
1169  const auto col_type_name =
1170  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
1171 
1172  const auto partition_buf_ptrs = codegenLoadPartitionBuffers(
1173  window_func_context, &code_generator, co, partition_index_lv);
1174 
1175  auto [order_col_type_name, order_key_buf_ptr_lv] =
1176  codegenLoadOrderKeyBufPtr(window_func_context, &code_generator, co);
1177 
1178  // null value of the ordering column
1179  const auto order_key_buf_ti =
1180  window_func_context->getOrderKeyColumnBufferTypes().front();
1181  auto const ordering_spec = window_func->getCollation().front();
1182  llvm::Value* order_key_col_null_val_lv{nullptr};
1183  switch (order_key_buf_ti.get_type()) {
1184  case kDATE:
1185  case kTIMESTAMP:
1186  case kTIME: {
1187  if (order_key_buf_ti.get_compression() == kENCODING_FIXED ||
1188  order_key_buf_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
1189  auto null_val = inline_fixed_encoding_null_val(order_key_buf_ti);
1190  order_key_col_null_val_lv = cgen_state_->llInt((int32_t)null_val);
1191  break;
1192  }
1193  }
1194  default: {
1195  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1196  break;
1197  }
1198  }
1199 
1200  auto [null_start_pos_lv, null_end_pos_lv] = codegenFrameNullRange(
1201  window_func_context, &code_generator, co, partition_index_lv);
1202  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
1203  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
1204  WindowFrameBoundFuncArgs args{
1205  frame_start_bound_expr_lv,
1206  frame_end_bound_expr_lv,
1207  current_row_pos_lv,
1208  nullptr,
1209  partition_buf_ptrs.current_partition_start_offset_lv,
1210  cgen_state_->llInt((int64_t)0),
1211  cgen_state_->llInt((int64_t)1),
1212  partition_buf_ptrs.num_elem_current_partition_lv,
1213  order_key_buf_ptr_lv,
1214  "",
1215  partition_buf_ptrs.target_partition_rowid_ptr_lv,
1216  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
1217  nulls_first_lv,
1218  null_start_pos_lv,
1219  null_end_pos_lv};
1220  auto [frame_start_bound_lv, frame_end_bound_lv] =
1221  codegenWindowFrameBounds(window_func_context,
1222  window_func->getFrameStartBound(),
1223  window_func->getFrameEndBound(),
1224  order_key_col_null_val_lv,
1225  args,
1226  code_generator);
1227 
1228  // codegen to send a query with frame bound to aggregate tree searcher
1229  llvm::ConstantInt* aggregation_trees_lv{nullptr};
1230  llvm::Value* invalid_val_lv{nullptr};
1231  llvm::Value* null_val_lv{nullptr};
1232  std::string aggregation_tree_search_func_name{"search_"};
1233  std::string aggregation_tree_getter_func_name{"get_"};
1234 
1235  // prepare null values and aggregate_tree getter and searcher depending on
1236  // a type of the ordering column
1237  auto agg_expr_ti = args.front()->get_type_info();
1238  if (agg_expr_ti.is_fp()) {
1239  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1240  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1241  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1242  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1243  } else {
1244  invalid_val_lv = cgen_state_->llFp((double)0);
1245  }
1246  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1247  aggregation_tree_search_func_name += "double";
1248  aggregation_tree_getter_func_name += "double";
1249  } else {
1250  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1251  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1252  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1253  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1254  } else {
1255  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1256  }
1257  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1258  aggregation_tree_search_func_name += "int64_t";
1259  aggregation_tree_getter_func_name += "integer";
1260  }
1261 
1262  // derived aggregation has a different code path
1263  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1264  aggregation_tree_search_func_name += "_derived";
1265  aggregation_tree_getter_func_name += "_derived";
1266  }
1267 
1268  // get a buffer holding aggregate trees for each partition
1269  if (agg_expr_ti.is_fp()) {
1270  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1271  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1272  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1273  } else {
1274  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1275  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1276  }
1277  } else {
1278  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1279  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1280  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1281  } else {
1282  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1283  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1284  }
1285  }
1286 
1287  CHECK(aggregation_trees_lv);
1288  CHECK(invalid_val_lv);
1289  aggregation_tree_search_func_name += "_aggregation_tree";
1290  aggregation_tree_getter_func_name += "_aggregation_tree";
1291 
1292  // get the aggregate tree of the current partition from a window context
1293  auto aggregation_trees_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
1294  cgen_state_.get(),
1295  &code_generator,
1296  co,
1297  aggregation_trees_lv,
1298  ppi64_type,
1299  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1300  .front();
1301  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1302  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1303 
1304  // a depth of segment tree
1305  const auto tree_depth_buf = cgen_state_->llInt(
1306  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1307  const auto tree_depth_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
1308  cgen_state_.get(),
1309  &code_generator,
1310  co,
1311  tree_depth_buf,
1312  pi64_type,
1313  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1314  .front();
1315  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1316  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1317  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1318  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1319  current_partition_tree_depth_buf_ptr);
1320 
1321  // a fanout of the current partition's segment tree
1322  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1323  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1324 
1325  // agg_type
1326  const auto agg_type_lv =
1327  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1328 
1329  // send a query to the aggregate tree with the frame range:
1330  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1331  auto res_lv =
1332  cgen_state_->emitCall(aggregation_tree_search_func_name,
1333  {target_aggregation_tree_lv,
1334  frame_start_bound_lv,
1335  frame_end_bound_lv,
1336  current_partition_tree_depth_lv,
1337  aggregation_tree_fanout_lv,
1338  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1339  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1340  invalid_val_lv,
1341  null_val_lv,
1342  agg_type_lv});
1343 
1344  // handling returned null value if exists
1345  std::string null_handler_func_name{"handle_null_val_"};
1346  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1347 
1348  // determine null_handling function's name
1349  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1350  // average aggregate function returns a value as a double
1351  // (and our search* function also returns a double)
1352  if (agg_expr_ti.is_fp()) {
1353  // fp type: double null value
1354  null_handler_func_name += "double_double";
1355  } else {
1356  // non-fp type: int64_t null type
1357  null_handler_func_name += "double_int64_t";
1358  }
1359  } else if (agg_expr_ti.is_fp()) {
1360  // fp type: double null value
1361  null_handler_func_name += "double_double";
1362  } else {
1363  // non-fp type: int64_t null type
1364  null_handler_func_name += "int64_t_int64_t";
1365  }
1366  null_handler_func_name += "_window_framing_agg";
1367 
1368  // prepare null_val
1369  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1370  if (agg_expr_ti.is_fp()) {
1371  null_handler_args.push_back(cgen_state_->llFp((double)0));
1372  } else {
1373  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1374  }
1375  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1376  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1377  } else {
1378  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1379  }
1380  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1381 
1382  // when AGG_TYPE is double, we get a double type return value we expect an integer
1383  // type value for the count aggregation
1384  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1385  return cgen_state_->ir_builder_.CreateFPToSI(
1386  res_lv, get_int_type(64, cgen_state_->context_));
1387  } else if (window_func->getKind() != SqlWindowFunctionKind::COUNT &&
1388  agg_expr_ti.is_date_in_days()) {
1389  // we need to decode the "encoded" date column value
1390  auto date_null_val = get_null_value_by_size(cgen_state_.get(), agg_expr_ti);
1391  if (date_null_val->getType()->getScalarSizeInBits() != 32) {
1392  date_null_val = cgen_state_->castToTypeIn(date_null_val, 32);
1393  }
1394  return cgen_state_->emitCall("fixed_width_date_decode",
1395  {res_lv, date_null_val, null_val_lv});
1396  }
1397  return res_lv;
1398  } else {
1399  auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1400  Analyzer::Expr* arg_target_expr;
1401  std::vector<llvm::Value*> agg_func_args{aggregate_state};
1402  auto modified_window_func_null_val = window_func_null_val;
1403  if (args.empty() ||
1404  (window_func->getKind() == SqlWindowFunctionKind::COUNT &&
1405  dynamic_cast<Analyzer::Constant*>(args.front().get()) != nullptr)) {
1406  // a count aggregation without an expression: COUNT(1) or COUNT(*)
1407  agg_func_args.push_back(cgen_state_->llInt(int64_t(1)));
1408  } else {
1409  // we use #base_agg_func_name##_skip_val agg function
1410  // i.e.,int64_t agg_sum_skip_val(int64_t* agg, int64_t val, int64_t skip_val)
1411  arg_target_expr = args.front().get();
1412  const auto arg_lvs = code_generator.codegen(arg_target_expr, true, co);
1413  CHECK_EQ(arg_lvs.size(), size_t(1));
1414  // handling current row's value
1415  auto crt_val = arg_lvs.front();
1416  if ((window_func->getKind() == SqlWindowFunctionKind::SUM ||
1417  window_func->getKind() == SqlWindowFunctionKind::SUM_IF) &&
1418  !window_func_ti.is_fp()) {
1419  crt_val = code_generator.codegenCastBetweenIntTypes(
1420  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
1421  }
1422  agg_func_args.push_back(window_func_ti.get_type() == kFLOAT
1423  ? crt_val
1424  : cgen_state_->castToTypeIn(crt_val, 64));
1425  // handle null value and conditional value for conditional aggregates if necessary
1426  llvm::Value* cond_lv{nullptr};
1427  if (window_function_conditional_aggregate(window_func->getKind())) {
1428  switch (window_func->getKind()) {
1429  case SqlWindowFunctionKind::COUNT_IF:
1430  // COUNT_IF has a single condition expr which is always bool type
1431  modified_window_func_null_val = cgen_state_->castToTypeIn(
1432  cgen_state_->inlineNull(SQLTypeInfo(kTINYINT)), 64);
1433  break;
1434  case SqlWindowFunctionKind::SUM_IF: {
1435  // FP type input col uses its own null value depending on the type
1436  // otherwise (integer type input col), we use 8-byte type
1437  if (args.front()->get_type_info().is_integer()) {
1438  agg_func_args[1] = cgen_state_->castToTypeIn(agg_func_args[1], 64);
1439  // keep the null value but casting its type to 8-byte
1440  modified_window_func_null_val =
1441  cgen_state_->castToTypeIn(window_func_null_val, 64);
1442  }
1443  auto cond_expr_lv = code_generator.codegen(args[1].get(), true, co).front();
1444  cond_lv =
1445  codegenConditionalAggregateCondValSelector(cond_expr_lv, kSUM_IF, co);
1446  }
1447  default:
1448  break;
1449  }
1450  }
1451  agg_name += "_skip_val";
1452  agg_func_args.push_back(modified_window_func_null_val);
1453  if (cond_lv) {
1454  agg_func_args.push_back(cond_lv);
1455  }
1456  }
1457  cgen_state_->emitCall(agg_name, agg_func_args);
1458  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1459  codegenWindowAvgEpilogue(
1460  &code_generator, co, agg_func_args[1], window_func_null_val);
1461  }
1462  return codegenAggregateWindowState(&code_generator, co, aggregate_state);
1463  }
1464 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
Definition: sqltypes.h:76
bool window_function_conditional_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:78
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
llvm::Value * current_partition_start_offset_lv
Definition: sqltypes.h:80
void codegenWindowAvgEpilogue(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *crt_val, llvm::Value *window_func_null_val)
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
#define CHECK(condition)
Definition: Logger.h:291
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
llvm::Value * codegenAggregateWindowState(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

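The generated calls above rely on the runtime's *_skip_val aggregate convention (e.g. agg_sum_skip_val), which only folds a value into the aggregate buffer when it differs from the type's null sentinel. The following is a minimal scalar sketch of that convention, not the actual runtime function; the sentinel value and helper name are illustrative only.

#include <cstdint>
#include <iostream>

// Hypothetical scalar model of the *_skip_val convention: the aggregate buffer is
// updated only when the incoming value differs from the null sentinel (skip_val),
// and a still-null accumulator is replaced rather than added to.
int64_t agg_sum_skip_val_model(int64_t* agg, const int64_t val, const int64_t skip_val) {
  const int64_t old = *agg;
  if (val != skip_val) {
    *agg = (old == skip_val) ? val : old + val;
  }
  return old;
}

int main() {
  const int64_t null_sentinel = INT64_MIN;  // stand-in for the inlined null value
  int64_t state = null_sentinel;            // window aggregate state starts at null
  for (int64_t v : {int64_t{10}, null_sentinel, int64_t{5}}) {
    agg_sum_skip_val_model(&state, v, null_sentinel);
  }
  std::cout << "sum skipping nulls: " << state << '\n';  // prints 15
}
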
void Executor::codegenWindowFunctionStateInit ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  aggregate_state 
)
private

Definition at line 339 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, COUNT_IF, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

341  {
342  AUTOMATIC_IR_METADATA(cgen_state_.get());
343  const auto window_func_context =
344  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
345  const auto window_func = window_func_context->getWindowFunction();
346  const auto window_func_ti = get_adjusted_window_type_info(window_func);
347  const auto window_func_null_val =
348  window_func_ti.is_fp()
349  ? cgen_state_->inlineFpNull(window_func_ti)
350  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
351  llvm::Value* window_func_init_val;
352  const auto window_func_kind = window_func_context->getWindowFunction()->getKind();
353  if (window_func_kind == SqlWindowFunctionKind::COUNT ||
354  window_func_kind == SqlWindowFunctionKind::COUNT_IF) {
355  switch (window_func_ti.get_type()) {
356  case kFLOAT: {
357  window_func_init_val = cgen_state_->llFp(float(0));
358  break;
359  }
360  case kDOUBLE: {
361  window_func_init_val = cgen_state_->llFp(double(0));
362  break;
363  }
364  default: {
365  window_func_init_val = cgen_state_->llInt(int64_t(0));
366  break;
367  }
368  }
369  } else {
370  window_func_init_val = window_func_null_val;
371  }
372  const auto pi32_type =
373  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
374  switch (window_func_ti.get_type()) {
375  case kDOUBLE: {
376  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
377  break;
378  }
379  case kFLOAT: {
380  aggregate_state =
381  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
382  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
383  break;
384  }
385  default: {
386  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
387  break;
388  }
389  }
390 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

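The initialization rule implemented by the IR above is simple: COUNT-like window aggregates start from zero, while every other aggregate starts from the null sentinel of its type so that an empty partition yields NULL. A minimal sketch of that rule follows; the enum, function name, and sentinel values are illustrative assumptions, not the Executor's API.

#include <cstdint>
#include <limits>
#include <variant>

enum class WindowAggKind { COUNT, COUNT_IF, SUM, AVG, MIN, MAX };

// Sketch only: pick the initial window-aggregate state for a given kind and type.
std::variant<int64_t, double> initial_window_agg_state(const WindowAggKind kind,
                                                       const bool is_fp) {
  const bool count_like =
      kind == WindowAggKind::COUNT || kind == WindowAggKind::COUNT_IF;
  if (count_like) {
    return is_fp ? std::variant<int64_t, double>{0.0}
                 : std::variant<int64_t, double>{int64_t{0}};
  }
  // Assumed null sentinels for illustration only.
  return is_fp ? std::variant<int64_t, double>{std::numeric_limits<double>::quiet_NaN()}
               : std::variant<int64_t, double>{std::numeric_limits<int64_t>::min()};
}
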
llvm::Value * Executor::codegenWindowNavigationFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 392 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFrameBoundFuncArgs::current_partition_start_offset_lv, FIRST_VALUE_IN_FRAME, FORWARD_FILL, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, LAST_VALUE_IN_FRAME, LEAD_IN_FRAME, NTH_VALUE_IN_FRAME, WindowFunctionContext::NUM_EXECUTION_DEVICES, and UNREACHABLE.

393  {
394  AUTOMATIC_IR_METADATA(cgen_state_.get());
395  const auto window_func_context =
396  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
397  const auto window_func = window_func_context->getWindowFunction();
398  const auto window_func_kind = window_func->getKind();
399  const auto& args = window_func->getArgs();
400  CHECK(args.size() >= 1 && args.size() <= 3);
401  CodeGenerator code_generator(this);
402 
403  const auto target_col_ti = args.front()->get_type_info();
404  const auto target_col_size = target_col_ti.get_size();
405  const auto target_col_type_name =
406  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
407  const auto target_col_logical_type_name = get_col_type_name_by_size(
408  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
409 
410  // when the target_column is fixed encoded, we store the actual column value with the
411  // encoding taken into account, but our resultset analyzer only considers the type
412  // without the encoding scheme, so we handle the two null values separately
413  auto logical_null_val_lv =
414  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
415  auto target_col_null_val_lv =
416  get_null_value_by_size_with_encoding(cgen_state_.get(), target_col_ti);
417  if (window_func_context->elementCount() == 0) {
418  // we do not need to generate a code for an empty input table
419  return target_col_null_val_lv;
420  }
421 
422  auto current_row_pos_lv = code_generator.posArg(nullptr);
423  auto partition_index_lv = codegenCurrentPartitionIndex(
424  window_func_context, &code_generator, co, current_row_pos_lv);
425 
426  // load window function input expression; target_column
427  size_t target_col_size_in_byte = target_col_size * 8;
428  llvm::Type* col_buf_ptr_type =
429  target_col_ti.is_fp()
430  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
431  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
432  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
433  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
434  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
435  auto target_col_buf_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
436  cgen_state_.get(),
437  &code_generator,
438  co,
439  target_col_buf_ptr_lv,
440  col_buf_type,
441  WindowFunctionContext::NUM_EXECUTION_DEVICES)
442  .front();
443 
444  // prepare various buffer ptrs related to the window partition
445  auto partition_buf_ptrs = codegenLoadPartitionBuffers(
446  window_func_context, &code_generator, co, partition_index_lv);
447 
448  // null value of the ordering column
449  const auto order_key_buf_ti =
450  window_func_context->getOrderKeyColumnBufferTypes().front();
451  auto const ordering_spec = window_func->getCollation().front();
452  auto order_key_col_null_val_lv =
453  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
454 
455  // load ordering column
456  auto [order_col_type_name, order_key_buf_ptr_lv] =
457  codegenLoadOrderKeyBufPtr(window_func_context, &code_generator, co);
458 
459  // null range
460  auto [null_start_pos_lv, null_end_pos_lv] =
461  codegenFrameNullRange(window_func_context, &code_generator, co, partition_index_lv);
462 
463  // compute a row index of the current row w.r.t the window frame it belongs to
464  std::string row_idx_on_frame_func = "compute_";
465  row_idx_on_frame_func += order_col_type_name;
466  row_idx_on_frame_func += ordering_spec.is_desc ? "_greater_equal" : "_less_equal";
467  row_idx_on_frame_func += "_current_row_idx_in_frame";
468  auto int64_t_one_val_lv = cgen_state_->llInt((int64_t)1);
469  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
470  auto cur_row_idx_in_frame_lv =
471  cgen_state_->emitCall(row_idx_on_frame_func,
472  {partition_buf_ptrs.num_elem_current_partition_lv,
473  current_row_pos_lv,
474  order_key_buf_ptr_lv,
475  partition_buf_ptrs.target_partition_rowid_ptr_lv,
476  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
477  order_key_col_null_val_lv,
478  nulls_first_lv,
479  null_start_pos_lv,
480  null_end_pos_lv});
481 
482  if (window_func->isMissingValueFillingFunction()) {
483  // We classify both FORWARD_FILL and BACKWARD_FILL as window frame navigation functions
484  // b/c they need to determine the current row index within a sorted partition
485  // (as we do for window frame navigation functions) to compute a correct and
486  // consistent resultset. Otherwise, the query result may differ per execution due to
487  // the missing table ordering. Now we know the current row's index in the sorted
488  // partition (cur_row_idx_in_frame_lv), so we can return by calling the runtime
489  // function with the index we computed
490  std::string func_name = "fill_" + target_col_type_name + "_missing_value";
491 
492  llvm::Value* forward_fill_lv =
493  cgen_state_->llBool(window_func_kind == SqlWindowFunctionKind::FORWARD_FILL);
494  return cgen_state_->emitCall(func_name,
495  {cur_row_idx_in_frame_lv,
496  target_col_null_val_lv,
497  target_col_buf_lv,
498  partition_buf_ptrs.num_elem_current_partition_lv,
499  partition_buf_ptrs.target_partition_rowid_ptr_lv,
500  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
501  forward_fill_lv});
502  }
503 
504  // compute frame bound for the current row
505  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
506  codegenFrameBoundRange(window_func, code_generator, co);
507 
508  // compute frame bound for the current row
509  auto const int64_t_zero_val_lv = cgen_state_->llInt((int64_t)0);
510  WindowFrameBoundFuncArgs args{
511  frame_start_bound_expr_lv,
512  frame_end_bound_expr_lv,
513  window_func->hasRangeModeFraming() ? current_row_pos_lv : cur_row_idx_in_frame_lv,
514  nullptr,
515  window_func->hasRangeModeFraming()
516  ? int64_t_zero_val_lv
517  : partition_buf_ptrs.current_partition_start_offset_lv,
518  int64_t_zero_val_lv,
519  int64_t_one_val_lv,
520  partition_buf_ptrs.num_elem_current_partition_lv,
521  order_key_buf_ptr_lv,
522  "",
523  partition_buf_ptrs.target_partition_rowid_ptr_lv,
524  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
525  nulls_first_lv,
526  null_start_pos_lv,
527  null_end_pos_lv};
528  auto [frame_start_bound_lv, frame_end_bound_lv] =
529  codegenWindowFrameBounds(window_func_context,
530  window_func->getFrameStartBound(),
531  window_func->getFrameEndBound(),
532  order_key_col_null_val_lv,
533  args,
534  code_generator);
535 
536  // compute the index of the current row in frame it belongs to
537  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
538  llvm::Value* offset_lv{nullptr};
539  switch (window_func_kind) {
540  case SqlWindowFunctionKind::LAG_IN_FRAME:
541  offset_lv = cgen_state_->castToTypeIn(
542  code_generator.codegen(args[1].get(), true, co)[0], 64);
543  modified_cur_row_idx_in_frame_lv =
544  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
545  break;
546  case SqlWindowFunctionKind::LEAD_IN_FRAME:
547  offset_lv = cgen_state_->castToTypeIn(
548  code_generator.codegen(args[1].get(), true, co)[0], 64);
549  modified_cur_row_idx_in_frame_lv =
550  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
551  break;
552  case SqlWindowFunctionKind::FIRST_VALUE_IN_FRAME:
553  modified_cur_row_idx_in_frame_lv = frame_start_bound_lv;
554  break;
555  case SqlWindowFunctionKind::LAST_VALUE_IN_FRAME:
556  modified_cur_row_idx_in_frame_lv = frame_end_bound_lv;
557  break;
558  case SqlWindowFunctionKind::NTH_VALUE_IN_FRAME: {
559  offset_lv = cgen_state_->castToTypeIn(
560  code_generator.codegen(args[1].get(), true, co)[0], 64);
561  auto candidate_offset_lv =
562  cgen_state_->ir_builder_.CreateAdd(frame_start_bound_lv, offset_lv);
563  auto out_of_frame_bound_lv =
564  cgen_state_->ir_builder_.CreateICmpSGT(candidate_offset_lv, frame_end_bound_lv);
565  modified_cur_row_idx_in_frame_lv = cgen_state_->ir_builder_.CreateSelect(
566  out_of_frame_bound_lv, cgen_state_->llInt((int64_t)-1), candidate_offset_lv);
567  break;
568  }
569  default:
570  UNREACHABLE() << "Unsupported window function to navigate a window frame.";
571  }
572  CHECK(modified_cur_row_idx_in_frame_lv);
573 
574  // get the target column value in the frame w.r.t the offset
575  std::string target_func_name = "get_";
576  target_func_name += target_col_type_name + "_value_";
577  target_func_name += target_col_logical_type_name + "_type_";
578  target_func_name += "in_frame";
579  auto res_lv =
580  cgen_state_->emitCall(target_func_name,
581  {modified_cur_row_idx_in_frame_lv,
582  frame_start_bound_lv,
583  frame_end_bound_lv,
584  target_col_buf_lv,
585  partition_buf_ptrs.target_partition_rowid_ptr_lv,
586  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
587  logical_null_val_lv,
588  target_col_null_val_lv});
589  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
590  res_lv = cgen_state_->emitCall(
591  "encode_date",
592  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
593  }
594  CHECK(res_lv);
595  return res_lv;
596 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
static constexpr int64_t kSecsPerDay
#define UNREACHABLE()
Definition: Logger.h:338
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
llvm::Value * current_partition_start_offset_lv
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
#define CHECK(condition)
Definition: Logger.h:291
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
llvm::Value * get_null_value_by_size_with_encoding(CgenState *cgen_state, SQLTypeInfo col_ti)

+ Here is the call graph for this function:

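The core of the switch above is index arithmetic over the sorted partition: LAG subtracts the offset from the current row index, LEAD adds it, FIRST/LAST use the computed frame bounds, and NTH_VALUE clamps an out-of-frame position to -1 so the runtime getter returns NULL. A scalar sketch of that mapping follows; the enum and function name are illustrative, not part of the Executor.

#include <cstdint>

enum class NavKind {
  LAG_IN_FRAME, LEAD_IN_FRAME, FIRST_VALUE_IN_FRAME,
  LAST_VALUE_IN_FRAME, NTH_VALUE_IN_FRAME
};

// Sketch: map the current row's index within its sorted partition to the row the
// navigation function should read, or -1 when the request falls outside the frame.
int64_t navigated_row_idx(const NavKind kind,
                          const int64_t cur_row_idx_in_frame,
                          const int64_t frame_start,
                          const int64_t frame_end,
                          const int64_t offset) {
  switch (kind) {
    case NavKind::LAG_IN_FRAME:
      return cur_row_idx_in_frame - offset;
    case NavKind::LEAD_IN_FRAME:
      return cur_row_idx_in_frame + offset;
    case NavKind::FIRST_VALUE_IN_FRAME:
      return frame_start;
    case NavKind::LAST_VALUE_IN_FRAME:
      return frame_end;
    case NavKind::NTH_VALUE_IN_FRAME: {
      const int64_t candidate = frame_start + offset;
      return candidate > frame_end ? int64_t{-1} : candidate;
    }
  }
  return -1;
}
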
std::pair< llvm::BasicBlock *, llvm::Value * > Executor::codegenWindowResetStateControlFlow ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 299 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), WindowFunctionContext::NUM_EXECUTION_DEVICES, CodeGenerator::posArg(), and CodeGenerator::toBool().

301  {
302  AUTOMATIC_IR_METADATA(cgen_state_.get());
303  const auto window_func_context =
304  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
305  auto aggregate_state = aggregateWindowStatePtr(code_generator, co);
306  const auto bitset = cgen_state_->llInt(
307  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
308  const auto bitset_lv =
309  CodegenUtil::createPtrWithHoistedMemoryAddr(
310  cgen_state_.get(),
311  code_generator,
312  co,
313  bitset,
314  llvm::PointerType::get(get_int_type(8, cgen_state_->context_), 0),
315  WindowFunctionContext::NUM_EXECUTION_DEVICES)
316  .front();
317  const auto min_val = cgen_state_->llInt(int64_t(0));
318  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
319  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
320  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
321  const auto reset_state =
322  code_generator->toBool(cgen_state_->emitCall("bit_is_set",
323  {bitset_lv,
324  code_generator->posArg(nullptr),
325  min_val,
326  max_val,
327  null_val,
328  null_bool_val}));
329  const auto reset_state_true_bb = llvm::BasicBlock::Create(
330  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
331  const auto reset_state_false_bb = llvm::BasicBlock::Create(
332  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
333  cgen_state_->ir_builder_.CreateCondBr(
334  reset_state, reset_state_true_bb, reset_state_false_bb);
335  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
336  return std::make_pair(reset_state_false_bb, aggregate_state);
337 }
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:590
llvm::Value * aggregateWindowStatePtr(CodeGenerator *code_generator, const CompilationOptions &co)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
llvm::Value * toBool(llvm::Value *)
Definition: LogicalIR.cpp:344
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:

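The branch generated above is driven by a packed bitset that marks the first row of every partition: when the bit for the current row position is set, the aggregate state is re-initialized before accumulation continues. The sketch below models that predicate and its use; it is a simplified stand-in (the real bit_is_set runtime function also takes min/max/null arguments), and the helper names are hypothetical.

#include <cstddef>
#include <cstdint>

// Simplified model of the partition-start test used by the reset-state branch.
inline bool bit_is_set_model(const int8_t* bitset, const int64_t pos) {
  return (bitset[pos >> 3] >> (pos & 7)) & 1;
}

// Usage sketch: walk the rows of a projection and reset a running sum at each
// partition start, mirroring the reset_state.true / reset_state.false blocks.
void scan_with_resets(const int8_t* partition_start_bitset,
                      const int64_t* values,
                      const size_t num_rows,
                      int64_t* out_running_sum) {
  int64_t state = 0;
  for (size_t pos = 0; pos < num_rows; ++pos) {
    if (bit_is_set_model(partition_start_bitset, static_cast<int64_t>(pos))) {
      state = 0;  // analogous to re-running the aggregate state initialization
    }
    state += values[pos];
    out_running_sum[pos] = state;
  }
}
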
ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 2715 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, heavyai::NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

2720  {
2721  auto timer = DEBUG_TIMER(__func__);
2722  auto& result_per_device = shared_context.getFragmentResults();
2723  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2724  QueryDescriptionType::NonGroupedAggregate) {
2725  return build_row_for_empty_input(
2726  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2727  }
2728  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2729  try {
2730  return reduceSpeculativeTopN(
2731  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2732  } catch (const std::bad_alloc&) {
2733  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2734  }
2735  }
2736  const auto shard_count =
2737  device_type == ExecutorDeviceType::GPU
2738  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit)
2739  : 0;
2740 
2741  if (shard_count && !result_per_device.empty()) {
2742  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit, device_type);
2743  }
2744  return reduceMultiDeviceResults(
2745  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2746 }
std::vector< Analyzer::Expr * > target_exprs
NonGroupedAggregate
Definition: enums.h:58
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) const
Definition: Execute.cpp:2830
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1724
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1589
QueryDescriptionType getQueryDescriptionType() const
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2673
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:412

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

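The function chooses one of four reduction paths based on the query shape and the per-device results. The following compact decision sketch mirrors those branches; the enum and the boolean parameters are simplified stand-ins for the real predicates (query description type, speculative top-n eligibility, GPU shard count), not Executor members.

#include <cstddef>

enum class ReductionPath { EmptyInputRow, SpeculativeTopN, ShardedTopN, MultiDeviceReduce };

// Sketch of the dispatch performed by collectAllDeviceResults().
ReductionPath choose_reduction_path(const bool results_empty,
                                    const bool non_grouped_aggregate,
                                    const bool speculative_top_n,
                                    const size_t shard_count) {
  if (results_empty && non_grouped_aggregate) {
    return ReductionPath::EmptyInputRow;      // synthesize a single empty-input row
  }
  if (speculative_top_n) {
    return ReductionPath::SpeculativeTopN;    // reduce speculative top-n candidates
  }
  if (shard_count > 0 && !results_empty) {
    return ReductionPath::ShardedTopN;        // concatenate per-shard top-n results
  }
  return ReductionPath::MultiDeviceReduce;    // generic multi-device reduction
}
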
ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  device_type 
) const
private

Definition at line 2830 of file Execute.cpp.

References blockSize(), CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

2833  {
2834  auto& result_per_device = shared_context.getFragmentResults();
2835  const auto first_result_set = result_per_device.front().first;
2836  CHECK(first_result_set);
2837  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
2838  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
2839  const auto top_n =
2840  ra_exe_unit.sort_info.limit.value_or(0) + ra_exe_unit.sort_info.offset;
2841  top_query_mem_desc.setEntryCount(0);
2842  for (auto& result : result_per_device) {
2843  const auto result_set = result.first;
2844  CHECK(result_set);
2845  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, device_type, this);
2846  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
2847  top_query_mem_desc.setEntryCount(new_entry_cnt);
2848  }
2849  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
2850  first_result_set->getDeviceType(),
2851  top_query_mem_desc,
2852  first_result_set->getRowSetMemOwner(),
2853  blockSize(),
2854  gridSize());
2855  auto top_storage = top_result_set->allocateStorage();
2856  size_t top_output_row_idx{0};
2857  for (auto& result : result_per_device) {
2858  const auto result_set = result.first;
2859  CHECK(result_set);
2860  const auto& top_permutation = result_set->getPermutationBuffer();
2861  CHECK_LE(top_permutation.size(), top_n);
2862  if (top_query_mem_desc.didOutputColumnar()) {
2863  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
2864  result_set->getQueryMemDesc(),
2865  top_storage,
2866  top_output_row_idx,
2867  top_query_mem_desc,
2868  top_permutation);
2869  } else {
2870  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
2871  top_storage,
2872  top_output_row_idx,
2873  top_query_mem_desc,
2874  top_permutation);
2875  }
2876  }
2877  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
2878  return top_result_set;
2879 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2809
std::optional< size_t > limit
std::list< Analyzer::OrderEntry > order_entries
#define CHECK_LE(x, y)
Definition: Logger.h:304
unsigned gridSize() const
Definition: Execute.cpp:4352
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2759
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define CHECK(condition)
Definition: Logger.h:291
unsigned blockSize() const
Definition: Execute.cpp:4366

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

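Conceptually, the function sorts each device's candidate rows by the query's ORDER BY key, keeps at most top_n rows per device, and copies them into one combined result set; the global ordering and LIMIT are applied downstream. A minimal sketch of that shape follows, with plain integers standing in for ResultSet storage and permutation buffers.

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch only: per-device sort and trim, then concatenation into one buffer.
std::vector<int64_t> collect_sharded_top_results(
    std::vector<std::vector<int64_t>> per_device_rows, const size_t top_n) {
  std::vector<int64_t> merged;
  for (auto& rows : per_device_rows) {
    std::sort(rows.begin(), rows.end());  // per-device sort on the ORDER BY key
    if (rows.size() > top_n) {
      rows.resize(top_n);                 // keep only the device-local top_n rows
    }
    merged.insert(merged.end(), rows.begin(), rows.end());
  }
  return merged;  // final ordering/limit is the caller's responsibility
}
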
bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const GpuSharedMemoryContext gpu_smem_context = {} 
)
private

Definition at line 3347 of file NativeCodegen.cpp.

3351  {
3352  AUTOMATIC_IR_METADATA(cgen_state_.get());
3353 
3354  // Switch the code generation into a separate filter function if enabled.
3355  // Note that accesses to function arguments are still codegenned from the
3356  // row function's arguments, then later automatically forwarded and
3357  // remapped into filter function arguments by redeclareFilterFunction().
3358  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
3359  llvm::Value* loop_done{nullptr};
3360  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
3361  if (cgen_state_->filter_func_) {
3362  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3363  auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
3364  cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
3365  row_func_entry_bb->begin());
3366  loop_done = cgen_state_->ir_builder_.CreateAlloca(
3367  get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
3368  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3369  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
3370  }
3371  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
3372  cgen_state_->current_func_ = cgen_state_->filter_func_;
3373  fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
3374  }
3375 
3376  // generate the code for the filter
3377  std::vector<Analyzer::Expr*> primary_quals;
3378  std::vector<Analyzer::Expr*> deferred_quals;
3379  bool short_circuited = CodeGenerator::prioritizeQuals(
3380  ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
3381  if (short_circuited) {
3382  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
3383  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
3384  << " quals";
3385  }
3386  llvm::Value* filter_lv = cgen_state_->llBool(true);
3387  CodeGenerator code_generator(this);
3388  for (auto expr : primary_quals) {
3389  // Generate the filter for primary quals
3390  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
3391  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
3392  }
3393  CHECK(filter_lv->getType()->isIntegerTy(1));
3394  llvm::BasicBlock* sc_false{nullptr};
3395  if (!deferred_quals.empty()) {
3396  auto sc_true = llvm::BasicBlock::Create(
3397  cgen_state_->context_, "sc_true", cgen_state_->current_func_);
3398  sc_false = llvm::BasicBlock::Create(
3399  cgen_state_->context_, "sc_false", cgen_state_->current_func_);
3400  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
3401  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
3402  if (ra_exe_unit.join_quals.empty()) {
3403  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
3404  }
3405  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
3406  filter_lv = cgen_state_->llBool(true);
3407  }
3408  for (auto expr : deferred_quals) {
3409  filter_lv = cgen_state_->ir_builder_.CreateAnd(
3410  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
3411  }
3412 
3413  CHECK(filter_lv->getType()->isIntegerTy(1));
3414  auto ret = group_by_and_aggregate.codegen(
3415  filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);
3416 
3417  // Switch the code generation back to the row function if a filter
3418  // function was enabled.
3419  if (cgen_state_->filter_func_) {
3420  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3421  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
3422  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3423  }
3424 
3425  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3426  cgen_state_->current_func_ = cgen_state_->row_func_;
3427  cgen_state_->filter_func_call_ =
3428  cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});
3429 
3430  // Create real filter function declaration after placeholder call
3431  // is emitted.
3432  redeclareFilterFunction();
3433 
3434  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3435  auto loop_done_true = llvm::BasicBlock::Create(
3436  cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
3437  auto loop_done_false = llvm::BasicBlock::Create(
3438  cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
3439  auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
3440  loop_done->getType()->getPointerElementType(), loop_done);
3441  cgen_state_->ir_builder_.CreateCondBr(
3442  loop_done_flag, loop_done_true, loop_done_false);
3443  cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
3444  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3445  cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
3446  } else {
3447  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3448  }
3449  }
3450  return ret;
3451 }
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:158
#define CHECK(condition)
Definition: Logger.h:291
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1087
#define VLOG(n)
Definition: Logger.h:388
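
The two-phase filter structure above (primary quals evaluated first, deferred quals only for rows that survive the short circuit) can be modeled in plain C++ as below. This is an illustrative sketch of the evaluation order the generated sc_true / sc_false blocks implement, not the codegen itself.

#include <functional>
#include <vector>

// Sketch: cheap ("primary") predicates are ANDed first; only rows that pass them
// evaluate the deferred predicates, mirroring the short-circuit basic blocks.
bool passes_filters(const std::vector<std::function<bool()>>& primary_quals,
                    const std::vector<std::function<bool()>>& deferred_quals) {
  for (const auto& qual : primary_quals) {
    if (!qual()) {
      return false;  // analogous to branching to sc_false and returning early
    }
  }
  for (const auto& qual : deferred_quals) {
    if (!qual()) {
      return false;
    }
  }
  return true;
}
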
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 2816 of file NativeCodegen.cpp.

2828  {
2829  auto timer = DEBUG_TIMER(__func__);
2830 
2831  if (co.device_type == ExecutorDeviceType::GPU) {
2832  if (!cuda_mgr) {
2833  throw QueryMustRunOnCpu();
2834  }
2835  }
2836 
2837 #ifndef NDEBUG
2838  static std::uint64_t counter = 0;
2839  ++counter;
2840  VLOG(1) << "CODEGEN #" << counter << ":";
2841  LOG(IR) << "CODEGEN #" << counter << ":";
2842  LOG(PTX) << "CODEGEN #" << counter << ":";
2843  LOG(ASM) << "CODEGEN #" << counter << ":";
2844 #endif
2845 
2846  // cgenstate_manager uses RAII pattern to manage the live time of
2847  // CgenState instances.
2848  Executor::CgenStateManager cgenstate_manager(*this,
2849  allow_lazy_fetch,
2850  query_infos,
2851  deleted_cols_map,
2852  &ra_exe_unit); // locks compilation_mutex
2853  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);
2854 
2855  GroupByAndAggregate group_by_and_aggregate(
2856  this,
2857  co.device_type,
2858  ra_exe_unit,
2859  query_infos,
2860  row_set_mem_owner,
2861  has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
2862  : std::nullopt);
2863  auto query_mem_desc =
2864  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
2865  max_groups_buffer_entry_guess,
2866  crt_min_byte_width,
2867  render_info,
2868  eo.output_columnar_hint);
2869 
2870  if (query_mem_desc->getQueryDescriptionType() ==
2871  QueryDescriptionType::GroupByBaselineHash &&
2872  !has_cardinality_estimation && (!render_info || !render_info->isInSitu()) &&
2873  !eo.just_explain) {
2874  const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
2875  throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
2876  }
2877 
2878  const bool output_columnar = query_mem_desc->didOutputColumnar();
2879  const bool gpu_shared_mem_optimization =
2880  is_gpu_shared_mem_supported(query_mem_desc.get(),
2881  ra_exe_unit,
2882  cuda_mgr,
2883  co.device_type,
2884  cuda_mgr ? this->blockSize() : 1,
2885  cuda_mgr ? this->numBlocksPerMP() : 1);
2886  if (gpu_shared_mem_optimization) {
2887  // disable interleaved bins optimization on the GPU
2888  query_mem_desc->setHasInterleavedBinsOnGpu(false);
2889  LOG(DEBUG1) << "GPU shared memory is used for the " +
2890  query_mem_desc->queryDescTypeToString() + " query(" +
2891  std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
2892  query_mem_desc.get())) +
2893  " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
2894  }
2895 
2896  const GpuSharedMemoryContext gpu_smem_context(
2897  get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));
2898 
2899  if (co.device_type == ExecutorDeviceType::GPU) {
2900  const size_t num_count_distinct_descs =
2901  query_mem_desc->getCountDistinctDescriptorsSize();
2902  for (size_t i = 0; i < num_count_distinct_descs; i++) {
2903  const auto& count_distinct_descriptor =
2904  query_mem_desc->getCountDistinctDescriptor(i);
2905  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
2906  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
2907  !co.hoist_literals)) {
2908  throw QueryMustRunOnCpu();
2909  }
2910  }
2911 
2912  // we currently do not support varlen projection based on baseline groupby when
2913  // 1) the target table is multi-fragmented and 2) multiple GPUs are involved in query
2914  // processing; in this case, we punt the query to CPU to avoid a server crash
2915  for (const auto expr : ra_exe_unit.target_exprs) {
2916  if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2917  bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
2918  if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
2919  !g_leaf_count) {
2920  std::set<const Analyzer::ColumnVar*,
2921  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2922  colvar_set(Analyzer::ColumnVar::colvar_comp);
2923  gby_expr->collect_column_var(colvar_set, true);
2924  for (const auto cv : colvar_set) {
2925  if (cv->get_type_info().is_varlen()) {
2926  const auto tbl_key = cv->getTableKey();
2927  std::for_each(query_infos.begin(),
2928  query_infos.end(),
2929  [&tbl_key](const InputTableInfo& input_table_info) {
2930  if (input_table_info.table_key == tbl_key &&
2931  input_table_info.info.fragments.size() > 1) {
2932  throw QueryMustRunOnCpu();
2933  }
2934  });
2935  }
2936  }
2937  }
2938  }
2939  }
2940  }
2941 
2942  // Read the module template and target either CPU or GPU
2943  // by binding the stream position functions to the right implementation:
2944  // stride access for GPU, contiguous for CPU
2945  CHECK(cgen_state_->module_ == nullptr);
2946  cgen_state_->set_module_shallow_copy(get_rt_module(), /*always_clone=*/true);
2947 
2948  auto is_gpu = co.device_type == ExecutorDeviceType::GPU;
2949  if (is_gpu) {
2950  cgen_state_->module_->setDataLayout(get_gpu_data_layout());
2951  cgen_state_->module_->setTargetTriple(get_gpu_target_triple_string());
2952  }
2953  if (has_udf_module(/*is_gpu=*/is_gpu)) {
2955  get_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2956  }
2957  if (has_rt_udf_module(/*is_gpu=*/is_gpu)) {
2959  get_rt_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2960  }
2961 
2962  AUTOMATIC_IR_METADATA(cgen_state_.get());
2963 
2964  auto agg_fnames =
2965  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
2966 
2967  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
2968 
2969  const bool is_group_by{query_mem_desc->isGroupBy()};
2970  auto [query_func, row_func_call] = is_group_by
2971  ? query_group_by_template(cgen_state_->module_,
2972  co.hoist_literals,
2973  *query_mem_desc,
2974  co.device_type,
2975  ra_exe_unit.scan_limit,
2976  gpu_smem_context)
2977  : query_template(cgen_state_->module_,
2978  agg_slot_count,
2979  co.hoist_literals,
2980  !!ra_exe_unit.estimator,
2981  gpu_smem_context);
2982  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
2983  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
2984  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
2985 
2986  cgen_state_->query_func_ = query_func;
2987  cgen_state_->row_func_call_ = row_func_call;
2988  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
2989  &query_func->getEntryBlock().front());
2990 
2991  // Generate the function signature and column head fetches s.t.
2992  // double indirection isn't needed in the inner loop
2993  auto& fetch_bb = query_func->front();
2994  llvm::IRBuilder<> fetch_ir_builder(&fetch_bb);
2995  fetch_ir_builder.SetInsertPoint(&*fetch_bb.begin());
2996  auto col_heads = generate_column_heads_load(ra_exe_unit.input_col_descs.size(),
2997  get_arg_by_name(query_func, "byte_stream"),
2998  fetch_ir_builder,
2999  cgen_state_->context_);
3000  CHECK_EQ(ra_exe_unit.input_col_descs.size(), col_heads.size());
3001 
3002  cgen_state_->row_func_ = create_row_function(ra_exe_unit.input_col_descs.size(),
3003  is_group_by ? 0 : agg_slot_count,
3004  co.hoist_literals,
3005  cgen_state_->module_,
3006  cgen_state_->context_);
3007  CHECK(cgen_state_->row_func_);
3008  cgen_state_->row_func_bb_ =
3009  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
3010 
3011  if (g_enable_filter_function) {
3012  auto filter_func_ft =
3013  llvm::FunctionType::get(get_int_type(32, cgen_state_->context_), {}, false);
3014  cgen_state_->filter_func_ = llvm::Function::Create(filter_func_ft,
3015  llvm::Function::ExternalLinkage,
3016  "filter_func",
3017  cgen_state_->module_);
3018  CHECK(cgen_state_->filter_func_);
3019  cgen_state_->filter_func_bb_ = llvm::BasicBlock::Create(
3020  cgen_state_->context_, "entry", cgen_state_->filter_func_);
3021  }
3022 
3023  cgen_state_->current_func_ = cgen_state_->row_func_;
3024  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3025 
3026  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
3027  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
3028  const auto join_loops =
3029  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
3030 
3031  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
3032  for (auto& simple_qual : ra_exe_unit.simple_quals) {
3033  plan_state_->addSimpleQual(simple_qual);
3034  }
3035  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
3036  if (is_not_deleted_bb) {
3037  cgen_state_->row_func_bb_ = is_not_deleted_bb;
3038  }
3039  if (!join_loops.empty()) {
3040  codegenJoinLoops(join_loops,
3041  body_execution_unit,
3042  group_by_and_aggregate,
3043  query_func,
3044  cgen_state_->row_func_bb_,
3045  *(query_mem_desc.get()),
3046  co,
3047  eo);
3048  } else {
3049  const bool can_return_error = compileBody(
3050  ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co, gpu_smem_context);
3051  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog ||
3052  eo.allow_runtime_query_interrupt) {
3053  createErrorCheckControlFlow(query_func,
3054  eo.with_dynamic_watchdog,
3055  eo.allow_runtime_query_interrupt,
3056  join_loops,
3057  co.device_type,
3058  group_by_and_aggregate.query_infos_);
3059  }
3060  }
3061  std::vector<llvm::Value*> hoisted_literals;
3062 
3063  if (co.hoist_literals) {
3064  VLOG(1) << "number of hoisted literals: "
3065  << cgen_state_->query_func_literal_loads_.size()
3066  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
3067  << " bytes";
3068  }
3069 
3070  if (co.hoist_literals && !cgen_state_->query_func_literal_loads_.empty()) {
3071  // we have some hoisted literals...
3072  hoisted_literals = inlineHoistedLiterals();
3073  }
3074 
3075  // replace the row func placeholder call with the call to the actual row func
3076  std::vector<llvm::Value*> row_func_args;
3077  for (size_t i = 0; i < cgen_state_->row_func_call_->getNumOperands() - 1; ++i) {
3078  row_func_args.push_back(cgen_state_->row_func_call_->getArgOperand(i));
3079  }
3080  row_func_args.insert(row_func_args.end(), col_heads.begin(), col_heads.end());
3081  row_func_args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
3082  row_func_args.push_back(get_arg_by_name(query_func, "row_func_mgr"));
3083  // push hoisted literals arguments, if any
3084  row_func_args.insert(
3085  row_func_args.end(), hoisted_literals.begin(), hoisted_literals.end());
3086  llvm::ReplaceInstWithInst(
3087  cgen_state_->row_func_call_,
3088  llvm::CallInst::Create(cgen_state_->row_func_, row_func_args, ""));
3089 
3090  // replace the filter func placeholder call with the call to the actual filter func
3091  if (cgen_state_->filter_func_) {
3092  std::vector<llvm::Value*> filter_func_args;
3093  for (auto arg_it = cgen_state_->filter_func_args_.begin();
3094  arg_it != cgen_state_->filter_func_args_.end();
3095  ++arg_it) {
3096  filter_func_args.push_back(arg_it->first);
3097  }
3098  llvm::ReplaceInstWithInst(
3099  cgen_state_->filter_func_call_,
3100  llvm::CallInst::Create(cgen_state_->filter_func_, filter_func_args, ""));
3101  }
3102 
3103  // Aggregate
3104  plan_state_->init_agg_vals_ =
3105  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
3106 
3107  /*
3108  * If we have decided to use GPU shared memory (decision is not made here), then
3109  * we generate proper code for extra components that it needs (buffer initialization and
3110  * gpu reduction from shared memory to global memory). We then replace these functions
3111  * into the already compiled query_func (replacing two placeholders, write_back_nop and
3112  * init_smem_nop). The rest of the code should be as before (row_func, etc.).
3113  */
3114  if (gpu_smem_context.isSharedMemoryUsed()) {
3115  if (query_mem_desc->getQueryDescriptionType() ==
3116  QueryDescriptionType::GroupByPerfectHash) {
3117  GpuSharedMemCodeBuilder gpu_smem_code(
3118  cgen_state_->module_,
3119  cgen_state_->context_,
3120  *query_mem_desc,
3121  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc),
3122  plan_state_->init_agg_vals_,
3123  executor_id_);
3124  gpu_smem_code.codegen();
3125  gpu_smem_code.injectFunctionsInto(query_func);
3126 
3127  // helper functions are used for caching purposes later
3128  cgen_state_->helper_functions_.push_back(gpu_smem_code.getReductionFunction());
3129  cgen_state_->helper_functions_.push_back(gpu_smem_code.getInitFunction());
3130  LOG(IR) << gpu_smem_code.toString();
3131  }
3132  }
3133 
3134  auto multifrag_query_func = cgen_state_->module_->getFunction(
3135  "multifrag_query" + std::string(co.hoist_literals ? "_hoisted_literals" : ""));
3136  CHECK(multifrag_query_func);
3137 
3139  insertErrorCodeChecker(multifrag_query_func,
3140  get_index_by_name(query_func, "error_code"),
3141  co.hoist_literals,
3143  }
3144 
3145  bind_query(query_func,
3146  "query_stub" + std::string(co.hoist_literals ? "_hoisted_literals" : ""),
3147  multifrag_query_func,
3148  cgen_state_->module_);
3149 
3150  std::vector<llvm::Function*> root_funcs{query_func, cgen_state_->row_func_};
3151  if (cgen_state_->filter_func_) {
3152  root_funcs.push_back(cgen_state_->filter_func_);
3153  }
3154  auto live_funcs = CodeGenerator::markDeadRuntimeFuncs(
3155  *cgen_state_->module_, root_funcs, {multifrag_query_func});
3156 
3157  // Always inline the row function and the filter function.
3158  // We don't want register spills in the inner loops.
3159  // LLVM seems to correctly free up alloca instructions
3160  // in these functions even when they are inlined.
3161  mark_function_always_inline(cgen_state_->row_func_);
3162  if (cgen_state_->filter_func_) {
3163  mark_function_always_inline(cgen_state_->filter_func_);
3164  }
3165 
3166 #ifndef NDEBUG
3167  // Add helpful metadata to the LLVM IR for debugging.
3168  AUTOMATIC_IR_METADATA_DONE();
3169 #endif
3170 
3171  auto const device_str = co.device_type == ExecutorDeviceType::CPU ? "CPU:\n" : "GPU:\n";
3172  // Serialize the important LLVM IR functions to text for SQL EXPLAIN.
3173  std::string llvm_ir =
3174  serialize_llvm_object(multifrag_query_func) + serialize_llvm_object(query_func) +
3175  serialize_llvm_object(cgen_state_->row_func_) +
3176  (cgen_state_->filter_func_ ? serialize_llvm_object(cgen_state_->filter_func_) : "");
3177  VLOG(3) << "Unoptimized IR for the " << device_str << "\n" << llvm_ir << "\nEnd of IR";
3178  if (co.explain_type == ExecutorExplainType::Optimized) {
3179 #ifdef WITH_JIT_DEBUG
3180  throw std::runtime_error(
3181  "Explain optimized not available when JIT runtime debug symbols are enabled");
3182 #else
3183  // Note that we don't run the NVVM reflect pass here. Use LOG(IR) to get the
3184  // optimized IR after NVVM reflect
3185  llvm::legacy::PassManager pass_manager;
3186  optimize_ir(query_func,
3187  cgen_state_->module_,
3188  pass_manager,
3189  live_funcs,
3190  gpu_smem_context.isSharedMemoryUsed(),
3191  co);
3192 #endif // WITH_JIT_DEBUG
3193  llvm_ir =
3194  serialize_llvm_object(multifrag_query_func) + serialize_llvm_object(query_func) +
3195  serialize_llvm_object(cgen_state_->row_func_) +
3196  (cgen_state_->filter_func_ ? serialize_llvm_object(cgen_state_->filter_func_)
3197  : "");
3198 #ifndef NDEBUG
3199  llvm_ir += serialize_llvm_metadata_footnotes(query_func, cgen_state_.get());
3200 #endif
3201  }
3202  LOG(IR) << "\n\n" << query_mem_desc->toString() << "\n";
3203  LOG(IR) << "IR for the " << device_str;
3204 #ifdef NDEBUG
3205  LOG(IR) << serialize_llvm_object(query_func)
3206  << serialize_llvm_object(cgen_state_->row_func_)
3207  << (cgen_state_->filter_func_ ? serialize_llvm_object(cgen_state_->filter_func_)
3208  : "")
3209  << "\nEnd of IR";
3210 #else
3211  LOG(IR) << serialize_llvm_object(cgen_state_->module_) << "\nEnd of IR";
3212 #endif
3213  // Insert calls to "register_buffer_with_executor_rsm" for allocations
3214  // in runtime functions (i.e. from RBC) without it
3215  AutoTrackBuffersInRuntimeIR();
3216 
3217  // Run some basic validation checks on the LLVM IR before code is generated below.
3218  verify_function_ir(cgen_state_->row_func_);
3219  if (cgen_state_->filter_func_) {
3220  verify_function_ir(cgen_state_->filter_func_);
3221  }
3222 
3223  // Generate final native code from the LLVM IR.
3224  return std::make_tuple(
3225  CompilationResult{
3226  co.device_type == ExecutorDeviceType::CPU
3227  ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
3228  : optimizeAndCodegenGPU(query_func,
3229  multifrag_query_func,
3230  live_funcs,
3231  is_group_by || ra_exe_unit.estimator,
3232  cuda_mgr,
3233  gpu_smem_context.isSharedMemoryUsed(),
3234  co),
3235  cgen_state_->getLiterals(),
3236  output_columnar,
3237  llvm_ir,
3238  std::move(gpu_smem_context)},
3239  std::move(query_mem_desc));
3240 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
GroupByPerfectHash
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:1204
const std::unique_ptr< llvm::Module > & get_udf_module(bool is_gpu=false) const
Definition: Execute.h:535
bool is_gpu_shared_mem_supported(const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:215
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:222
void optimize_ir(llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
#define LOG(tag)
Definition: Logger.h:285
void AutoTrackBuffersInRuntimeIR()
void mark_function_always_inline(llvm::Function *func)
llvm::StringRef get_gpu_data_layout()
const std::unique_ptr< llvm::Module > & get_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:539
std::vector< InputDescriptor > input_descs
std::string serialize_llvm_metadata_footnotes(llvm::Function *query_func, CgenState *cgen_state)
std::tuple< llvm::Function *, llvm::CallInst * > query_template(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext &gpu_smem_context)
void insertErrorCodeChecker(llvm::Function *query_func, unsigned const error_code_idx, bool hoist_literals, bool allow_runtime_query_interrupt)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
unsigned numBlocksPerMP() const
Definition: Execute.cpp:4361
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::tuple< llvm::Function *, llvm::CallInst * > query_group_by_template(llvm::Module *mod, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext &gpu_smem_context)
void addTransientStringLiterals(const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
Definition: Execute.cpp:2523
std::vector< std::string > get_agg_fnames(const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
std::string to_string(char const *&&v)
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:4288
const ExecutorId executor_id_
Definition: Execute.h:1476
llvm::StringRef get_gpu_target_triple_string()
void verify_function_ir(const llvm::Function *func)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
static std::unordered_set< llvm::Function * > markDeadRuntimeFuncs(llvm::Module &module, const std::vector< llvm::Function * > &roots, const std::vector< llvm::Function * > &leaves)
ExecutorExplainType explain_type
unsigned get_index_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:187
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
const std::shared_ptr< Analyzer::Estimator > estimator
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const std::unique_ptr< llvm::Module > & get_rt_module() const
Definition: Execute.h:532
this
Definition: Execute.cpp:285
#define AUTOMATIC_IR_METADATA_DONE()
llvm::Function * create_row_function(const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
ExecutorDeviceType device_type
void bind_pos_placeholders(const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
void bind_query(llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
std::string serialize_llvm_object(const T *llvm_obj)
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
std::vector< llvm::Value * > generate_column_heads_load(const int num_columns, llvm::Value *byte_stream_arg, llvm::IRBuilder<> &ir_builder, llvm::LLVMContext &ctx)
GroupByBaselineHash
Definition: enums.h:58
bool has_udf_module(bool is_gpu=false) const
Definition: Execute.h:555
bool g_enable_filter_function
Definition: Execute.cpp:91
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
bool has_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:559
std::vector< llvm::Value * > inlineHoistedLiterals()
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:610
unsigned blockSize() const
Definition: Execute.cpp:4366
size_t g_leaf_count
Definition: ParserNode.cpp:79
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)
#define VLOG(n)
Definition: Logger.h:388
size_t get_shared_memory_size(const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
size_t g_gpu_smem_threshold
Definition: Execute.cpp:142
AggregatedColRange Executor::computeColRangesCache ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 4894 of file Execute.cpp.

References CHECK, Catalog_Namespace::get_metadata_for_column(), getLeafColumnRange(), getTableInfo(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().

Referenced by setupCaching().

4895  {
4896  AggregatedColRange agg_col_range_cache;
4897  std::unordered_set<shared::TableKey> phys_table_keys;
4898  for (const auto& phys_input : phys_inputs) {
4899  phys_table_keys.emplace(phys_input.db_id, phys_input.table_id);
4900  }
4901  std::vector<InputTableInfo> query_infos;
4902  for (const auto& table_key : phys_table_keys) {
4903  query_infos.emplace_back(InputTableInfo{table_key, getTableInfo(table_key)});
4904  }
4905  for (const auto& phys_input : phys_inputs) {
4906  auto db_id = phys_input.db_id;
4907  auto table_id = phys_input.table_id;
4908  auto column_id = phys_input.col_id;
4909  const auto cd =
4910  Catalog_Namespace::get_metadata_for_column({db_id, table_id, column_id});
4911  CHECK(cd);
4912  if (ExpressionRange::typeSupportsRange(cd->columnType)) {
4913  const auto col_var = std::make_unique<Analyzer::ColumnVar>(
4914  cd->columnType, shared::ColumnKey{db_id, table_id, column_id}, 0);
4915  const auto col_range = getLeafColumnRange(col_var.get(), query_infos, this, false);
4916  agg_col_range_cache.setColRange(phys_input, col_range);
4917  }
4918  }
4919  return agg_col_range_cache;
4920 }
const ColumnDescriptor * get_metadata_for_column(const ::shared::ColumnKey &column_key)
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key) const
Definition: Execute.cpp:736
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
#define CHECK(condition)
Definition: Logger.h:291
void setColRange(const PhysicalInput &, const ExpressionRange &)
static bool typeSupportsRange(const SQLTypeInfo &ti)

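The loop at the top of computeColRangesCache() first collapses the per-column physical inputs into one key per table so that getTableInfo() is consulted only once per table. A standalone sketch of that deduplication pattern, using a simplified stand-in for PhysicalInput (ids below are hypothetical):

// Illustration only: PhysInput stands in for PhysicalInput.
#include <iostream>
#include <set>
#include <utility>
#include <vector>

struct PhysInput {
  int db_id;
  int table_id;
  int col_id;
};

int main() {
  const std::vector<PhysInput> phys_inputs{{1, 10, 1}, {1, 10, 2}, {1, 11, 1}};
  std::set<std::pair<int, int>> phys_table_keys;  // unique (db_id, table_id) pairs
  for (const auto& in : phys_inputs) {
    phys_table_keys.emplace(in.db_id, in.table_id);
  }
  // Table metadata would be fetched once per key here; the per-column ranges
  // are then derived from the per-table infos.
  std::cout << phys_table_keys.size() << " tables for " << phys_inputs.size()
            << " physical inputs\n";  // 2 tables for 3 physical inputs
  return 0;
}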

StringDictionaryGenerations Executor::computeStringDictionaryGenerations ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 4922 of file Execute.cpp.

References CHECK, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), kENCODING_DICT, anonymous_namespace{Execute.cpp}::prepare_string_dictionaries(), and StringDictionaryGenerations::setGeneration().

Referenced by setupCaching().

4923  {
4924  StringDictionaryGenerations string_dictionary_generations;
4925  // Foreign tables may not have populated dictionaries for encoded columns. If this is
4926  // the case, we need to populate them here to make sure that the generations are set
4927  // correctly.
4928  prepare_string_dictionaries(phys_inputs);
4929  for (const auto& phys_input : phys_inputs) {
4930  const auto catalog =
4931  Catalog_Namespace::SysCatalog::instance().getCatalog(phys_input.db_id);
4932  CHECK(catalog);
4933  const auto cd = catalog->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
4934  CHECK(cd);
4935  const auto& col_ti =
4936  cd->columnType.is_array() ? cd->columnType.get_elem_type() : cd->columnType;
4937  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) {
4938  const auto& dict_key = col_ti.getStringDictKey();
4939  const auto dd = catalog->getMetadataForDict(dict_key.dict_id);
4940  CHECK(dd && dd->stringDict);
4941  string_dictionary_generations.setGeneration(dict_key,
4942  dd->stringDict->storageEntryCount());
4943  }
4944  }
4945  return string_dictionary_generations;
4946 }
void setGeneration(const shared::StringDictKey &dict_key, const uint64_t generation)
void prepare_string_dictionaries(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:221
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291


TableGenerations Executor::computeTableGenerations ( const std::unordered_set< shared::TableKey > &  phys_table_keys)
private

Definition at line 4948 of file Execute.cpp.

References getTableInfo(), and TableGenerations::setGeneration().

Referenced by setupCaching().

4949  {
4950  TableGenerations table_generations;
4951  for (const auto& table_key : phys_table_keys) {
4952  const auto table_info = getTableInfo(table_key);
4953  table_generations.setGeneration(
4954  table_key,
4955  TableGeneration{static_cast<int64_t>(table_info.getPhysicalNumTuples()), 0});
4956  }
4957  return table_generations;
4958 }
void setGeneration(const shared::TableKey &table_key, const TableGeneration &generation)
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key) const
Definition: Execute.cpp:736


bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 614 of file Execute.h.

References cgen_state_.

614  {
615  return cgen_state_->contains_left_deep_outer_join_;
616  }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
bool  run_with_allowing_runtime_interrupt,
const std::vector< JoinLoop > &  join_loops,
ExecutorDeviceType  device_type,
const std::vector< InputTableInfo > &  input_table_infos 
)
private

Definition at line 2029 of file NativeCodegen.cpp.

2035  {
2036  AUTOMATIC_IR_METADATA(cgen_state_.get());
2037 
2038  // check whether the row processing was successful; currently, it can
2039  // fail by running out of group by buffer slots
2040 
2041  if (run_with_dynamic_watchdog && run_with_allowing_runtime_interrupt) {
2042  // when both dynamic watchdog and runtime interrupt are turned on,
2043  // we use dynamic watchdog
2044  run_with_allowing_runtime_interrupt = false;
2045  }
2046 
2047  {
2048  // disable injecting query interrupt checker if the session info is invalid
2049  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(
2050  executor_session_mutex_);
2051  if (current_query_session_.empty()) {
2052  run_with_allowing_runtime_interrupt = false;
2053  }
2054  }
2055 
2056  llvm::Value* row_count = nullptr;
2057  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
2058  device_type == ExecutorDeviceType::GPU) {
2059  row_count =
2060  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
2061  }
2062 
2063  bool done_splitting = false;
2064  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
2065  ++bb_it) {
2066  llvm::Value* pos = nullptr;
2067  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
2068  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
2069  llvm::isa<llvm::PHINode>(*inst_it)) {
2070  if (inst_it->getName() == "pos") {
2071  pos = &*inst_it;
2072  }
2073  continue;
2074  }
2075  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
2076  continue;
2077  }
2078  auto& row_func_call = llvm::cast<llvm::CallInst>(*inst_it);
2079  auto const row_func_name = CodegenUtil::getCalledFunctionName(row_func_call);
2080  if (row_func_name && *row_func_name == "row_process") {
2081  auto next_inst_it = inst_it;
2082  ++next_inst_it;
2083  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
2084  auto& br_instr = bb_it->back();
2085  llvm::IRBuilder<> ir_builder(&br_instr);
2086  llvm::Value* err_lv = &*inst_it;
2087  llvm::Value* err_lv_returned_from_row_func = nullptr;
2088  if (run_with_dynamic_watchdog) {
2089  CHECK(pos);
2090  llvm::Value* call_watchdog_lv = nullptr;
2091  if (device_type == ExecutorDeviceType::GPU) {
2092  // In order to make sure all threads within a block see the same barrier,
2093  // only blocks in which no thread has experienced the critical
2094  // edge will go through the dynamic watchdog computation
2095  CHECK(row_count);
2096  auto crit_edge_rem =
2097  (blockSize() & (blockSize() - 1))
2098  ? ir_builder.CreateSRem(
2099  row_count,
2100  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
2101  : ir_builder.CreateAnd(
2102  row_count,
2103  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
2104  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
2105  crit_edge_threshold->setName("crit_edge_threshold");
2106 
2107  // only those threads where pos < crit_edge_threshold go through dynamic
2108  // watchdog call
2109  call_watchdog_lv =
2110  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
2111  } else {
2112  // CPU path: run watchdog for every 64th row
2113  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
2114  call_watchdog_lv = ir_builder.CreateICmp(
2115  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
2116  }
2117  CHECK(call_watchdog_lv);
2118  auto error_check_bb = bb_it->splitBasicBlock(
2119  llvm::BasicBlock::iterator(br_instr), ".error_check");
2120  auto& watchdog_br_instr = bb_it->back();
2121 
2122  auto watchdog_check_bb = llvm::BasicBlock::Create(
2123  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
2124  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
2125  auto detected_timeout = watchdog_ir_builder.CreateCall(
2126  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
2127  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
2128  detected_timeout,
2129  cgen_state_->llInt(int32_t(ErrorCode::OUT_OF_TIME)),
2130  err_lv);
2131  watchdog_ir_builder.CreateBr(error_check_bb);
2132 
2133  llvm::ReplaceInstWithInst(
2134  &watchdog_br_instr,
2135  llvm::BranchInst::Create(
2136  watchdog_check_bb, error_check_bb, call_watchdog_lv));
2137  ir_builder.SetInsertPoint(&br_instr);
2138  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
2139 
2140  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
2141  unified_err_lv->addIncoming(err_lv, &*bb_it);
2142  err_lv = unified_err_lv;
2143  } else if (run_with_allowing_runtime_interrupt) {
2144  CHECK(pos);
2145  llvm::Value* call_check_interrupt_lv{nullptr};
2146  llvm::Value* interrupt_err_lv{nullptr};
2147  llvm::BasicBlock* error_check_bb{nullptr};
2148  llvm::BasicBlock* interrupt_check_bb{nullptr};
2149  llvm::Instruction* check_interrupt_br_instr{nullptr};
2150 
2151  auto has_loop_join = std::any_of(
2152  join_loops.begin(), join_loops.end(), [](const JoinLoop& join_loop) {
2153  return join_loop.isNestedLoopJoin();
2154  });
2155  auto codegen_interrupt_checker = [&]() {
2156  error_check_bb = bb_it->splitBasicBlock(llvm::BasicBlock::iterator(br_instr),
2157  ".error_check");
2158  check_interrupt_br_instr = &bb_it->back();
2159 
2160  interrupt_check_bb = llvm::BasicBlock::Create(
2161  cgen_state_->context_, ".interrupt_check", query_func, error_check_bb);
2162  llvm::IRBuilder<> interrupt_checker_ir_builder(interrupt_check_bb);
2163  auto detected_interrupt = interrupt_checker_ir_builder.CreateCall(
2164  cgen_state_->module_->getFunction("check_interrupt"), {});
2165  interrupt_err_lv = interrupt_checker_ir_builder.CreateSelect(
2166  detected_interrupt,
2167  cgen_state_->llInt(int32_t(ErrorCode::INTERRUPTED)),
2168  err_lv);
2169  interrupt_checker_ir_builder.CreateBr(error_check_bb);
2170  };
2171  if (has_loop_join) {
2172  codegen_interrupt_checker();
2173  CHECK(interrupt_check_bb);
2174  CHECK(check_interrupt_br_instr);
2175  llvm::ReplaceInstWithInst(check_interrupt_br_instr,
2176  llvm::BranchInst::Create(interrupt_check_bb));
2177  ir_builder.SetInsertPoint(&br_instr);
2178  err_lv = interrupt_err_lv;
2179  } else {
2180  if (device_type == ExecutorDeviceType::GPU) {
2181  // approximate how many times the %pos variable
2182  // is incremented --> the number of iterations
2183  // here we calculate the # bit shift by considering grid/block/fragment
2184  // sizes, since using a fixed stride (i.e., every 64th increment) can prevent
2185  // some CUDA threads from entering the interrupt-checking block depending on
2186  // the fragment size --> if an outer table is not sufficiently large, a thread
2187  // may not process 64 rows and so could never be interrupted
2188  int32_t num_shift_by_gridDim = shared::getExpOfTwo(gridSize());
2189  int32_t num_shift_by_blockDim = shared::getExpOfTwo(blockSize());
2190  int64_t total_num_shift = num_shift_by_gridDim + num_shift_by_blockDim;
2191  uint64_t interrupt_checking_freq = 32;
2192  auto freq_control_knob = g_running_query_interrupt_freq;
2193  CHECK_GT(freq_control_knob, 0);
2194  CHECK_LE(freq_control_knob, 1.0);
2195  if (!input_table_infos.empty()) {
2196  const auto& outer_table_info = *input_table_infos.begin();
2197  auto num_outer_table_tuples =
2198  outer_table_info.info.getFragmentNumTuplesUpperBound();
2199  if (num_outer_table_tuples > 0) {
2200  // gridSize * blockSize --> pos_step (idx of the next row per thread)
2201  // we additionally multiply pos_step by two since the number of
2202  // dispatched blocks is double the gridSize
2203  // # tuples (of fragment) / pos_step --> maximum # increments (K)
2204  // we also scale K by the freq_control_knob to control the frequency:
2205  // to check the interrupt status more frequently, make K
2206  // smaller
2207  auto max_inc = uint64_t(
2208  floor(num_outer_table_tuples / (gridSize() * blockSize() * 2)));
2209  if (max_inc < 2) {
2210  // `max_inc` is too small, so this correction is necessary to keep
2211  // `interrupt_checking_freq` valid (i.e., larger than zero)
2212  max_inc = 2;
2213  }
2214  auto calibrated_inc =
2215  uint64_t(floor(max_inc * (1 - freq_control_knob)));
2216  interrupt_checking_freq =
2217  uint64_t(pow(2, shared::getExpOfTwo(calibrated_inc)));
2218  // cover the case where interrupt_checking_freq > K:
2219  // some threads would then never branch to the interrupt checker,
2220  // so we manually use a smaller frequency that is still close to max_inc
2221  if (interrupt_checking_freq > max_inc) {
2222  interrupt_checking_freq = max_inc / 2;
2223  }
2224  if (interrupt_checking_freq < 8) {
2225  // such a small freq would check the interrupt status too frequently,
2226  // so we clamp it to a reasonable minimum value
2227  interrupt_checking_freq = 8;
2228  }
2229  }
2230  }
2231  VLOG(1) << "Set the running query interrupt checking frequency: "
2232  << interrupt_checking_freq;
2233  // check the interrupt flag for every interrupt_checking_freq-th iteration
2234  llvm::Value* pos_shifted_per_iteration =
2235  ir_builder.CreateLShr(pos, cgen_state_->llInt(total_num_shift));
2236  auto interrupt_predicate = ir_builder.CreateAnd(pos_shifted_per_iteration,
2237  interrupt_checking_freq);
2238  call_check_interrupt_lv =
2239  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2240  interrupt_predicate,
2241  cgen_state_->llInt(int64_t(0LL)));
2242  } else {
2243  // CPU path: run interrupt checker for every 64th row
2244  auto interrupt_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
2245  call_check_interrupt_lv =
2246  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2247  interrupt_predicate,
2248  cgen_state_->llInt(int64_t(0LL)));
2249  }
2250  codegen_interrupt_checker();
2251  CHECK(call_check_interrupt_lv);
2252  CHECK(interrupt_err_lv);
2253  CHECK(interrupt_check_bb);
2254  CHECK(error_check_bb);
2255  CHECK(check_interrupt_br_instr);
2256  llvm::ReplaceInstWithInst(
2257  check_interrupt_br_instr,
2258  llvm::BranchInst::Create(
2259  interrupt_check_bb, error_check_bb, call_check_interrupt_lv));
2260  ir_builder.SetInsertPoint(&br_instr);
2261  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
2262 
2263  unified_err_lv->addIncoming(interrupt_err_lv, interrupt_check_bb);
2264  unified_err_lv->addIncoming(err_lv, &*bb_it);
2265  err_lv = unified_err_lv;
2266  }
2267  }
2268  if (!err_lv_returned_from_row_func) {
2269  err_lv_returned_from_row_func = err_lv;
2270  }
2271  if (device_type == ExecutorDeviceType::GPU && g_enable_dynamic_watchdog) {
2272  // let kernel execution finish as expected, regardless of the observed error,
2273  // unless it is from the dynamic watchdog where all threads within that block
2274  // return together.
2275  err_lv =
2276  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2277  err_lv,
2278  cgen_state_->llInt(int32_t(ErrorCode::OUT_OF_TIME)));
2279  } else {
2280  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
2281  err_lv,
2282  cgen_state_->llInt(static_cast<int32_t>(0)));
2283  }
2284  auto error_bb = llvm::BasicBlock::Create(
2285  cgen_state_->context_, ".error_exit", query_func, new_bb);
2286  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
2287  llvm::CallInst::Create(
2288  cgen_state_->module_->getFunction("record_error_code"),
2289  std::vector<llvm::Value*>{err_lv_returned_from_row_func, error_code_arg},
2290  "",
2291  error_bb);
2292  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
2293  llvm::ReplaceInstWithInst(&br_instr,
2294  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
2295  done_splitting = true;
2296  break;
2297  }
2298  }
2299  }
2300  CHECK(done_splitting);
2301 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
double g_running_query_interrupt_freq
Definition: Execute.cpp:141
QuerySessionId current_query_session_
Definition: Execute.h:1576
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
#define CHECK_GT(x, y)
Definition: Logger.h:305
std::shared_lock< T > shared_lock
unsigned getExpOfTwo(unsigned n)
Definition: MathUtils.cpp:23
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LE(x, y)
Definition: Logger.h:304
unsigned gridSize() const
Definition: Execute.cpp:4352
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
#define CHECK(condition)
Definition: Logger.h:291
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
unsigned blockSize() const
Definition: Execute.cpp:4366
#define VLOG(n)
Definition: Logger.h:388
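The interrupt-frequency heuristic embedded in the generated IR above can be reproduced on the host. The following standalone sketch (illustration only, not part of the generated code) mirrors that arithmetic with assumed grid/block sizes and tuple counts, and assumes shared::getExpOfTwo(n) behaves like floor(log2(n)):

// Standalone sketch of the interrupt-checking-frequency arithmetic used in
// createErrorCheckControlFlow(); all concrete values below are assumptions.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

static unsigned exp_of_two(uint64_t n) {  // floor(log2(n)) for n >= 1
  unsigned e = 0;
  while (n >>= 1) {
    ++e;
  }
  return e;
}

int main() {
  const uint64_t grid_size = 80;                    // assumed gridSize()
  const uint64_t block_size = 1024;                 // assumed blockSize()
  const uint64_t num_outer_table_tuples = 32000000; // assumed fragment size
  const double freq_control_knob = 0.8;             // g_running_query_interrupt_freq analogue

  // gridSize * blockSize * 2 --> pos_step; # tuples / pos_step --> max # increments (K)
  uint64_t max_inc = static_cast<uint64_t>(
      std::floor(num_outer_table_tuples / double(grid_size * block_size * 2)));
  max_inc = std::max<uint64_t>(max_inc, 2);  // keep the derived frequency valid

  const auto calibrated_inc =
      static_cast<uint64_t>(std::floor(max_inc * (1 - freq_control_knob)));
  uint64_t freq = uint64_t(1) << exp_of_two(calibrated_inc);  // round down to a power of two
  if (freq > max_inc) {
    freq = max_inc / 2;  // keep every thread able to reach the checker
  }
  freq = std::max<uint64_t>(freq, 8);  // avoid overly frequent status checks

  std::cout << "interrupt_checking_freq = " << freq << "\n";  // prints 32 for these inputs
  return 0;
}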
std::vector< std::unique_ptr< ExecutionKernel > > Executor::createKernels ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
ColumnFetcher column_fetcher,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
RenderInfo render_info,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Determines execution dispatch mode and required fragments for a given query step, then creates kernels to execute the query and returns them for launch.

Definition at line 2907 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, CHECK, CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), data_mgr_, deviceCount(), g_inner_join_fragment_skipping, getColLazyFetchInfo(), QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), SharedKernelContext::getFragOffsets(), Data_Namespace::DataMgr::getMemoryInfo(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, ExecutionOptions::gpu_input_mem_limit_percent, Data_Namespace::GPU_LEVEL, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, RelAlgExecutionUnit::input_descs, KernelPerFragment, LOG, MultifragmentKernel, ExecutionOptions::outer_fragment_indices, plan_state_, heavyai::Projection, query_mem_desc, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::toString(), RelAlgExecutionUnit::use_bump_allocator, VLOG, and ExecutionOptions::with_watchdog.

Referenced by executeWorkUnitImpl().

2920  {
2921  std::vector<std::unique_ptr<ExecutionKernel>> execution_kernels;
2922 
2923  QueryFragmentDescriptor fragment_descriptor(
2924  ra_exe_unit,
2925  table_infos,
2926  query_comp_desc.getDeviceType() == ExecutorDeviceType::GPU
2927  ? data_mgr_->getMemoryInfo(Data_Namespace::MemoryLevel::GPU_LEVEL)
2928  : std::vector<Data_Namespace::MemoryInfo>{},
2929  eo.gpu_input_mem_limit_percent,
2930  eo.outer_fragment_indices);
2931  CHECK(!ra_exe_unit.input_descs.empty());
2932 
2933  const auto device_type = query_comp_desc.getDeviceType();
2934  const bool uses_lazy_fetch =
2935  plan_state_->allow_lazy_fetch_ &&
2936  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
2937  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
2938  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
2939  const auto device_count = deviceCount(device_type);
2940  CHECK_GT(device_count, 0);
2941 
2942  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
2943  shared_context.getFragOffsets(),
2944  device_count,
2945  device_type,
2946  use_multifrag_kernel,
2947  g_inner_join_fragment_skipping,
2948  this);
2949  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
2950  checkWorkUnitWatchdog(ra_exe_unit, table_infos, device_type, device_count);
2951  }
2952 
2953  if (use_multifrag_kernel) {
2954  VLOG(1) << "Creating multifrag execution kernels";
2955  VLOG(1) << query_mem_desc.toString();
2956 
2957  // NB: We should never be on this path when the query is retried because of running
2958  // out of group by slots; also, for scan only queries on CPU we want the
2959  // high-granularity, fragment by fragment execution instead. For scan only queries on
2960  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
2961  // buffer per fragment.
2962  auto multifrag_kernel_dispatch = [&ra_exe_unit,
2963  &execution_kernels,
2964  &column_fetcher,
2965  &eo,
2966  &query_comp_desc,
2967  &query_mem_desc,
2968  render_info](const int device_id,
2969  const FragmentsList& frag_list,
2970  const int64_t rowid_lookup_key) {
2971  execution_kernels.emplace_back(
2972  std::make_unique<ExecutionKernel>(ra_exe_unit,
2973  ExecutorDeviceType::GPU,
2974  device_id,
2975  eo,
2976  column_fetcher,
2977  query_comp_desc,
2978  query_mem_desc,
2979  frag_list,
2980  ExecutorDispatchMode::MultifragmentKernel,
2981  render_info,
2982  rowid_lookup_key));
2983  };
2984  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
2985  } else {
2986  VLOG(1) << "Creating one execution kernel per fragment";
2987  VLOG(1) << query_mem_desc.toString();
2988 
2989  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
2990  (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) &&
2991  table_infos.size() == 1 && table_infos.front().table_key.table_id > 0) {
2992  const auto max_frag_size =
2993  table_infos.front().info.getFragmentNumTuplesUpperBound();
2994  if (max_frag_size < query_mem_desc.getEntryCount()) {
2995  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
2996  << " to match max fragment size " << max_frag_size
2997  << " for kernel per fragment execution path.";
2998  throw CompilationRetryNewScanLimit(max_frag_size);
2999  }
3000  }
3001 
3002  size_t frag_list_idx{0};
3003  auto fragment_per_kernel_dispatch = [&ra_exe_unit,
3004  &execution_kernels,
3005  &column_fetcher,
3006  &eo,
3007  &frag_list_idx,
3008  &device_type,
3009  &query_comp_desc,
3010  &query_mem_desc,
3011  render_info](const int device_id,
3012  const FragmentsList& frag_list,
3013  const int64_t rowid_lookup_key) {
3014  if (!frag_list.size()) {
3015  return;
3016  }
3017  CHECK_GE(device_id, 0);
3018 
3019  execution_kernels.emplace_back(
3020  std::make_unique<ExecutionKernel>(ra_exe_unit,
3021  device_type,
3022  device_id,
3023  eo,
3024  column_fetcher,
3025  query_comp_desc,
3026  query_mem_desc,
3027  frag_list,
3028  ExecutorDispatchMode::KernelPerFragment,
3029  render_info,
3030  rowid_lookup_key));
3031  ++frag_list_idx;
3032  };
3033 
3034  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
3035  ra_exe_unit);
3036  }
3037  return execution_kernels;
3038 }
bool is_agg(const Analyzer::Expr *expr)
std::vector< Analyzer::Expr * > target_exprs
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
ExecutorDeviceType getDeviceType() const
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1847
const std::vector< uint64_t > & getFragOffsets()
std::string toString() const
#define LOG(tag)
Definition: Logger.h:285
std::vector< size_t > outer_fragment_indices
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:1017
std::vector< InputDescriptor > input_descs
#define CHECK_GE(x, y)
Definition: Logger.h:306
Projection
Definition: enums.h:58
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1322
#define CHECK_GT(x, y)
Definition: Logger.h:305
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:98
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel) const
Definition: DataMgr.cpp:430
#define CHECK(condition)
Definition: Logger.h:291
double gpu_input_mem_limit_percent
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:2896
#define VLOG(n)
Definition: Logger.h:388

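The dispatch-mode choice described above (multi-fragment kernels vs. one kernel per fragment) reduces to a small predicate over the device type and execution options. A minimal standalone sketch, with flag names mirroring but not taken verbatim from createKernels:

// Illustrative only: the multi-fragment kernel path is a GPU optimization that is
// disabled when lazy column fetch is in play, unless the query is an aggregate.
#include <iostream>

bool use_multifrag_kernel(bool is_gpu,
                          bool allow_multifrag,
                          bool uses_lazy_fetch,
                          bool is_agg) {
  return is_gpu && allow_multifrag && (!uses_lazy_fetch || is_agg);
}

int main() {
  std::cout << use_multifrag_kernel(true, true, false, false) << "\n";  // 1: multifrag kernels
  std::cout << use_multifrag_kernel(true, true, true, false) << "\n";   // 0: kernel per fragment
  std::cout << use_multifrag_kernel(false, true, false, true) << "\n";  // 0: CPU is always per fragment
  return 0;
}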

CudaMgr_Namespace::CudaMgr* Executor::cudaMgr ( ) const
inline private

Definition at line 865 of file Execute.h.

References CHECK, data_mgr_, and Data_Namespace::DataMgr::getCudaMgr().

Referenced by deviceCount(), deviceCycles(), isArchPascalOrLater(), numBlocksPerMP(), and warpSize().

865  {
866  CHECK(data_mgr_);
867  auto cuda_mgr = data_mgr_->getCudaMgr();
868  CHECK(cuda_mgr);
869  return cuda_mgr;
870  }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
#define CHECK(condition)
Definition: Logger.h:291


int Executor::deviceCount ( const ExecutorDeviceType  device_type) const

Definition at line 1322 of file Execute.cpp.

References cudaMgr(), CudaMgr_Namespace::CudaMgr::getDeviceCount(), and GPU.

Referenced by createKernels(), and deviceCountForMemoryLevel().

1322  {
1323  if (device_type == ExecutorDeviceType::GPU) {
1324  return cudaMgr()->getDeviceCount();
1325  } else {
1326  return 1;
1327  }
1328 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
int getDeviceCount() const
Definition: CudaMgr.h:90


int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 1330 of file Execute.cpp.

References CPU, deviceCount(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by buildHashTableForQualifier().

1331  {
1332  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
1333  : deviceCount(ExecutorDeviceType::CPU);
1334 }
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1322
ExecutorDeviceType


int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 4396 of file Execute.cpp.

References cudaMgr(), and CudaMgr_Namespace::CudaMgr::getAllDeviceProperties().

4396  {
4397  const auto& dev_props = cudaMgr()->getAllDeviceProperties();
4398  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
4399 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:134

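Since the device clock is reported in kHz, clockKhz is also the number of cycles per millisecond, so the product above converts a millisecond budget directly into cycles. A tiny worked example with an assumed clock rate:

// Illustration of the deviceCycles() conversion; the 1,410,000 kHz clock is an assumption.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t clock_khz = 1410000;  // assumed device clock: 1.41 GHz
  const int milliseconds = 100;       // e.g., a watchdog budget
  std::cout << clock_khz * milliseconds << " cycles\n";  // 141000000
  return 0;
}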

std::string Executor::dumpCache ( ) const

Definition at line 5520 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::asMap(), AggregatedColRange::asMap(), row_set_mem_owner_, and table_generations_.

5520  {
5521  std::stringstream ss;
5522  ss << "colRangeCache: ";
5523  for (auto& [phys_input, exp_range] : agg_col_range_cache_.asMap()) {
5524  ss << "{" << phys_input.col_id << ", " << phys_input.table_id
5525  << "} = " << exp_range.toString() << ", ";
5526  }
5527  ss << "stringDictGenerations: ";
5528  for (auto& [key, val] : row_set_mem_owner_->getStringDictionaryGenerations().asMap()) {
5529  ss << key << " = " << val << ", ";
5530  }
5531  ss << "tableGenerations: ";
5532  for (auto& [key, val] : table_generations_.asMap()) {
5533  ss << key << " = {" << val.tuple_count << ", " << val.start_rowid << "}, ";
5534  }
5535  ss << "\n";
5536  return ss.str();
5537 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
TableGenerations table_generations_
Definition: Execute.h:1573
const std::unordered_map< PhysicalInput, ExpressionRange > & asMap() const
const std::unordered_map< shared::TableKey, TableGeneration > & asMap() const


void Executor::enableRuntimeQueryInterrupt ( const double  runtime_query_check_freq,
const unsigned  pending_query_check_freq 
) const

Definition at line 5274 of file Execute.cpp.

References g_enable_runtime_query_interrupt, g_pending_query_interrupt_freq, and g_running_query_interrupt_freq.

5276  {
5277  // The only scenario in which we intentionally call this function is
5278  // to allow runtime query interrupt in QueryRunner for test cases.
5279  // Because a test machine's default settings do not allow runtime query interrupt,
5280  // we have to turn it on within test code when necessary.
5281  g_enable_runtime_query_interrupt = true;
5282  g_pending_query_interrupt_freq = pending_query_check_freq;
5283  g_running_query_interrupt_freq = runtime_query_check_freq;
5286  }
5287 }
double g_running_query_interrupt_freq
Definition: Execute.cpp:141
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:140
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
void Executor::enrollQuerySession ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted_time_str,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_session_status 
)

Definition at line 5091 of file Execute.cpp.

References addToQuerySessionList(), current_query_session_, and executor_session_mutex_.

5096  {
5097  // enroll the query session into the Executor's session map
5099  if (query_session.empty()) {
5100  return;
5101  }
5102 
5103  addToQuerySessionList(query_session,
5104  query_str,
5105  submitted_time_str,
5106  executor_id,
5107  query_session_status,
5108  session_write_lock);
5109 
5110  if (query_session_status == QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL) {
5111  current_query_session_ = query_session;
5112  }
5113 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5120


ResultSetPtr Executor::executeExplain ( const QueryCompilationDescriptor query_comp_desc)
private

Definition at line 2519 of file Execute.cpp.

References QueryCompilationDescriptor::getIR().

Referenced by executeWorkUnitImpl().

2519  {
2520  return std::make_shared<ResultSet>(query_comp_desc.getIR());
2521 }


int32_t Executor::executePlanWithGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< size_t >  outer_tab_frag_ids,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const shared::TableKey outer_table_key,
const int64_t  limit,
const uint32_t  start_rowid,
const uint32_t  num_tables,
const bool  allow_runtime_interrupt,
RenderInfo render_info,
const bool  optimize_cuda_block_and_grid_sizes,
const int64_t  rows_to_process = -1 
)
private

Definition at line 4061 of file Execute.cpp.

References anonymous_namespace{Utm.h}::a, blockSize(), CHECK, CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), checkIsQuerySessionInterrupted(), CPU, DEBUG_TIMER, report::error_code(), executor_session_mutex_, logger::FATAL, g_enable_dynamic_watchdog, CompilationResult::generated_code, getCurrentQuerySession(), QueryMemoryDescriptor::getEntryCount(), getJoinHashTablePtrs(), QueryExecutionContext::getRowSet(), GpuSharedMemoryContext::getSharedMemorySize(), CompilationResult::gpu_smem_context, gridSize(), RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, interrupted_, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), CompilationResult::literal_values, LOG, shared::printContainer(), QueryExecutionContext::query_buffers_, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, RelAlgExecutionUnit::scan_limit, serializeLiterals(), QueryMemoryDescriptor::setEntryCount(), RelAlgExecutionUnit::union_all, RenderInfo::useCudaBuffers(), and VLOG.

4081  {
4082  auto timer = DEBUG_TIMER(__func__);
4083  INJECT_TIMER(executePlanWithGroupBy);
4084  // TODO: get results via a separate method, but need to do something with literals.
4085  CHECK(!results || !(*results));
4086  if (col_buffers.empty()) {
4087  return 0;
4088  }
4089  CHECK_NE(ra_exe_unit.groupby_exprs.size(), size_t(0));
4090  // TODO(alex):
4091  // 1. Optimize size (make keys more compact).
4092  // 2. Resize on overflow.
4093  // 3. Optimize runtime.
4094  auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
4095  int32_t error_code = 0;
4096  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
4097  if (allow_runtime_interrupt) {
4098  bool isInterrupted = false;
4099  {
4100  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(
4101  executor_session_mutex_);
4102  const auto query_session = getCurrentQuerySession(session_read_lock);
4103  isInterrupted = checkIsQuerySessionInterrupted(query_session, session_read_lock);
4104  }
4105  if (isInterrupted) {
4106  throw QueryExecutionError(ErrorCode::INTERRUPTED);
4107  }
4108  }
4109  if (g_enable_dynamic_watchdog && interrupted_.load()) {
4110  return int32_t(ErrorCode::INTERRUPTED);
4111  }
4112 
4113  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
4114  if (render_info && render_info->useCudaBuffers()) {
4115  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
4116  }
4117 
4118  VLOG(2) << "bool(ra_exe_unit.union_all)=" << bool(ra_exe_unit.union_all)
4119  << " ra_exe_unit.input_descs="
4120  << shared::printContainer(ra_exe_unit.input_descs)
4121  << " ra_exe_unit.input_col_descs="
4122  << shared::printContainer(ra_exe_unit.input_col_descs)
4123  << " ra_exe_unit.scan_limit=" << ra_exe_unit.scan_limit
4124  << " num_rows=" << shared::printContainer(num_rows)
4125  << " frag_offsets=" << shared::printContainer(frag_offsets)
4126  << " query_exe_context->query_buffers_->num_rows_="
4127  << query_exe_context->query_buffers_->num_rows_
4128  << " query_exe_context->query_mem_desc_.getEntryCount()="
4129  << query_exe_context->query_mem_desc_.getEntryCount()
4130  << " device_id=" << device_id << " outer_table_key=" << outer_table_key
4131  << " scan_limit=" << scan_limit << " start_rowid=" << start_rowid
4132  << " num_tables=" << num_tables;
4133 
4134  RelAlgExecutionUnit ra_exe_unit_copy = ra_exe_unit;
4135  // For UNION ALL, filter out input_descs and input_col_descs that are not associated
4136  // with outer_table_id.
4137  if (ra_exe_unit_copy.union_all) {
4138  // Sort outer_table_id first, then pop the rest off of ra_exe_unit_copy.input_descs.
4139  std::stable_sort(ra_exe_unit_copy.input_descs.begin(),
4140  ra_exe_unit_copy.input_descs.end(),
4141  [outer_table_key](auto const& a, auto const& b) {
4142  return a.getTableKey() == outer_table_key &&
4143  b.getTableKey() != outer_table_key;
4144  });
4145  while (!ra_exe_unit_copy.input_descs.empty() &&
4146  ra_exe_unit_copy.input_descs.back().getTableKey() != outer_table_key) {
4147  ra_exe_unit_copy.input_descs.pop_back();
4148  }
4149  // Filter ra_exe_unit_copy.input_col_descs.
4150  ra_exe_unit_copy.input_col_descs.remove_if(
4151  [outer_table_key](auto const& input_col_desc) {
4152  return input_col_desc->getScanDesc().getTableKey() != outer_table_key;
4153  });
4154  query_exe_context->query_mem_desc_.setEntryCount(ra_exe_unit_copy.scan_limit);
4155  }
4156 
4157  if (device_type == ExecutorDeviceType::CPU) {
4158  const int32_t scan_limit_for_query =
4159  ra_exe_unit_copy.union_all ? ra_exe_unit_copy.scan_limit : scan_limit;
4160  const int32_t max_matched = scan_limit_for_query == 0
4161  ? query_exe_context->query_mem_desc_.getEntryCount()
4162  : scan_limit_for_query;
4163  CpuCompilationContext* cpu_generated_code =
4164  dynamic_cast<CpuCompilationContext*>(compilation_result.generated_code.get());
4165  CHECK(cpu_generated_code);
4166  query_exe_context->launchCpuCode(ra_exe_unit_copy,
4167  cpu_generated_code,
4168  hoist_literals,
4169  hoist_buf,
4170  col_buffers,
4171  num_rows,
4172  frag_offsets,
4173  max_matched,
4174  &error_code,
4175  start_rowid,
4176  num_tables,
4177  join_hash_table_ptrs,
4178  rows_to_process);
4179  } else {
4180  try {
4181  GpuCompilationContext* gpu_generated_code =
4182  dynamic_cast<GpuCompilationContext*>(compilation_result.generated_code.get());
4183  CHECK(gpu_generated_code);
4184  query_exe_context->launchGpuCode(
4185  ra_exe_unit_copy,
4186  gpu_generated_code,
4187  hoist_literals,
4188  hoist_buf,
4189  col_buffers,
4190  num_rows,
4191  frag_offsets,
4192  ra_exe_unit_copy.union_all ? ra_exe_unit_copy.scan_limit : scan_limit,
4193  data_mgr,
4194  blockSize(),
4195  gridSize(),
4196  device_id,
4197  compilation_result.gpu_smem_context.getSharedMemorySize(),
4198  &error_code,
4199  num_tables,
4200  allow_runtime_interrupt,
4201  join_hash_table_ptrs,
4202  render_allocator_map_ptr,
4203  optimize_cuda_block_and_grid_sizes);
4204  } catch (const OutOfMemory&) {
4205  return int32_t(ErrorCode::OUT_OF_GPU_MEM);
4206  } catch (const OutOfRenderMemory&) {
4207  return int32_t(ErrorCode::OUT_OF_RENDER_MEM);
4208  } catch (const StreamingTopNNotSupportedInRenderQuery&) {
4209  return int32_t(ErrorCode::STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY);
4210  } catch (const std::exception& e) {
4211  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
4212  }
4213  }
4214 
4215  if (heavyai::IsAny<ErrorCode::OVERFLOW_OR_UNDERFLOW,
4216  ErrorCode::DIV_BY_ZERO,
4217  ErrorCode::OUT_OF_TIME,
4218  ErrorCode::INTERRUPTED,
4219  ErrorCode::SINGLE_VALUE_FOUND_MULTIPLE_VALUES,
4220  ErrorCode::GEOS,
4221  ErrorCode::WIDTH_BUCKET_INVALID_ARGUMENT,
4222  ErrorCode::BBOX_OVERLAPS_LIMIT_EXCEEDED>::check(error_code)) {
4223  return error_code;
4224  }
4225 
4226  if (results && error_code != int32_t(ErrorCode::OVERFLOW_OR_UNDERFLOW) &&
4227  error_code != int32_t(ErrorCode::DIV_BY_ZERO) && !render_allocator_map_ptr) {
4228  *results = query_exe_context->getRowSet(ra_exe_unit_copy,
4229  query_exe_context->query_mem_desc_);
4230  CHECK(*results);
4231  VLOG(2) << "results->rowCount()=" << (*results)->rowCount();
4232  (*results)->holdLiterals(hoist_buf);
4233  }
4234  if (error_code < 0 && render_allocator_map_ptr) {
4235  auto const adjusted_scan_limit =
4236  ra_exe_unit_copy.union_all ? ra_exe_unit_copy.scan_limit : scan_limit;
4237  // More rows passed the filter than available slots. We don't have a count to check,
4238  // so assume we met the limit if a scan limit is set
4239  if (adjusted_scan_limit != 0) {
4240  return 0;
4241  } else {
4242  return error_code;
4243  }
4244  }
4245  if (results && error_code &&
4246  (!scan_limit || check_rows_less_than_needed(*results, scan_limit))) {
4247  return error_code; // unlucky, not enough results and we ran out of slots
4248  }
4249 
4250  return 0;
4251 }
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
bool useCudaBuffers() const
Definition: RenderInfo.cpp:54
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:4253
void setEntryCount(const size_t val)
std::atomic< bool > interrupted_
Definition: Execute.h:1543
GpuSharedMemoryContext gpu_smem_context
const std::optional< bool > union_all
#define LOG(tag)
Definition: Logger.h:285
size_t getSharedMemorySize() const
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CpuCompilationContext *fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t start_rowid, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, const int64_t num_rows_to_process=-1)
std::vector< InputDescriptor > input_descs
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
constexpr double a
Definition: Utm.h:32
std::shared_lock< T > shared_lock
std::unique_ptr< QueryMemoryInitializer > query_buffers_
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
#define INJECT_TIMER(DESC)
Definition: measure.h:122
#define CHECK_NE(x, y)
Definition: Logger.h:302
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:4061
std::shared_ptr< CompilationContext > generated_code
QueryMemoryDescriptor query_mem_desc_
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:1060
unsigned gridSize() const
Definition: Execute.cpp:4352
std::unordered_map< int, CgenState::LiteralValues > literal_values
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:33
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
bool check_rows_less_than_needed(const ResultSetPtr &results, const size_t scan_limit)
Definition: Execute.cpp:4054
def error_code
Definition: report.py:234
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:108
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
unsigned blockSize() const
Definition: Execute.cpp:4366
#define VLOG(n)
Definition: Logger.h:388
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CompilationContext *compilation_context, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const size_t shared_memory_size, int32_t *error_code, const uint32_t num_tables, const bool allow_runtime_interrupt, const std::vector< int8_t * > &join_hash_tables, RenderAllocatorMap *render_allocator_map, bool optimize_cuda_block_and_grid_sizes)

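The UNION ALL pruning above (stable_sort the outer table's descriptors to the front, pop the rest, then drop unrelated column descriptors) can be illustrated with simplified stand-ins for the descriptor types; the table ids below are hypothetical:

// Standalone sketch of the UNION ALL input-descriptor pruning; ints stand in for
// InputDescriptor / InputColDescriptor keyed by table id.
#include <algorithm>
#include <iostream>
#include <list>
#include <vector>

int main() {
  const int outer_table_id = 7;
  std::vector<int> input_descs{3, 7, 5, 7};       // table ids of input descriptors
  std::list<int> input_col_descs{3, 7, 5, 7, 7};  // table ids of input column descriptors

  std::stable_sort(input_descs.begin(), input_descs.end(),
                   [outer_table_id](int a, int b) {
                     return a == outer_table_id && b != outer_table_id;
                   });
  while (!input_descs.empty() && input_descs.back() != outer_table_id) {
    input_descs.pop_back();
  }
  input_col_descs.remove_if(
      [outer_table_id](int id) { return id != outer_table_id; });

  std::cout << "kept " << input_descs.size() << " input descs and "
            << input_col_descs.size() << " column descs\n";  // kept 2 ... and 3 ...
  return 0;
}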

int32_t Executor::executePlanWithoutGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const std::vector< Analyzer::Expr * > &  target_exprs,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const uint32_t  start_rowid,
const uint32_t  num_tables,
const bool  allow_runtime_interrupt,
RenderInfo render_info,
const bool  optimize_cuda_block_and_grid_sizes,
const int64_t  rows_to_process = -1 
)
private

Definition at line 3834 of file Execute.cpp.

References blockSize(), CHECK, CHECK_EQ, checkIsQuerySessionInterrupted(), CPU, DEBUG_TIMER, report::error_code(), RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, executor_session_mutex_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, CompilationResult::generated_code, get_target_info(), QueryExecutionContext::getAggInitValForIndex(), getCurrentQuerySession(), getJoinHashTablePtrs(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), GpuSharedMemoryContext::getSharedMemorySize(), GPU, CompilationResult::gpu_smem_context, gridSize(), INJECT_TIMER, interrupted_, is_distinct_target(), heavyai::InSituFlagsOwnerInterface::isInSitu(), GpuSharedMemoryContext::isSharedMemoryUsed(), kAVG, kCOUNT, kSAMPLE, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), CompilationResult::literal_values, LOG, QueryExecutionContext::query_buffers_, QueryExecutionContext::query_mem_desc_, reduceResults(), RenderInfo::render_allocator_map_ptr, serializeLiterals(), takes_float_argument(), and RenderInfo::useCudaBuffers().

3852  {
3853  INJECT_TIMER(executePlanWithoutGroupBy);
3854  auto timer = DEBUG_TIMER(__func__);
3855  CHECK(!results || !(*results));
3856  if (col_buffers.empty()) {
3857  return 0;
3858  }
3859 
3860  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
3861  if (render_info) {
3862  // TODO(adb): make sure that we either never get here in the CPU case, or if we do get
3863  // here, we are in non-insitu mode.
3864  CHECK(render_info->useCudaBuffers() || !render_info->isInSitu())
3865  << "CUDA disabled rendering in the executePlanWithoutGroupBy query path is "
3866  "currently unsupported.";
3867  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
3868  }
3869 
3870  int32_t error_code = 0;
3871  std::vector<int64_t*> out_vec;
3872  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
3873  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
3874  std::unique_ptr<OutVecOwner> output_memory_scope;
3875  if (allow_runtime_interrupt) {
3876  bool isInterrupted = false;
3877  {
3878  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(
3879  executor_session_mutex_);
3880  const auto query_session = getCurrentQuerySession(session_read_lock);
3881  isInterrupted = checkIsQuerySessionInterrupted(query_session, session_read_lock);
3882  }
3883  if (isInterrupted) {
3884  throw QueryExecutionError(ErrorCode::INTERRUPTED);
3885  }
3886  }
3887  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3888  throw QueryExecutionError(ErrorCode::INTERRUPTED);
3889  }
3890  if (device_type == ExecutorDeviceType::CPU) {
3891  CpuCompilationContext* cpu_generated_code =
3892  dynamic_cast<CpuCompilationContext*>(compilation_result.generated_code.get());
3893  CHECK(cpu_generated_code);
3894  out_vec = query_exe_context->launchCpuCode(ra_exe_unit,
3895  cpu_generated_code,
3896  hoist_literals,
3897  hoist_buf,
3898  col_buffers,
3899  num_rows,
3900  frag_offsets,
3901  0,
3902  &error_code,
3903  start_rowid,
3904  num_tables,
3905  join_hash_table_ptrs,
3906  rows_to_process);
3907  output_memory_scope.reset(new OutVecOwner(out_vec));
3908  } else {
3909  GpuCompilationContext* gpu_generated_code =
3910  dynamic_cast<GpuCompilationContext*>(compilation_result.generated_code.get());
3911  CHECK(gpu_generated_code);
3912  try {
3913  out_vec = query_exe_context->launchGpuCode(
3914  ra_exe_unit,
3915  gpu_generated_code,
3916  hoist_literals,
3917  hoist_buf,
3918  col_buffers,
3919  num_rows,
3920  frag_offsets,
3921  0,
3922  data_mgr,
3923  blockSize(),
3924  gridSize(),
3925  device_id,
3926  compilation_result.gpu_smem_context.getSharedMemorySize(),
3927  &error_code,
3928  num_tables,
3929  allow_runtime_interrupt,
3930  join_hash_table_ptrs,
3931  render_allocator_map_ptr,
3932  optimize_cuda_block_and_grid_sizes);
3933  output_memory_scope.reset(new OutVecOwner(out_vec));
3934  } catch (const OutOfMemory&) {
3935  return int32_t(ErrorCode::OUT_OF_GPU_MEM);
3936  } catch (const std::exception& e) {
3937  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
3938  }
3939  }
3940  if (heavyai::IsAny<ErrorCode::OVERFLOW_OR_UNDERFLOW,
3941  ErrorCode::DIV_BY_ZERO,
3942  ErrorCode::OUT_OF_TIME,
3943  ErrorCode::INTERRUPTED,
3944  ErrorCode::SINGLE_VALUE_FOUND_MULTIPLE_VALUES,
3945  ErrorCode::GEOS,
3946  ErrorCode::WIDTH_BUCKET_INVALID_ARGUMENT,
3947  ErrorCode::BBOX_OVERLAPS_LIMIT_EXCEEDED>::check(error_code)) {
3948  return error_code;
3949  }
3950  if (ra_exe_unit.estimator) {
3951  CHECK(!error_code);
3952  if (results) {
3953  *results =
3954  std::shared_ptr<ResultSet>(query_exe_context->estimator_result_set_.release());
3955  }
3956  return 0;
3957  }
3958  // Expect delayed results extraction (used for sub-fragments) for estimator only;
3959  CHECK(results);
3960  std::vector<int64_t> reduced_outs;
3961  const auto num_frags = col_buffers.size();
3962  const size_t entry_count =
3963  device_type == ExecutorDeviceType::GPU
3964  ? (compilation_result.gpu_smem_context.isSharedMemoryUsed()
3965  ? 1
3966  : blockSize() * gridSize() * num_frags)
3967  : num_frags;
3968  if (size_t(1) == entry_count) {
3969  for (auto out : out_vec) {
3970  CHECK(out);
3971  reduced_outs.push_back(*out);
3972  }
3973  } else {
3974  size_t out_vec_idx = 0;
3975 
3976  for (const auto target_expr : target_exprs) {
3977  const auto agg_info = get_target_info(target_expr, g_bigint_count);
3978  CHECK(agg_info.is_agg || dynamic_cast<Analyzer::Constant*>(target_expr))
3979  << target_expr->toString();
3980 
3981  const int num_iterations = agg_info.sql_type.is_geometry()
3982  ? agg_info.sql_type.get_physical_coord_cols()
3983  : 1;
3984 
3985  for (int i = 0; i < num_iterations; i++) {
3986  int64_t val1;
3987  const bool float_argument_input = takes_float_argument(agg_info);
3988  if (is_distinct_target(agg_info) ||
3989  shared::is_any<kAPPROX_QUANTILE, kMODE>(agg_info.agg_kind)) {
3990  bool const check = shared::
3991  is_any<kCOUNT, kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kMODE, kCOUNT_IF>(
3992  agg_info.agg_kind);
3993  CHECK(check) << agg_info.agg_kind;
3994  val1 = out_vec[out_vec_idx][0];
3995  error_code = 0;
3996  } else {
3997  const auto chosen_bytes = static_cast<size_t>(
3998  query_exe_context->query_mem_desc_.getPaddedSlotWidthBytes(out_vec_idx));
3999  std::tie(val1, error_code) = Executor::reduceResults(
4000  agg_info.agg_kind,
4001  agg_info.sql_type,
4002  query_exe_context->getAggInitValForIndex(out_vec_idx),
4003  float_argument_input ? sizeof(int32_t) : chosen_bytes,
4004  out_vec[out_vec_idx],
4005  entry_count,
4006  false,
4007  float_argument_input);
4008  }
4009  if (error_code) {
4010  break;
4011  }
4012  reduced_outs.push_back(val1);
4013  if (agg_info.agg_kind == kAVG ||
4014  (agg_info.agg_kind == kSAMPLE &&
4015  (agg_info.sql_type.is_varlen() || agg_info.sql_type.is_geometry()))) {
4016  const auto chosen_bytes = static_cast<size_t>(
4017  query_exe_context->query_mem_desc_.getPaddedSlotWidthBytes(out_vec_idx +
4018  1));
4019  int64_t val2;
4020  std::tie(val2, error_code) = Executor::reduceResults(
4021  agg_info.agg_kind == kAVG ? kCOUNT : agg_info.agg_kind,
4022  agg_info.sql_type,
4023  query_exe_context->getAggInitValForIndex(out_vec_idx + 1),
4024  float_argument_input ? sizeof(int32_t) : chosen_bytes,
4025  out_vec[out_vec_idx + 1],
4026  entry_count,
4027  false,
4028  false);
4029  if (error_code) {
4030  break;
4031  }
4032  reduced_outs.push_back(val2);
4033  ++out_vec_idx;
4034  }
4035  ++out_vec_idx;
4036  }
4037  }
4038  }
4039 
4040  if (error_code) {
4041  return error_code;
4042  }
4043 
4044  CHECK_EQ(size_t(1), query_exe_context->query_buffers_->result_sets_.size());
4045  auto rows_ptr = std::shared_ptr<ResultSet>(
4046  query_exe_context->query_buffers_->result_sets_[0].release());
4047  rows_ptr->fillOneEntry(reduced_outs);
4048  *results = std::move(rows_ptr);
4049  return error_code;
4050 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
bool useCudaBuffers() const
Definition: RenderInfo.cpp:54
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:4253
std::atomic< bool > interrupted_
Definition: Execute.h:1543
GpuSharedMemoryContext gpu_smem_context
#define LOG(tag)
Definition: Logger.h:285
size_t getSharedMemorySize() const
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CpuCompilationContext *fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t start_rowid, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, const int64_t num_rows_to_process=-1)
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:1337
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:106
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:3834
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
std::shared_lock< T > shared_lock
std::unique_ptr< QueryMemoryInitializer > query_buffers_
#define INJECT_TIMER(DESC)
Definition: measure.h:122
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:102
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
int64_t getAggInitValForIndex(const size_t index) const
const std::shared_ptr< Analyzer::Estimator > estimator
std::shared_ptr< CompilationContext > generated_code
QueryMemoryDescriptor query_mem_desc_
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:1060
Definition: sqldefs.h:81
unsigned gridSize() const
Definition: Execute.cpp:4352
std::unordered_map< int, CgenState::LiteralValues > literal_values
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:33
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
def error_code
Definition: report.py:234
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
unsigned blockSize() const
Definition: Execute.cpp:4366
std::unique_ptr< ResultSet > estimator_result_set_
Definition: sqldefs.h:77
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CompilationContext *compilation_context, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const size_t shared_memory_size, int32_t *error_code, const uint32_t num_tables, const bool allow_runtime_interrupt, const std::vector< int8_t * > &join_hash_tables, RenderAllocatorMap *render_allocator_map, bool optimize_cuda_block_and_grid_sizes)
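
The loop above reduces each device's per-target output vector to a single value; for AVG the generated code keeps two adjacent slots, a running sum and a running count, and both are reduced and appended back to back before the final ResultSet entry is filled. A minimal, self-contained sketch of that pairing (plain summation stands in for the real Executor::reduceResults, which also handles types, null sentinels, and error codes):

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Reduce one per-device output vector into a single value by summing; a
// simplified stand-in for Executor::reduceResults.
int64_t reduce_sum(const std::vector<int64_t>& out_vec) {
  return std::accumulate(out_vec.begin(), out_vec.end(), int64_t{0});
}

int main() {
  // For AVG two adjacent slots are kept: the running sum and the running
  // count; both are reduced and pushed back to back into reduced_outs.
  std::vector<int64_t> sum_slot{10, 20, 30};  // per-device partial sums
  std::vector<int64_t> count_slot{2, 4, 6};   // per-device partial counts
  std::vector<int64_t> reduced_outs;
  reduced_outs.push_back(reduce_sum(sum_slot));
  reduced_outs.push_back(reduce_sum(count_slot));
  std::cout << "avg = " << double(reduced_outs[0]) / reduced_outs[1] << '\n';
  return 0;
}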

+ Here is the call graph for this function:

ResultSetPtr Executor::executeTableFunction ( const TableFunctionExecutionUnit  exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Compiles and dispatches a table function; that is, a function that takes one or more columns as input and returns a ResultSet that can be consumed by subsequent execution steps.

Definition at line 2445 of file Execute.cpp.

References blockSize(), TableFunctionCompilationContext::compile(), table_functions::TableFunction::containsPreFlightFn(), CPU, CompilationOptions::device_type, TableFunctionExecutionContext::execute(), ResultSet::fixupQueryMemoryDescriptor(), getRowSetMemoryOwner(), GPU, gridSize(), table_functions::TableFunction::hasTableFunctionSpecifiedParameter(), INJECT_TIMER, ExecutionOptions::just_validate, CompilationOptions::makeCpuOnly(), query_mem_desc, TableFunctionExecutionUnit::table_func, heavyai::TableFunction, TableFunctionExecutionUnit::target_exprs, and target_exprs_to_infos().
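
As a summary of the control flow in the listing below, here is a hedged, self-contained sketch; the types and helpers are stand-ins, not HeavyDB APIs. Validate-only requests short-circuit, an optional pre-flight pass is compiled and executed first with emit_only_preflight_fn set, and then the table function itself is compiled and executed.

#include <iostream>
#include <memory>
#include <string>

struct Ctx {
  std::string label;
};

int main() {
  const bool just_validate = false;     // stand-in for eo.just_validate
  const bool has_pre_flight_fn = true;  // stand-in for containsPreFlightFn()

  auto compile = [](bool emit_only_preflight) {
    return std::make_shared<Ctx>(
        Ctx{emit_only_preflight ? std::string("pre-flight module")
                                : std::string("full module")});
  };
  auto execute = [](const std::shared_ptr<Ctx>& ctx, bool is_pre_launch) {
    std::cout << "executing " << ctx->label
              << (is_pre_launch ? " (pre-launch)" : "") << '\n';
  };

  if (just_validate) {
    return 0;  // the real method returns an empty ResultSet here
  }
  if (has_pre_flight_fn) {
    execute(compile(/*emit_only_preflight=*/true), /*is_pre_launch=*/true);
  }
  execute(compile(/*emit_only_preflight=*/false), /*is_pre_launch=*/false);
  return 0;
}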

2449  {
2450  INJECT_TIMER(Exec_executeTableFunction);
2451  if (eo.just_validate) {
2453  /*entry_count=*/0,
2455  return std::make_shared<ResultSet>(
2457  co.device_type,
2459  this->getRowSetMemoryOwner(),
2460  this->blockSize(),
2461  this->gridSize());
2462  }
2463 
2464  // Avoid compiling functions that set the sizer at runtime if the device is GPU
2465  // This should be fixed in the Python script as well to minimize the number of
2466  // QueryMustRunOnCpu exceptions
2469  throw QueryMustRunOnCpu();
2470  }
2471 
2472  ColumnCacheMap column_cache; // Note: if we add retries to the table function
2473  // framework, we may want to move this up a level
2474 
2475  ColumnFetcher column_fetcher(this, column_cache);
2477 
2478  if (exe_unit.table_func.containsPreFlightFn()) {
2479  std::shared_ptr<CompilationContext> compilation_context;
2480  {
2481  Executor::CgenStateManager cgenstate_manager(*this,
2482  false,
2483  table_infos,
2485  nullptr); // locks compilation_mutex
2487  TableFunctionCompilationContext tf_compilation_context(this, pre_flight_co);
2488  compilation_context =
2489  tf_compilation_context.compile(exe_unit, true /* emit_only_preflight_fn*/);
2490  }
2491  exe_context.execute(exe_unit,
2492  table_infos,
2493  compilation_context,
2494  column_fetcher,
2496  this,
2497  true /* is_pre_launch_udtf */);
2498  }
2499  std::shared_ptr<CompilationContext> compilation_context;
2500  {
2501  Executor::CgenStateManager cgenstate_manager(*this,
2502  false,
2503  table_infos,
2505  nullptr); // locks compilation_mutex
2506  TableFunctionCompilationContext tf_compilation_context(this, co);
2507  compilation_context =
2508  tf_compilation_context.compile(exe_unit, false /* emit_only_preflight_fn */);
2509  }
2510  return exe_context.execute(exe_unit,
2511  table_infos,
2512  compilation_context,
2513  column_fetcher,
2514  co.device_type,
2515  this,
2516  false /* is_pre_launch_udtf */);
2517 }
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:728
const table_functions::TableFunction table_func
TableFunction
Definition: enums.h:58
static CompilationOptions makeCpuOnly(const CompilationOptions &in)
#define INJECT_TIMER(DESC)
Definition: measure.h:122
ExecutorDeviceType device_type
unsigned gridSize() const
Definition: Execute.cpp:4352
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:766
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
std::vector< Analyzer::Expr * > target_exprs
unsigned blockSize() const
Definition: Execute.cpp:4366

+ Here is the call graph for this function:

TableUpdateMetadata Executor::executeUpdate ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const TableDescriptor updated_table_desc,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const UpdateLogForFragment::Callback cb,
const bool  is_agg 
)

Definition at line 65 of file ExecuteUpdate.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, CHECK_EQ, CHECK_GT, CPU, FragmentsPerTable::fragment_ids, g_enable_auto_metadata_update, SharedKernelContext::getFragmentResults(), SharedKernelContext::getFragOffsets(), Catalog_Namespace::Catalog::getMetadataForTable(), KernelPerFragment, query_mem_desc, ExecutionKernel::run(), TableDescriptor::tableId, timer_start(), timer_stop(), and VLOG.
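
The listing below walks the outer table one fragment at a time: empty or skippable fragments are passed over, a single-fragment kernel is built and run serially, and the projected result is handed to the caller-supplied callback. A compact, hypothetical sketch of that shape (placeholder types, not the real ExecutionKernel/ColumnFetcher machinery):

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct Fragment {
  size_t id;
  size_t num_tuples;
};
struct FragmentResult {
  size_t fragment_id;
  size_t rows_touched;
};
using Callback = std::function<void(const FragmentResult&)>;

void run_update(const std::vector<Fragment>& fragments, const Callback& cb) {
  for (const auto& frag : fragments) {
    if (frag.num_tuples == 0) {
      continue;  // nothing to update in this fragment
    }
    // In the real code an ExecutionKernel is built and run here, one
    // fragment at a time, so output buffers are never shared.
    FragmentResult result{frag.id, frag.num_tuples};
    cb(result);
  }
}

int main() {
  run_update({{0, 10}, {1, 0}, {2, 5}}, [](const FragmentResult& r) {
    std::cout << "fragment " << r.fragment_id << " updated " << r.rows_touched
              << " rows\n";
  });
  return 0;
}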

74  {
75  CHECK(cb);
76  CHECK(table_desc_for_update);
77  VLOG(1) << "Executor " << executor_id_
78  << " is executing update/delete work unit:" << ra_exe_unit_in;
79 
80  const auto [ra_exe_unit, deleted_cols_map] = addDeletedColumn(ra_exe_unit_in, co);
81  ColumnCacheMap column_cache;
82 
83  ColumnFetcher column_fetcher(this, column_cache);
84  CHECK_GT(ra_exe_unit.input_descs.size(), size_t(0));
85  const auto& outer_table_key = ra_exe_unit.input_descs[0].getTableKey();
86  CHECK_EQ(outer_table_key, table_infos.front().table_key);
87  const auto& outer_fragments = table_infos.front().info.fragments;
88 
89  std::vector<FragmentsPerTable> fragments = {{{0, 0}, {0}}};
90  for (size_t tab_idx = 1; tab_idx < ra_exe_unit.input_descs.size(); tab_idx++) {
91  const auto& table_key = ra_exe_unit.input_descs[tab_idx].getTableKey();
92  CHECK_EQ(table_infos[tab_idx].table_key, table_key);
93  const auto& fragmentsPerTable = table_infos[tab_idx].info.fragments;
94  FragmentsPerTable entry = {table_key, {}};
95  for (size_t innerFragId = 0; innerFragId < fragmentsPerTable.size(); innerFragId++) {
96  entry.fragment_ids.push_back(innerFragId);
97  }
98  fragments.push_back(entry);
99  }
100 
101  if (outer_fragments.empty()) {
102  return {};
103  }
104 
105  const auto max_tuple_count_fragment_it = std::max_element(
106  outer_fragments.begin(), outer_fragments.end(), [](const auto& a, const auto& b) {
107  return a.getNumTuples() < b.getNumTuples();
108  });
109  CHECK(max_tuple_count_fragment_it != outer_fragments.end());
110  int64_t global_max_groups_buffer_entry_guess =
111  max_tuple_count_fragment_it->getNumTuples();
112  if (is_agg) {
113  global_max_groups_buffer_entry_guess = std::min(
114  2 * global_max_groups_buffer_entry_guess, static_cast<int64_t>(100'000'000));
115  }
116 
117  auto query_comp_desc = std::make_unique<QueryCompilationDescriptor>();
118  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
119  {
120  query_mem_desc = query_comp_desc->compile(global_max_groups_buffer_entry_guess,
121  8,
122  /*has_cardinality_estimation=*/true,
123  ra_exe_unit,
124  table_infos,
125  deleted_cols_map,
126  column_fetcher,
127  co,
128  eo,
129  nullptr,
130  this);
131  }
132  CHECK(query_mem_desc);
133  // Since we execute updates one thread/fragment at a time,
134  // buffer re-use is not applicable and can cause issues
135  // when the contents of the output buffer are written to storage
136  query_mem_desc->setThreadsCanReuseGroupByBuffers(false);
137 
138  TableUpdateMetadata table_update_metadata;
139  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
140  ++fragment_index) {
141  const int64_t crt_fragment_tuple_count =
142  outer_fragments[fragment_index].getNumTuples();
143  if (crt_fragment_tuple_count == 0) {
144  // nothing to update
145  continue;
146  }
147  SharedKernelContext shared_context(table_infos);
148  const auto& frag_offsets = shared_context.getFragOffsets();
149  auto skip_frag = skipFragment(ra_exe_unit.input_descs[0],
150  outer_fragments[fragment_index],
151  ra_exe_unit.simple_quals,
152  frag_offsets,
153  fragment_index);
154  if (skip_frag.first) {
155  VLOG(2) << "Update/delete skipping fragment with table id: "
156  << outer_fragments[fragment_index].physicalTableId
157  << ", fragment id: " << fragment_index;
158  continue;
159  }
160  fragments[0] = {outer_table_key, {fragment_index}};
161  {
162  ExecutionKernel current_fragment_kernel(ra_exe_unit,
164  0,
165  eo,
166  column_fetcher,
167  *query_comp_desc,
168  *query_mem_desc,
169  fragments,
171  /*render_info=*/nullptr,
172  /*rowid_lookup_key=*/-1);
173 
174  auto clock_begin = timer_start();
175  std::lock_guard<std::mutex> kernel_lock(kernel_mutex_);
176  kernel_queue_time_ms_ += timer_stop(clock_begin);
177 
178  current_fragment_kernel.run(this, 0, shared_context);
179  }
180  const auto& proj_fragment_results = shared_context.getFragmentResults();
181  if (proj_fragment_results.empty()) {
182  continue;
183  }
184  const auto& proj_fragment_result = proj_fragment_results[0];
185  const auto proj_result_set = proj_fragment_result.first;
186  CHECK(proj_result_set);
187  cb({outer_fragments[fragment_index], fragment_index, proj_result_set},
188  table_update_metadata);
189  }
190 
192  auto td = cat.getMetadataForTable(table_desc_for_update->tableId);
193  TableOptimizer table_optimizer{td, this, cat};
194  table_optimizer.recomputeMetadataUnlocked(table_update_metadata);
195  }
196  return table_update_metadata;
197 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:4475
std::vector< InputDescriptor > input_descs
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanup...
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
bool g_enable_auto_metadata_update
static std::mutex kernel_mutex_
Definition: Execute.h:1624
#define CHECK_GT(x, y)
Definition: Logger.h:305
constexpr double a
Definition: Utm.h:32
const ExecutorId executor_id_
Definition: Execute.h:1476
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4658
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
#define CHECK(condition)
Definition: Logger.h:291
std::vector< size_t > fragment_ids
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define VLOG(n)
Definition: Logger.h:388
Type timer_start()
Definition: measure.h:42
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

ResultSetPtr Executor::executeWorkUnit ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)

Definition at line 2099 of file Execute.cpp.

References cgen_state_, compilation_queue_time_ms_, executeWorkUnitImpl(), executor_id_, ExecutionOptions::just_validate, kernel_queue_time_ms_, CompilationRetryNewScanLimit::new_scan_limit_, plan_state_, anonymous_namespace{Execute.cpp}::replace_scan_limit(), run_benchmark_import::result, row_set_mem_owner_, and VLOG.
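
The body below runs the work unit once and, if compilation throws CompilationRetryNewScanLimit, reruns it with the scan limit replaced. A minimal stand-alone sketch of that retry pattern (the exception type and runner here are illustrative stand-ins):

#include <cstddef>
#include <iostream>
#include <stdexcept>

// Stand-in for CompilationRetryNewScanLimit: carries the limit to retry with.
struct RetryNewScanLimit : std::runtime_error {
  explicit RetryNewScanLimit(size_t limit)
      : std::runtime_error("retry with new scan limit"), new_scan_limit(limit) {}
  size_t new_scan_limit;
};

size_t run_work_unit(size_t scan_limit) {
  if (scan_limit == 0) {
    throw RetryNewScanLimit(1000);  // compilation asks for a concrete limit
  }
  return scan_limit;  // pretend this is the produced result
}

int main() {
  size_t rows = 0;
  try {
    rows = run_work_unit(/*scan_limit=*/0);
  } catch (const RetryNewScanLimit& e) {
    rows = run_work_unit(e.new_scan_limit);  // second attempt, replaced limit
  }
  std::cout << "rows: " << rows << '\n';
  return 0;
}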

2107  {
2108  VLOG(1) << "Executor " << executor_id_ << " is executing work unit:" << ra_exe_unit_in;
2109  ScopeGuard cleanup_post_execution = [this] {
2110  // cleanup/unpin GPU buffer allocations
2111  // TODO: separate out this state into a single object
2112  VLOG(1) << "Perform post execution clearance for Executor " << executor_id_;
2113  plan_state_.reset(nullptr);
2114  if (cgen_state_) {
2115  cgen_state_->in_values_bitmaps_.clear();
2116  cgen_state_->str_dict_translation_mgrs_.clear();
2117  cgen_state_->tree_model_prediction_mgrs_.clear();
2118  }
2119  row_set_mem_owner_->clearNonOwnedGroupByBuffers();
2120  };
2121 
2122  try {
2123  auto result = executeWorkUnitImpl(max_groups_buffer_entry_guess,
2124  is_agg,
2125  true,
2126  query_infos,
2127  ra_exe_unit_in,
2128  co,
2129  eo,
2131  render_info,
2132  has_cardinality_estimation,
2133  column_cache);
2134  if (result) {
2135  result->setKernelQueueTime(kernel_queue_time_ms_);
2136  result->addCompilationQueueTime(compilation_queue_time_ms_);
2137  if (eo.just_validate) {
2138  result->setValidationOnlyRes();
2139  }
2140  }
2141  return result;
2142  } catch (const CompilationRetryNewScanLimit& e) {
2143  auto result =
2144  executeWorkUnitImpl(max_groups_buffer_entry_guess,
2145  is_agg,
2146  false,
2147  query_infos,
2148  replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_),
2149  co,
2150  eo,
2152  render_info,
2153  has_cardinality_estimation,
2154  column_cache);
2155  if (result) {
2156  result->setKernelQueueTime(kernel_queue_time_ms_);
2157  result->addCompilationQueueTime(compilation_queue_time_ms_);
2158  if (eo.just_validate) {
2159  result->setValidationOnlyRes();
2160  }
2161  }
2162  return result;
2163  }
2164 }
bool is_agg(const Analyzer::Expr *expr)
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
int64_t compilation_queue_time_ms_
Definition: Execute.h:1563
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const ExecutorId executor_id_
Definition: Execute.h:1476
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
RelAlgExecutionUnit replace_scan_limit(const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
Definition: Execute.cpp:2075
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:2166
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

ResultSetPtr Executor::executeWorkUnitImpl ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 2166 of file Execute.cpp.

References addDeletedColumn(), ExecutionOptions::allow_runtime_query_interrupt, blockSize(), CHECK, CHECK_EQ, checkIsQuerySessionEnrolled(), collectAllDeviceResults(), anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), createKernels(), data_mgr_, CompilationOptions::device_type, ExecutionOptions::estimate_output_cardinality, executeExplain(), executor_session_mutex_, ExecutionOptions::executor_type, ColumnFetcher::freeLinearizedBuf(), ColumnFetcher::freeTemporaryCpuLinearizedIdxBuf(), g_enable_executor_resource_mgr, get_available_gpus(), get_context_count(), getCurrentQuerySession(), getDeviceTypeForTargets(), QueryExecutionError::getErrorCode(), SharedKernelContext::getFragmentResults(), gridSize(), QueryExecutionError::hasErrorCode(), INJECT_TIMER, interrupted_, ExecutionOptions::just_explain, ExecutionOptions::just_validate, launchKernelsLocked(), launchKernelsViaResourceMgr(), MAX_BYTE_WIDTH_SUPPORTED, Native, RelAlgExecutionUnit::per_device_cardinality, plan_state_, heavyai::Projection, QueryMemoryDescriptor, run_benchmark_import::result, resultsUnion(), row_set_mem_owner_, QuerySessionStatus::RUNNING_REDUCTION, toString(), updateQuerySessionStatus(), VLOG, and ExecutionOptions::with_dynamic_watchdog.

Referenced by executeWorkUnit().
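
The do/while loop below retries execution with progressively wider accumulators: compilation may pick a narrow minimum byte width, and on overflow/underflow the width is doubled and the loop repeats until it would exceed sizeof(int64_t). A toy, self-contained sketch of that widening retry (the overflow test is simulated):

#include <cstdint>
#include <iostream>

// Pretend only 8-byte accumulators avoid overflow in this toy example.
bool try_execute(int8_t min_byte_width) {
  return min_byte_width >= 8;
}

int main() {
  int8_t crt_min_byte_width = 2;  // e.g. a narrow width picked by compilation
  do {
    if (try_execute(crt_min_byte_width)) {
      std::cout << "succeeded with width " << int(crt_min_byte_width) << '\n';
      return 0;
    }
    crt_min_byte_width <<= 1;  // widen and retry, as in the overflow catch blocks
  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
  std::cout << "no supported width worked\n";
  return 1;
}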

2177  {
2178  INJECT_TIMER(Exec_executeWorkUnit);
2179  const auto [ra_exe_unit, deleted_cols_map] = addDeletedColumn(ra_exe_unit_in, co);
2180  const auto device_type = getDeviceTypeForTargets(ra_exe_unit, co.device_type);
2181  CHECK(!query_infos.empty());
2182  if (!max_groups_buffer_entry_guess) {
2183  // The query has failed the first execution attempt because of running out
2184  // of group by slots. Make the conservative choice: allocate fragment size
2185  // slots and run on the CPU.
2186  CHECK(device_type == ExecutorDeviceType::CPU);
2187  max_groups_buffer_entry_guess =
2188  compute_buffer_entry_guess(query_infos, ra_exe_unit_in);
2189  }
2190 
2191  int8_t crt_min_byte_width{MAX_BYTE_WIDTH_SUPPORTED};
2192  CompilationOptions copied_co = co;
2193  copied_co.device_type = device_type;
2194  do {
2195  SharedKernelContext shared_context(query_infos);
2196  ColumnFetcher column_fetcher(this, column_cache);
2197  ScopeGuard scope_guard = [&column_fetcher] {
2198  column_fetcher.freeLinearizedBuf();
2199  column_fetcher.freeTemporaryCpuLinearizedIdxBuf();
2200  };
2201  auto query_comp_desc_owned = std::make_unique<QueryCompilationDescriptor>();
2202  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
2203  if (eo.executor_type == ExecutorType::Native) {
2204  try {
2205  INJECT_TIMER(query_step_compilation);
2206  query_mem_desc_owned =
2207  query_comp_desc_owned->compile(max_groups_buffer_entry_guess,
2208  crt_min_byte_width,
2209  has_cardinality_estimation,
2210  ra_exe_unit,
2211  query_infos,
2212  deleted_cols_map,
2213  column_fetcher,
2214  copied_co,
2215  eo,
2216  render_info,
2217  this);
2218  CHECK(query_mem_desc_owned);
2219  crt_min_byte_width = query_comp_desc_owned->getMinByteWidth();
2220  } catch (CompilationRetryNoCompaction& e) {
2221  VLOG(1) << e.what();
2222  crt_min_byte_width = MAX_BYTE_WIDTH_SUPPORTED;
2223  continue;
2224  }
2225  } else {
2226  plan_state_.reset(new PlanState(false, query_infos, deleted_cols_map, this));
2227  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
2228  CHECK(!query_mem_desc_owned);
2229  query_mem_desc_owned.reset(
2231  }
2232  if (eo.just_explain) {
2233  return executeExplain(*query_comp_desc_owned);
2234  }
2235 
2236  if (query_mem_desc_owned->canUsePerDeviceCardinality(ra_exe_unit)) {
2237  auto const max_rows_per_device =
2238  query_mem_desc_owned->getMaxPerDeviceCardinality(ra_exe_unit);
2239  if (max_rows_per_device && *max_rows_per_device >= 0 &&
2240  *max_rows_per_device < query_mem_desc_owned->getEntryCount()) {
2241  VLOG(1) << "Setting the max per device cardinality of {max_rows_per_device} as "
2242  "the new scan limit: "
2243  << *max_rows_per_device;
2244  throw CompilationRetryNewScanLimit(*max_rows_per_device);
2245  }
2246  }
2247 
2248  if (!eo.just_validate) {
2249  int available_cpus = cpu_threads();
2250  auto available_gpus = get_available_gpus(data_mgr_);
2251 
2252  const auto context_count =
2253  get_context_count(device_type, available_cpus, available_gpus.size());
2254  try {
2255  auto kernels = createKernels(shared_context,
2256  ra_exe_unit,
2257  column_fetcher,
2258  query_infos,
2259  eo,
2260  is_agg,
2261  allow_single_frag_table_opt,
2262  context_count,
2263  *query_comp_desc_owned,
2264  *query_mem_desc_owned,
2265  render_info,
2266  available_gpus,
2267  available_cpus);
2268  if (!kernels.empty()) {
2269  row_set_mem_owner_->setKernelMemoryAllocator(kernels.size());
2270  }
2272  launchKernelsViaResourceMgr(shared_context,
2273  std::move(kernels),
2274  query_comp_desc_owned->getDeviceType(),
2275  ra_exe_unit.input_descs,
2276  *query_mem_desc_owned);
2277  } else {
2279  shared_context, std::move(kernels), query_comp_desc_owned->getDeviceType());
2280  }
2281 
2282  } catch (QueryExecutionError& e) {
2283  if (eo.with_dynamic_watchdog && interrupted_.load() &&
2284  e.hasErrorCode(ErrorCode::OUT_OF_TIME)) {
2285  throw QueryExecutionError(ErrorCode::INTERRUPTED);
2286  }
2287  if (e.hasErrorCode(ErrorCode::INTERRUPTED)) {
2288  throw QueryExecutionError(ErrorCode::INTERRUPTED);
2289  }
2290  if (e.hasErrorCode(ErrorCode::OVERFLOW_OR_UNDERFLOW) &&
2291  static_cast<size_t>(crt_min_byte_width << 1) <= sizeof(int64_t)) {
2292  crt_min_byte_width <<= 1;
2293  continue;
2294  }
2295  throw;
2296  }
2297  }
2298  if (is_agg) {
2299  if (eo.allow_runtime_query_interrupt && ra_exe_unit.query_state) {
2300  // update query status to let user know we are now in the reduction phase
2301  std::string curRunningSession{""};
2302  std::string curRunningQuerySubmittedTime{""};
2303  bool sessionEnrolled = false;
2304  {
2307  curRunningSession = getCurrentQuerySession(session_read_lock);
2308  curRunningQuerySubmittedTime = ra_exe_unit.query_state->getQuerySubmittedTime();
2309  sessionEnrolled =
2310  checkIsQuerySessionEnrolled(curRunningSession, session_read_lock);
2311  }
2312  if (!curRunningSession.empty() && !curRunningQuerySubmittedTime.empty() &&
2313  sessionEnrolled) {
2314  updateQuerySessionStatus(curRunningSession,
2315  curRunningQuerySubmittedTime,
2317  }
2318  }
2319  try {
2320  if (eo.estimate_output_cardinality) {
2321  for (const auto& result : shared_context.getFragmentResults()) {
2322  auto row = result.first->getNextRow(false, false);
2323  CHECK_EQ(1u, row.size());
2324  auto scalar_r = boost::get<ScalarTargetValue>(&row[0]);
2325  CHECK(scalar_r);
2326  auto p = boost::get<int64_t>(scalar_r);
2327  CHECK(p);
2328  // todo(yoonmin): sort the frag_ids to make it consistent for later usage
2329  auto frag_ids = result.second;
2330  VLOG(1) << "Filtered cardinality for fragments-{" << ::toString(result.second)
2331  << "} : " << static_cast<size_t>(*p);
2332  ra_exe_unit_in.per_device_cardinality.emplace_back(result.second,
2333  static_cast<size_t>(*p));
2334  result.first->moveToBegin();
2335  }
2336  }
2337  return collectAllDeviceResults(shared_context,
2338  ra_exe_unit,
2339  *query_mem_desc_owned,
2340  query_comp_desc_owned->getDeviceType(),
2341  row_set_mem_owner);
2342  } catch (ReductionRanOutOfSlots&) {
2343  throw QueryExecutionError(ErrorCode::OUT_OF_SLOTS);
2344  } catch (OverflowOrUnderflow&) {
2345  crt_min_byte_width <<= 1;
2346  continue;
2347  } catch (QueryExecutionError& e) {
2348  VLOG(1) << "Error received! error_code: " << e.getErrorCode()
2349  << ", what(): " << e.what();
2350  throw QueryExecutionError(e.getErrorCode());
2351  }
2352  }
2353  return resultsUnion(shared_context, ra_exe_unit);
2354 
2355  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
2356 
2357  return std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2360  nullptr,
2361  blockSize(),
2362  gridSize());
2363 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:2907
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
int32_t getErrorCode() const
Definition: ErrorHandling.h:63
std::atomic< bool > interrupted_
Definition: Execute.h:1543
void updateQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
Definition: Execute.cpp:5075
std::unordered_set< int > get_available_gpus(const Data_Namespace::DataMgr *data_mgr)
Definition: Execute.cpp:1752
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:4475
Projection
Definition: enums.h:58
std::shared_lock< T > shared_lock
size_t compute_buffer_entry_guess(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1778
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:2715
std::vector< std::pair< std::vector< size_t >, size_t > > per_device_cardinality
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1766
#define INJECT_TIMER(DESC)
Definition: measure.h:122
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
friend class QueryMemoryDescriptor
Definition: Execute.h:1641
bool hasErrorCode(ErrorCode const ec) const
Definition: ErrorHandling.h:65
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5265
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
ExecutorDeviceType device_type
void launchKernelsLocked(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
Definition: Execute.cpp:3123
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1563
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
unsigned gridSize() const
Definition: Execute.cpp:4352
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
#define CHECK(condition)
Definition: Logger.h:291
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:2575
void launchKernelsViaResourceMgr(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const QueryMemoryDescriptor &query_mem_desc)
Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr.
Definition: Execute.cpp:3135
unsigned blockSize() const
Definition: Execute.cpp:4366
int cpu_threads()
Definition: thread_count.h:25
#define VLOG(n)
Definition: Logger.h:388
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:2519

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::executeWorkUnitPerFragment ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
PerFragmentCallBack cb,
const std::set< size_t > &  fragment_indexes_param 
)
private

Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics (metadata) over fragments.

Definition at line 2365 of file Execute.cpp.

References addDeletedColumn(), CHECK, CHECK_EQ, CompilationOptions::device_type, Fragmenter_Namespace::TableInfo::fragments, SharedKernelContext::getFragmentResults(), InputTableInfo::info, kernel_mutex_, kernel_queue_time_ms_, KernelPerFragment, ExecutionKernel::run(), timer_start(), and timer_stop().
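
As described above, an empty fragment_indexes_param means every fragment of the table is processed; otherwise only the listed fragments are, and the per-fragment callback sees each result. A small, hypothetical sketch of that selection rule (names are made up; the real code builds and runs an ExecutionKernel per fragment):

#include <cstddef>
#include <functional>
#include <iostream>
#include <set>

void for_each_selected_fragment(size_t num_fragments,
                                const std::set<size_t>& selected,
                                const std::function<void(size_t)>& cb) {
  std::set<size_t> indexes = selected;
  if (indexes.empty()) {
    for (size_t i = 0; i < num_fragments; ++i) {
      indexes.insert(i);  // empty selection means all fragments
    }
  }
  for (size_t idx : indexes) {
    cb(idx);  // one kernel per fragment in the real code
  }
}

int main() {
  for_each_selected_fragment(4, {}, [](size_t idx) {
    std::cout << "fragment " << idx << '\n';
  });
  return 0;
}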

2372  {
2373  const auto [ra_exe_unit, deleted_cols_map] = addDeletedColumn(ra_exe_unit_in, co);
2374  ColumnCacheMap column_cache;
2375 
2376  std::vector<InputTableInfo> table_infos{table_info};
2377  SharedKernelContext kernel_context(table_infos);
2378 
2379  ColumnFetcher column_fetcher(this, column_cache);
2380  auto query_comp_desc_owned = std::make_unique<QueryCompilationDescriptor>();
2381  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
2382  {
2383  query_mem_desc_owned =
2384  query_comp_desc_owned->compile(0,
2385  8,
2386  /*has_cardinality_estimation=*/false,
2387  ra_exe_unit,
2388  table_infos,
2389  deleted_cols_map,
2390  column_fetcher,
2391  co,
2392  eo,
2393  nullptr,
2394  this);
2395  }
2396  CHECK(query_mem_desc_owned);
2397  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
2398  const auto table_key = ra_exe_unit.input_descs[0].getTableKey();
2399  const auto& outer_fragments = table_info.info.fragments;
2400 
2401  std::set<size_t> fragment_indexes;
2402  if (fragment_indexes_param.empty()) {
2403  // An empty `fragment_indexes_param` set implies executing
2404  // the query for all fragments in the table. In this
2405  // case, populate `fragment_indexes` with all fragment indexes.
2406  for (size_t i = 0; i < outer_fragments.size(); i++) {
2407  fragment_indexes.emplace(i);
2408  }
2409  } else {
2410  fragment_indexes = fragment_indexes_param;
2411  }
2412 
2413  {
2414  auto clock_begin = timer_start();
2415  std::lock_guard<std::mutex> kernel_lock(kernel_mutex_);
2416  kernel_queue_time_ms_ += timer_stop(clock_begin);
2417 
2418  for (auto fragment_index : fragment_indexes) {
2419  // We may want to consider in the future allowing this to execute on devices other
2420  // than CPU
2421  FragmentsList fragments_list{{table_key, {fragment_index}}};
2422  ExecutionKernel kernel(ra_exe_unit,
2423  co.device_type,
2424  /*device_id=*/0,
2425  eo,
2426  column_fetcher,
2427  *query_comp_desc_owned,
2428  *query_mem_desc_owned,
2429  fragments_list,
2431  /*render_info=*/nullptr,
2432  /*rowid_lookup_key=*/-1);
2433  kernel.run(this, 0, kernel_context);
2434  }
2435  }
2436 
2437  const auto& all_fragment_results = kernel_context.getFragmentResults();
2438 
2439  for (const auto& [result_set_ptr, result_fragment_indexes] : all_fragment_results) {
2440  CHECK_EQ(result_fragment_indexes.size(), 1);
2441  cb(result_set_ptr, outer_fragments[result_fragment_indexes[0]]);
2442  }
2443 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:4475
std::vector< InputDescriptor > input_descs
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
std::vector< FragmentInfo > fragments
Definition: Fragmenter.h:171
static std::mutex kernel_mutex_
Definition: Execute.h:1624
std::vector< FragmentsPerTable > FragmentsList
ExecutorDeviceType device_type
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
#define CHECK(condition)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

FetchResult Executor::fetchChunks ( const ColumnFetcher column_fetcher,
const RelAlgExecutionUnit ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< shared::TableKey, const TableFragments * > &  all_tables_fragments,
const FragmentsList selected_fragments,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks,
DeviceAllocator device_allocator,
const size_t  thread_idx,
const bool  allow_runtime_interrupt 
)
private

Definition at line 3458 of file Execute.cpp.

References buildSelectedFragsMapping(), CHECK, CHECK_EQ, CHECK_LT, checkIsQuerySessionInterrupted(), Data_Namespace::CPU_LEVEL, DEBUG_TIMER, executor_session_mutex_, g_enable_dynamic_watchdog, ColumnFetcher::getAllTableColumnFragments(), getCurrentQuerySession(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), getRowCountAndOffsetForAllFrags(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, interrupted_, ColumnFetcher::linearizeColumnFragments(), needFetchAllFragments(), needLinearizeAllFragments(), plan_state_, RESULT, anonymous_namespace{Execute.cpp}::try_get_column_descriptor(), and VLOG.
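
The fetch loop below is driven by a cross join of the selected fragment ids, one list per input table, so every combination of fragments yields one set of per-column buffers. An illustrative, self-contained sketch of that cross join (the real code builds it with the CartesianProduct helper over selected_fragments_crossjoin):

#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::vector<size_t>> cartesian_product(
    const std::vector<std::vector<size_t>>& per_table_frag_ids) {
  std::vector<std::vector<size_t>> result{{}};
  for (const auto& frag_ids : per_table_frag_ids) {
    std::vector<std::vector<size_t>> next;
    for (const auto& prefix : result) {
      for (size_t frag_id : frag_ids) {
        auto combo = prefix;
        combo.push_back(frag_id);
        next.push_back(std::move(combo));
      }
    }
    result = std::move(next);
  }
  return result;
}

int main() {
  // Two tables: the outer table contributes fragment 2, the inner one {0, 1}.
  for (const auto& combo : cartesian_product({{2}, {0, 1}})) {
    for (size_t f : combo) {
      std::cout << f << ' ';
    }
    std::cout << '\n';
  }
  return 0;
}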

3469  {
3470  auto timer = DEBUG_TIMER(__func__);
3472  const auto& col_global_ids = ra_exe_unit.input_col_descs;
3473  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
3474  std::vector<size_t> local_col_to_frag_pos;
3475  buildSelectedFragsMapping(selected_fragments_crossjoin,
3476  local_col_to_frag_pos,
3477  col_global_ids,
3478  selected_fragments,
3479  ra_exe_unit);
3480 
3482  selected_fragments_crossjoin);
3483  std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
3484  std::vector<std::vector<int64_t>> all_num_rows;
3485  std::vector<std::vector<uint64_t>> all_frag_offsets;
3486  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
3487  std::vector<const int8_t*> frag_col_buffers(
3488  plan_state_->global_to_local_col_ids_.size());
3489  for (const auto& col_id : col_global_ids) {
3490  if (allow_runtime_interrupt) {
3491  bool isInterrupted = false;
3492  {
3495  const auto query_session = getCurrentQuerySession(session_read_lock);
3496  isInterrupted =
3497  checkIsQuerySessionInterrupted(query_session, session_read_lock);
3498  }
3499  if (isInterrupted) {
3500  throw QueryExecutionError(ErrorCode::INTERRUPTED);
3501  }
3502  }
3503  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3504  throw QueryExecutionError(ErrorCode::INTERRUPTED);
3505  }
3506  CHECK(col_id);
3507  const auto cd = try_get_column_descriptor(col_id.get());
3508  if (cd && cd->isVirtualCol) {
3509  CHECK_EQ("rowid", cd->columnName);
3510  continue;
3511  }
3512  const auto& table_key = col_id->getScanDesc().getTableKey();
3513  const auto fragments_it = all_tables_fragments.find(table_key);
3514  CHECK(fragments_it != all_tables_fragments.end());
3515  const auto fragments = fragments_it->second;
3516  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3517  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3518  CHECK_LT(static_cast<size_t>(it->second),
3519  plan_state_->global_to_local_col_ids_.size());
3520  const size_t frag_id = selected_frag_ids[local_col_to_frag_pos[it->second]];
3521  if (!fragments->size()) {
3522  return {};
3523  }
3524  CHECK_LT(frag_id, fragments->size());
3525  auto memory_level_for_column = memory_level;
3526  const shared::ColumnKey tbl_col_key{col_id->getScanDesc().getTableKey(),
3527  col_id->getColId()};
3528  if (!plan_state_->isColumnToFetch(tbl_col_key)) {
3529  memory_level_for_column = Data_Namespace::CPU_LEVEL;
3530  }
3531  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
3532  frag_col_buffers[it->second] =
3533  column_fetcher.getResultSetColumn(col_id.get(),
3534  memory_level_for_column,
3535  device_id,
3536  device_allocator,
3537  thread_idx);
3538  } else {
3539  if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
3540  // determine if we need special treatment to linearize a multi-frag table
3541  // i.e., a column that is classified as a varlen type, e.g., an array
3542  // for now, we only support fixed-length arrays that contain
3543  // geo point coordinates, but we can support more types in this way
3545  cd, *col_id, ra_exe_unit, selected_fragments, memory_level)) {
3546  bool for_lazy_fetch = false;
3547  if (plan_state_->isColumnToNotFetch(tbl_col_key)) {
3548  for_lazy_fetch = true;
3549  VLOG(2) << "Try to linearize lazy fetch column (col_id: " << cd->columnId
3550  << ", col_name: " << cd->columnName << ")";
3551  }
3552  frag_col_buffers[it->second] = column_fetcher.linearizeColumnFragments(
3553  col_id->getScanDesc().getTableKey(),
3554  col_id->getColId(),
3555  all_tables_fragments,
3556  chunks,
3557  chunk_iterators,
3558  for_lazy_fetch ? Data_Namespace::CPU_LEVEL : memory_level,
3559  for_lazy_fetch ? 0 : device_id,
3560  device_allocator,
3561  thread_idx);
3562  } else {
3563  frag_col_buffers[it->second] = column_fetcher.getAllTableColumnFragments(
3564  col_id->getScanDesc().getTableKey(),
3565  col_id->getColId(),
3566  all_tables_fragments,
3567  memory_level_for_column,
3568  device_id,
3569  device_allocator,
3570  thread_idx);
3571  }
3572  } else {
3573  frag_col_buffers[it->second] = column_fetcher.getOneTableColumnFragment(
3574  col_id->getScanDesc().getTableKey(),
3575  frag_id,
3576  col_id->getColId(),
3577  all_tables_fragments,
3578  chunks,
3579  chunk_iterators,
3580  memory_level_for_column,
3581  device_id,
3582  device_allocator);
3583  }
3584  }
3585  }
3586  all_frag_col_buffers.push_back(frag_col_buffers);
3587  }
3588  std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
3589  ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
3590  return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
3591 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
std::atomic< bool > interrupted_
Definition: Execute.h:1543
std::vector< InputDescriptor > input_descs
const int8_t * getResultSetColumn(const InputColDescriptor *col_desc, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx) const
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
const int8_t * getOneTableColumnFragment(const shared::TableKey &table_key, const int frag_id, const int col_id, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments, std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunk_holder, std::list< ChunkIter > &chunk_iter_holder, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator) const
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:3416
const int8_t * getAllTableColumnFragments(const shared::TableKey &table_key, const int col_id, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx) const
std::shared_lock< T > shared_lock
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:3367
#define INJECT_TIMER(DESC)
Definition: measure.h:122
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK_LT(x, y)
Definition: Logger.h:303
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
const int8_t * linearizeColumnFragments(const shared::TableKey &table_key, const int col_id, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments, std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunk_holder, std::list< ChunkIter > &chunk_iter_holder, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx) const
bool needLinearizeAllFragments(const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:3435
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3774
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
const ColumnDescriptor * try_get_column_descriptor(const InputColDescriptor *col_desc)
Definition: Execute.cpp:3340
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
#define VLOG(n)
Definition: Logger.h:388
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:3458

+ Here is the call graph for this function:

FetchResult Executor::fetchUnionChunks ( const ColumnFetcher column_fetcher,
const RelAlgExecutionUnit ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< shared::TableKey, const TableFragments * > &  all_tables_fragments,
const FragmentsList selected_fragments,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks,
DeviceAllocator device_allocator,
const size_t  thread_idx,
const bool  allow_runtime_interrupt 
)
private

Definition at line 3642 of file Execute.cpp.

References buildSelectedFragsMappingForUnion(), CHECK, CHECK_EQ, CHECK_LE, CHECK_LT, checkIsQuerySessionInterrupted(), Data_Namespace::CPU_LEVEL, DEBUG_TIMER, executor_session_mutex_, anonymous_namespace{Execute.cpp}::get_selected_input_col_descs(), anonymous_namespace{Execute.cpp}::get_selected_input_col_descs_index(), anonymous_namespace{Execute.cpp}::get_selected_input_descs_index(), ColumnFetcher::getAllTableColumnFragments(), getCurrentQuerySession(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), getRowCountAndOffsetForAllFrags(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, needFetchAllFragments(), plan_state_, shared::printContainer(), RESULT, anonymous_namespace{Execute.cpp}::set_mod_range(), anonymous_namespace{Execute.cpp}::try_get_column_descriptor(), and VLOG.
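
One detail shared with fetchChunks and visible below: a column the plan does not mark for device fetch is pinned to CPU memory regardless of the requested memory level (see the isColumnToFetch checks). A self-contained illustration of that per-column decision, with placeholder enum and helper names rather than HeavyDB types:

#include <iostream>
#include <set>
#include <string>

enum class MemoryLevel { CPU_LEVEL, GPU_LEVEL };

MemoryLevel memory_level_for_column(const std::string& column,
                                    MemoryLevel requested,
                                    const std::set<std::string>& columns_to_fetch) {
  // Columns not needed on the device stay at CPU level.
  return columns_to_fetch.count(column) ? requested : MemoryLevel::CPU_LEVEL;
}

int main() {
  const std::set<std::string> columns_to_fetch{"x"};
  for (const std::string col : {"x", "lazy_y"}) {
    const auto level =
        memory_level_for_column(col, MemoryLevel::GPU_LEVEL, columns_to_fetch);
    std::cout << col << " -> "
              << (level == MemoryLevel::GPU_LEVEL ? "GPU" : "CPU") << '\n';
  }
  return 0;
}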

3653  {
3654  auto timer = DEBUG_TIMER(__func__);
3656 
3657  CHECK_EQ(1u, selected_fragments.size());
3658  CHECK_LE(2u, ra_exe_unit.input_descs.size());
3659  CHECK_LE(2u, ra_exe_unit.input_col_descs.size());
3660  auto const& input_descs = ra_exe_unit.input_descs;
3661  const auto& selected_table_key = selected_fragments.front().table_key;
3662  size_t const input_descs_index =
3663  get_selected_input_descs_index(selected_table_key, input_descs);
3664  CHECK_LT(input_descs_index, input_descs.size());
3665  size_t const input_col_descs_index =
3666  get_selected_input_col_descs_index(selected_table_key, ra_exe_unit.input_col_descs);
3667  CHECK_LT(input_col_descs_index, ra_exe_unit.input_col_descs.size());
3668  VLOG(2) << "selected_table_key=" << selected_table_key
3669  << " input_descs_index=" << input_descs_index
3670  << " input_col_descs_index=" << input_col_descs_index
3671  << " input_descs=" << shared::printContainer(input_descs)
3672  << " ra_exe_unit.input_col_descs="
3673  << shared::printContainer(ra_exe_unit.input_col_descs);
3674 
3675  std::list<std::shared_ptr<const InputColDescriptor>> selected_input_col_descs =
3676  get_selected_input_col_descs(selected_table_key, ra_exe_unit.input_col_descs);
3677  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
3678 
3680  selected_fragments_crossjoin, selected_fragments, ra_exe_unit);
3681 
3683  selected_fragments_crossjoin);
3684 
3685  if (allow_runtime_interrupt) {
3686  bool isInterrupted = false;
3687  {
3690  const auto query_session = getCurrentQuerySession(session_read_lock);
3691  isInterrupted = checkIsQuerySessionInterrupted(query_session, session_read_lock);
3692  }
3693  if (isInterrupted) {
3694  throw QueryExecutionError(ErrorCode::INTERRUPTED);
3695  }
3696  }
3697  std::vector<const int8_t*> frag_col_buffers(
3698  plan_state_->global_to_local_col_ids_.size());
3699  for (const auto& col_id : selected_input_col_descs) {
3700  CHECK(col_id);
3701  const auto cd = try_get_column_descriptor(col_id.get());
3702  if (cd && cd->isVirtualCol) {
3703  CHECK_EQ("rowid", cd->columnName);
3704  continue;
3705  }
3706  const auto fragments_it = all_tables_fragments.find(selected_table_key);
3707  CHECK(fragments_it != all_tables_fragments.end());
3708  const auto fragments = fragments_it->second;
3709  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3710  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3711  size_t const local_col_id = it->second;
3712  CHECK_LT(local_col_id, plan_state_->global_to_local_col_ids_.size());
3713  constexpr size_t frag_id = 0;
3714  if (fragments->empty()) {
3715  return {};
3716  }
3717  MemoryLevel const memory_level_for_column =
3718  plan_state_->isColumnToFetch({selected_table_key, col_id->getColId()})
3719  ? memory_level
3721  int8_t const* ptr;
3722  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
3723  ptr = column_fetcher.getResultSetColumn(
3724  col_id.get(), memory_level_for_column, device_id, device_allocator, thread_idx);
3725  } else if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
3726  ptr = column_fetcher.getAllTableColumnFragments(selected_table_key,
3727  col_id->getColId(),
3728  all_tables_fragments,
3729  memory_level_for_column,
3730  device_id,
3731  device_allocator,
3732  thread_idx);
3733  } else {
3734  ptr = column_fetcher.getOneTableColumnFragment(selected_table_key,
3735  frag_id,
3736  col_id->getColId(),
3737  all_tables_fragments,
3738  chunks,
3739  chunk_iterators,
3740  memory_level_for_column,
3741  device_id,
3742  device_allocator);
3743  }
3744  // Set frag_col_buffers[i]=ptr for i in mod input_descs.size() range of local_col_id.
3745  set_mod_range(frag_col_buffers, ptr, local_col_id, input_descs.size());
3746  }
3747  auto const [num_rows, frag_offsets] = getRowCountAndOffsetForAllFrags(
3748  ra_exe_unit, frag_ids_crossjoin, input_descs, all_tables_fragments);
3749 
3750  VLOG(2) << "frag_col_buffers=" << shared::printContainer(frag_col_buffers)
3751  << " num_rows=" << shared::printContainer(num_rows)
3752  << " frag_offsets=" << shared::printContainer(frag_offsets)
3753  << " input_descs_index=" << input_descs_index
3754  << " input_col_descs_index=" << input_col_descs_index;
3755  return {{std::move(frag_col_buffers)},
3756  {{num_rows[0][input_descs_index]}},
3757  {{frag_offsets[0][input_descs_index]}}};
3758 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
void set_mod_range(std::vector< int8_t const * > &frag_col_buffers, int8_t const *const ptr, size_t const local_col_id, size_t const N)
Definition: Execute.cpp:3627
std::vector< InputDescriptor > input_descs
const int8_t * getResultSetColumn(const InputColDescriptor *col_desc, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx) const
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:3642
const int8_t * getOneTableColumnFragment(const shared::TableKey &table_key, const int frag_id, const int col_id, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments, std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunk_holder, std::list< ChunkIter > &chunk_iter_holder, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator) const
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:3416
const int8_t * getAllTableColumnFragments(const shared::TableKey &table_key, const int col_id, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments, const Data_Namespace::MemoryLevel memory_level, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx) const
std::shared_lock< T > shared_lock
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:3367
#define INJECT_TIMER(DESC)
Definition: measure.h:122
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
size_t get_selected_input_descs_index(const shared::TableKey &table_key, std::vector< InputDescriptor > const &input_descs)
Definition: Execute.cpp:3594
size_t get_selected_input_col_descs_index(const shared::TableKey &table_key, std::list< std::shared_ptr< InputColDescriptor const >> const &input_col_descs)
Definition: Execute.cpp:3603
#define CHECK_LT(x, y)
Definition: Logger.h:303
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
#define CHECK_LE(x, y)
Definition: Logger.h:304
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3805
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
const ColumnDescriptor * try_get_column_descriptor(const InputColDescriptor *col_desc)
Definition: Execute.cpp:3340
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:108
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::list< std::shared_ptr< const InputColDescriptor > > get_selected_input_col_descs(const shared::TableKey &table_key, std::list< std::shared_ptr< InputColDescriptor const >> const &input_col_descs)
Definition: Execute.cpp:3614
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

std::string Executor::generatePTX ( const std::string &  cuda_llir) const
private

Definition at line 1540 of file NativeCodegen.cpp.

1540  {
1542  cuda_llir, nvptx_target_machine_.get(), cgen_state_->context_);
1543 }
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1547
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
static std::string generatePTX(const std::string &cuda_llir, llvm::TargetMachine *nvptx_target_machine, llvm::LLVMContext &context)
const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy Executor::get_concurrent_resource_grant_policy ( const ExecutorResourceMgr_Namespace::ResourceType  resource_type)
static

Definition at line 5467 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

5468  {
5470  throw std::runtime_error(
5471  "ExecutorResourceMgr must be enabled to set executor concurrent resource grant "
5472  "policy.");
5473  }
5474  return executor_resource_mgr_->get_concurrent_resource_grant_policy(resource_type);
5475 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
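
The resource-manager accessors in this section (this one and the two pool getters that follow) all guard on g_enable_executor_resource_mgr and throw when the manager is disabled, rather than returning a null or stale handle. A hedged, stand-alone sketch of that guard pattern; the flag, manager type, and quantity below are illustrative stand-ins:

#include <iostream>
#include <memory>
#include <stdexcept>

struct ResourceMgr {
  int total_cpu_slots = 16;
};

bool g_enable_executor_resource_mgr = false;      // stand-in feature flag
std::shared_ptr<ResourceMgr> executor_resource_mgr;  // stand-in manager

int get_total_cpu_slots() {
  if (!g_enable_executor_resource_mgr) {
    throw std::runtime_error(
        "ExecutorResourceMgr must be enabled to obtain executor resource pool stats.");
  }
  return executor_resource_mgr->total_cpu_slots;
}

int main() {
  try {
    get_total_cpu_slots();
  } catch (const std::runtime_error& e) {
    std::cout << "error: " << e.what() << '\n';
  }
  g_enable_executor_resource_mgr = true;
  executor_resource_mgr = std::make_shared<ResourceMgr>();
  std::cout << "cpu slots: " << get_total_cpu_slots() << '\n';
  return 0;
}
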
ExecutorResourceMgr_Namespace::ResourcePoolInfo Executor::get_executor_resource_pool_info ( )
static

Definition at line 5448 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

Referenced by foreign_storage::InternalExecutorStatsDataWrapper::initializeObjectsForTable().

5448  {
5450  throw std::runtime_error(
5451  "ExecutorResourceMgr must be enabled to obtain executor resource pool stats.");
5452  }
5453  return executor_resource_mgr_->get_resource_info();
5454 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178

+ Here is the caller graph for this function:

size_t Executor::get_executor_resource_pool_total_resource_quantity ( const ExecutorResourceMgr_Namespace::ResourceType  resource_type)
static

Definition at line 5438 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

5439  {
5441  throw std::runtime_error(
5442  "ExecutorResourceMgr must be enabled to obtain executor resource pool stats.");
5443  }
5444  return executor_resource_mgr_->get_resource_info(resource_type).second;
5445 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
const std::unique_ptr<llvm::Module>& Executor::get_extension_module ( ExtModuleKinds  kind) const
inlineprivate

Definition at line 1504 of file Execute.h.

References extension_modules_.

Referenced by get_geos_module(), get_libdevice_module(), get_rt_module(), get_rt_udf_module(), and get_udf_module().
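
The lookup below returns a reference to the stored module when the kind is present, and otherwise a reference to a function-local static empty unique_ptr, so callers can always test the result without a null-reference hazard. A compact, generic version of the same pattern (std::string stands in for llvm::Module):

#include <iostream>
#include <map>
#include <memory>
#include <string>

const std::unique_ptr<std::string>& find_module(
    const std::map<int, std::unique_ptr<std::string>>& modules, int kind) {
  auto it = modules.find(kind);
  if (it != modules.end()) {
    return it->second;
  }
  static const std::unique_ptr<std::string> empty;  // shared "not found" result
  return empty;
}

int main() {
  std::map<int, std::unique_ptr<std::string>> modules;
  modules.emplace(1, std::make_unique<std::string>("template_module"));
  std::cout << std::boolalpha << bool(find_module(modules, 1)) << ' '
            << bool(find_module(modules, 2)) << '\n';
  return 0;
}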

1504  {
1505  auto it = extension_modules_.find(kind);
1506  if (it != extension_modules_.end()) {
1507  return it->second;
1508  }
1509  static const std::unique_ptr<llvm::Module> empty;
1510  return empty;
1511  }
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1517

+ Here is the caller graph for this function:

const std::unique_ptr<llvm::Module>& Executor::get_geos_module ( ) const
inline

Definition at line 545 of file Execute.h.

References get_extension_module(), and rt_geos_module.

545  {
546  return get_extension_module(ExtModuleKinds::rt_geos_module);
547  }
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504

+ Here is the call graph for this function:

const std::unique_ptr<llvm::Module>& Executor::get_libdevice_module ( ) const
inline

Definition at line 548 of file Execute.h.

References get_extension_module(), and rt_libdevice_module.

548  {
549  return get_extension_module(ExtModuleKinds::rt_libdevice_module);
550  }
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504

+ Here is the call graph for this function:

const std::unique_ptr<llvm::Module>& Executor::get_rt_module ( ) const
inline

Definition at line 532 of file Execute.h.

References get_extension_module(), and template_module.

532  {
533  return get_extension_module(ExtModuleKinds::template_module);
534  }
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504

+ Here is the call graph for this function:

const std::unique_ptr<llvm::Module>& Executor::get_rt_udf_module ( bool  is_gpu = false) const
inline

Definition at line 539 of file Execute.h.

References get_extension_module(), register_runtime_extension_functions_mutex_, rt_udf_cpu_module, and rt_udf_gpu_module.

539  {
540  std::lock_guard<std::mutex> lock(
541  register_runtime_extension_functions_mutex_);
542  return get_extension_module(
543  is_gpu ? ExtModuleKinds::rt_udf_gpu_module : ExtModuleKinds::rt_udf_cpu_module);
544  }
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504
static std::mutex register_runtime_extension_functions_mutex_
Definition: Execute.h:1623

+ Here is the call graph for this function:

const std::unique_ptr<llvm::Module>& Executor::get_udf_module ( bool  is_gpu = false) const
inline

Definition at line 535 of file Execute.h.

References get_extension_module(), udf_cpu_module, and udf_gpu_module.

535  {
536  return get_extension_module(
537  is_gpu ? ExtModuleKinds::udf_gpu_module : ExtModuleKinds::udf_cpu_module);
538  }
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504

+ Here is the call graph for this function:

size_t Executor::getArenaBlockSize ( )
static

Definition at line 562 of file Execute.cpp.

References g_is_test_env, and kArenaBlockOverhead.

Referenced by ResultSetLogicalValuesBuilder::create(), RelAlgExecutor::prepareLeafExecution(), and setupCaching().

562  {
563  return g_is_test_env ? 100000000 : (1UL << 32) + kArenaBlockOverhead;
564 }
constexpr size_t kArenaBlockOverhead
bool g_is_test_env
Definition: Execute.cpp:153

+ Here is the caller graph for this function:

static size_t Executor::getBaselineThreshold ( bool  for_count_distinct,
ExecutorDeviceType  device_type 
)
inlinestatic

Definition at line 1448 of file Execute.h.

References baseline_threshold, and GPU.

Referenced by GroupByAndAggregate::getColRangeInfo().

1449  {
1450  return for_count_distinct ? (device_type == ExecutorDeviceType::GPU
1454  }
static const size_t baseline_threshold
Definition: Execute.h:1549
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:276

+ Here is the caller graph for this function:

Executor::CachedCardinality Executor::getCachedCardinality ( const CardinalityCacheKey cache_key)

Definition at line 5298 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

5299  {
5300  heavyai::shared_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5301  if (g_use_estimator_result_cache &&
5302  cardinality_cache_.find(cache_key) != cardinality_cache_.end()) {
5303  VLOG(1) << "Reuse cached cardinality";
5304  return {true, cardinality_cache_[cache_key]};
5305  }
5306  return {false, -1};
5307 }
std::shared_lock< T > shared_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139
#define VLOG(n)
Definition: Logger.h:388
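
A minimal caller-side sketch of how the returned CachedCardinality pair can be used. It assumes an Executor reference and a CardinalityCacheKey are already in scope; fallback_estimate stands in for whatever the caller would compute on a cache miss and is purely illustrative.

  #include <cstddef>

  // Sketch only: reuse a cached cardinality when the estimator result cache has an
  // entry for this key, otherwise fall back to the caller-provided estimate.
  size_t cardinality_for(Executor& executor,
                         const CardinalityCacheKey& key,
                         const size_t fallback_estimate) {
    const Executor::CachedCardinality cached = executor.getCachedCardinality(key);
    // CachedCardinality is std::pair<bool, size_t>; first == true signals a cache hit.
    return cached.first ? cached.second : fallback_estimate;
  }
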
CgenState* Executor::getCgenStatePtr ( ) const
inline

Definition at line 1414 of file Execute.h.

References cgen_state_.

1414 { return cgen_state_.get(); }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
ExecutorResourceMgr_Namespace::ChunkRequestInfo Executor::getChunkRequestInfo ( const ExecutorDeviceType  device_type,
const std::vector< InputDescriptor > &  input_descs,
const std::vector< InputTableInfo > &  query_infos,
const std::vector< std::pair< int32_t, FragmentsList >> &  device_fragment_lists 
) const

Determines a unique list of chunks and their associated byte sizes for a given query plan.

Called by Executor::launchKernelsViaResourceMgr

Note that this method currently requires the kernel fragment lists generated in Executor::createKernels (which calls QueryFragmentDescriptor::buildFragmentKernelMap). It would be preferable to hoist that logic out so this method could be called earlier, i.e. before compilation, so that compilation cycles are not wasted attempting to run a query on GPU only to find that there are insufficient resources and the query must be punted to CPU.

Note this method currently has two key limitations:

  1. Only accounts for chunks in the lhs table if a join is involved.
  2. Conservatively estimates that column widths for intermediate results are always 8 bytes, when in some cases they may have a lower byte width.
Parameters
device_type - specifies whether the query needs CPU or GPU buffer pool memory
input_descs - tables needed by the query
query_infos - fragment metadata for the tables touched by the query
kernel_fragment_lists - the per-kernel fragment lists generated in Executor::createKernels
Returns
ExecutorResourceMgr_Namespace::ChunkRequestInfo - contains various info used by ExecutorResourceMgr to gate (and soon optimize the scheduling of) query step resource requests.

Definition at line 877 of file Execute.cpp.

References gpu_enabled::accumulate(), CPU, and getColumnByteWidthMap().

Referenced by launchKernelsViaResourceMgr().

881  {
882  using TableFragmentId = std::pair<shared::TableKey, int32_t>;
883  using TableFragmentSizeMap = std::map<TableFragmentId, size_t>;
884 
885  /* Calculate bytes per column */
886 
887  // Only fetch lhs table ids for now...
888  // Allows us to cleanly lower number of kernels in flight to save
889  // buffer pool space, but is not a perfect estimate when big rhs
890  // join tables are involved. Will revisit.
891 
892  std::set<shared::TableKey> lhs_table_keys;
893  for (const auto& input_desc : input_descs) {
894  if (input_desc.getNestLevel() == 0) {
895  lhs_table_keys.insert(input_desc.getTableKey());
896  }
897  }
898 
899  const bool include_lazy_fetch_cols = device_type == ExecutorDeviceType::CPU;
900  const auto column_byte_width_map =
901  getColumnByteWidthMap(lhs_table_keys, include_lazy_fetch_cols);
902 
903  /* Calculate the byte width per row (sum of all columns widths)
904  Assumes each fragment touches the same columns, which is a DB-wide
905  invariant for now */
906 
907  size_t const byte_width_per_row =
908  std::accumulate(column_byte_width_map.begin(),
909  column_byte_width_map.end(),
910  size_t(0),
911  [](size_t sum, auto& col_entry) { return sum + col_entry.second; });
912 
913  /* Calculate num tuples for all fragments */
914 
915  TableFragmentSizeMap all_table_fragments_size_map;
916 
917  for (auto& query_info : query_infos) {
918  const auto& table_key = query_info.table_key;
919  for (const auto& frag : query_info.info.fragments) {
920  const int32_t frag_id = frag.fragmentId;
921  const TableFragmentId table_frag_id = std::make_pair(table_key, frag_id);
922  const size_t fragment_num_tuples = frag.getNumTuples(); // num_tuples;
923  all_table_fragments_size_map.insert(
924  std::make_pair(table_frag_id, fragment_num_tuples));
925  }
926  }
927 
928  /* Calculate num tuples only for fragments actually touched by query
929  Also calculate the num bytes needed for each kernel */
930 
931  TableFragmentSizeMap query_table_fragments_size_map;
932  std::vector<size_t> bytes_per_kernel;
933  bytes_per_kernel.reserve(kernel_fragment_lists.size());
934 
935  size_t max_kernel_bytes{0};
936 
937  for (auto& kernel_frag_list : kernel_fragment_lists) {
938  size_t kernel_bytes{0};
939  const auto frag_list = kernel_frag_list.second;
940  for (const auto& table_frags : frag_list) {
941  const auto& table_key = table_frags.table_key;
942  for (const size_t frag_id : table_frags.fragment_ids) {
943  const TableFragmentId table_frag_id = std::make_pair(table_key, frag_id);
944  const size_t fragment_num_tuples = all_table_fragments_size_map[table_frag_id];
945  kernel_bytes += fragment_num_tuples * byte_width_per_row;
946  query_table_fragments_size_map.insert(
947  std::make_pair(table_frag_id, fragment_num_tuples));
948  }
949  }
950  bytes_per_kernel.emplace_back(kernel_bytes);
951  if (kernel_bytes > max_kernel_bytes) {
952  max_kernel_bytes = kernel_bytes;
953  }
954  }
955 
956  /* Calculate bytes per chunk touched by the query */
957 
958  std::map<ChunkKey, size_t> all_chunks_byte_sizes_map;
959  constexpr int32_t subkey_min = std::numeric_limits<int32_t>::min();
960 
961  for (const auto& col_byte_width_entry : column_byte_width_map) {
962  // Build a chunk key prefix of (db_id, table_id, column_id)
963  const int32_t db_id = col_byte_width_entry.first.db_id;
964  const int32_t table_id = col_byte_width_entry.first.table_id;
965  const int32_t col_id = col_byte_width_entry.first.column_id;
966  const size_t col_byte_width = col_byte_width_entry.second;
967  const shared::TableKey table_key(db_id, table_id);
968 
969  const auto frag_start =
970  query_table_fragments_size_map.lower_bound({table_key, subkey_min});
971  for (auto frag_itr = frag_start; frag_itr != query_table_fragments_size_map.end() &&
972  frag_itr->first.first == table_key;
973  frag_itr++) {
974  const ChunkKey chunk_key = {db_id, table_id, col_id, frag_itr->first.second};
975  const size_t chunk_byte_size = col_byte_width * frag_itr->second;
976  all_chunks_byte_sizes_map.insert({chunk_key, chunk_byte_size});
977  }
978  }
979 
980  size_t total_chunk_bytes{0};
981  const size_t num_chunks = all_chunks_byte_sizes_map.size();
982  std::vector<std::pair<ChunkKey, size_t>> chunks_with_byte_sizes;
983  chunks_with_byte_sizes.reserve(num_chunks);
984  for (const auto& chunk_byte_size_entry : all_chunks_byte_sizes_map) {
985  chunks_with_byte_sizes.emplace_back(
986  std::make_pair(chunk_byte_size_entry.first, chunk_byte_size_entry.second));
987  // Add here, post mapping of the chunks, to make sure chunks are deduped and we get an
988  // accurate size estimate
989  total_chunk_bytes += chunk_byte_size_entry.second;
990  }
991  // Don't allow scaling of bytes per kernel launches for GPU yet as we're not set up for
992  // this at this point
993  const bool bytes_scales_per_kernel = device_type == ExecutorDeviceType::CPU;
994 
995  // Return ChunkRequestInfo
996 
997  return {device_type,
998  chunks_with_byte_sizes,
999  num_chunks,
1000  total_chunk_bytes,
1001  bytes_per_kernel,
1002  max_kernel_bytes,
1003  bytes_scales_per_kernel};
1004 }
std::vector< int > ChunkKey
Definition: types.h:36
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
std::map< shared::ColumnKey, size_t > getColumnByteWidthMap(const std::set< shared::TableKey > &table_ids_to_fetch, const bool include_lazy_fetched_cols) const
Definition: Execute.cpp:819

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
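
The per-kernel byte estimate above reduces to a simple product: the per-row byte width (the sum of all fetched column widths) times the tuple count of every fragment a kernel touches. A self-contained sketch of that arithmetic, using plain standard-library types rather than the engine's column and fragment classes, is shown below.

  #include <cstddef>
  #include <map>
  #include <numeric>
  #include <vector>

  // Illustrative only: mirrors the estimate in getChunkRequestInfo without the
  // engine types. Keys of column_byte_widths are stand-ins for shared::ColumnKey.
  size_t estimate_kernel_bytes(const std::map<int, size_t>& column_byte_widths,
                               const std::vector<size_t>& fragment_tuple_counts) {
    const size_t byte_width_per_row =
        std::accumulate(column_byte_widths.begin(),
                        column_byte_widths.end(),
                        size_t(0),
                        [](size_t sum, const auto& col) { return sum + col.second; });
    size_t kernel_bytes = 0;
    for (const size_t num_tuples : fragment_tuple_counts) {
      kernel_bytes += num_tuples * byte_width_per_row;
    }
    return kernel_bytes;
  }
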

std::vector< ColumnLazyFetchInfo > Executor::getColLazyFetchInfo ( const std::vector< Analyzer::Expr * > &  target_exprs) const

Definition at line 1017 of file Execute.cpp.

References CHECK, get_column_descriptor(), get_column_descriptor_maybe(), IS_GEO, kNULLT, and plan_state_.

Referenced by createKernels().

1018  {
1019  CHECK(plan_state_);
1020  std::vector<ColumnLazyFetchInfo> col_lazy_fetch_info;
1021  for (const auto target_expr : target_exprs) {
1022  if (!plan_state_->isLazyFetchColumn(target_expr)) {
1023  col_lazy_fetch_info.emplace_back(
1024  ColumnLazyFetchInfo{false, -1, SQLTypeInfo(kNULLT, false)});
1025  } else {
1026  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1027  CHECK(col_var);
1028  auto rte_idx = (col_var->get_rte_idx() == -1) ? 0 : col_var->get_rte_idx();
1029  const auto cd = get_column_descriptor_maybe(col_var->getColumnKey());
1030  if (cd && IS_GEO(cd->columnType.get_type())) {
1031  // Geo coords cols will be processed in sequence. So we only need to track the
1032  // first coords col in lazy fetch info.
1033  {
1034  auto col_key = col_var->getColumnKey();
1035  col_key.column_id += 1;
1036  const auto cd0 = get_column_descriptor(col_key);
1037  const auto col0_ti = cd0->columnType;
1038  CHECK(!cd0->isVirtualCol);
1039  const auto col0_var = makeExpr<Analyzer::ColumnVar>(col0_ti, col_key, rte_idx);
1040  const auto local_col0_id = plan_state_->getLocalColumnId(col0_var.get(), false);
1041  col_lazy_fetch_info.emplace_back(
1042  ColumnLazyFetchInfo{true, local_col0_id, col0_ti});
1043  }
1044  } else {
1045  auto local_col_id = plan_state_->getLocalColumnId(col_var, false);
1046  const auto& col_ti = col_var->get_type_info();
1047  col_lazy_fetch_info.emplace_back(ColumnLazyFetchInfo{true, local_col_id, col_ti});
1048  }
1049  }
1050  }
1051  return col_lazy_fetch_info;
1052 }
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:241
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:213
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK(condition)
Definition: Logger.h:291
#define IS_GEO(T)
Definition: sqltypes.h:310

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
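
A short sketch of the expected caller pattern, assuming an Executor reference and the kernel's target expressions are available; the helper name lazy_fetch_info_for is hypothetical and only illustrates that one ColumnLazyFetchInfo entry is produced per target expression.

  #include <vector>

  // Sketch only: skip the lazy-fetch bookkeeping entirely when no target column is
  // lazily fetched, otherwise collect one entry per target expression.
  std::vector<ColumnLazyFetchInfo> lazy_fetch_info_for(
      const Executor& executor,
      const std::vector<Analyzer::Expr*>& target_exprs) {
    if (!executor.hasLazyFetchColumns(target_exprs)) {
      return {};
    }
    auto infos = executor.getColLazyFetchInfo(target_exprs);
    CHECK_EQ(infos.size(), target_exprs.size());
    return infos;
  }
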

ExpressionRange Executor::getColRange ( const PhysicalInput phys_input) const

Definition at line 746 of file Execute.cpp.

References agg_col_range_cache_, and AggregatedColRange::getColRange().

746  {
747  return agg_col_range_cache_.getColRange(phys_input);
748 }
ExpressionRange getColRange(const PhysicalInput &) const
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572

+ Here is the call graph for this function:

std::map< shared::ColumnKey, size_t > Executor::getColumnByteWidthMap ( const std::set< shared::TableKey > &  table_ids_to_fetch,
const bool  include_lazy_fetched_cols 
) const

Definition at line 819 of file Execute.cpp.

References CHECK, anonymous_namespace{Execute.cpp}::get_col_byte_width(), and plan_state_.

Referenced by getChunkRequestInfo().

821  {
822  std::map<shared::ColumnKey, size_t> col_byte_width_map;
823 
824  for (const auto& fetched_col : plan_state_->getColumnsToFetch()) {
825  if (table_ids_to_fetch.count({fetched_col.db_id, fetched_col.table_id}) == 0) {
826  continue;
827  }
828  const size_t col_byte_width = get_col_byte_width(fetched_col);
829  CHECK(col_byte_width_map.insert({fetched_col, col_byte_width}).second);
830  }
831  if (include_lazy_fetched_cols) {
832  for (const auto& lazy_fetched_col : plan_state_->getColumnsToNotFetch()) {
833  if (table_ids_to_fetch.count({lazy_fetched_col.db_id, lazy_fetched_col.table_id}) ==
834  0) {
835  continue;
836  }
837  const size_t col_byte_width = get_col_byte_width(lazy_fetched_col);
838  CHECK(col_byte_width_map.insert({lazy_fetched_col, col_byte_width}).second);
839  }
840  }
841  return col_byte_width_map;
842 }
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
size_t get_col_byte_width(const shared::ColumnKey &column_key)
Definition: Execute.cpp:791
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor * Executor::getColumnDescriptor ( const Analyzer::ColumnVar col_var) const

Definition at line 711 of file Execute.cpp.

References get_column_descriptor_maybe(), and Analyzer::ColumnVar::getColumnKey().

Referenced by getPhysicalColumnDescriptor().

712  {
713  return get_column_descriptor_maybe(col_var->getColumnKey());
714 }
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:241
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::LLVMContext& Executor::getContext ( )
inline

Definition at line 1417 of file Execute.h.

References context_.

1417 { return *context_.get(); }
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
QuerySessionId & Executor::getCurrentQuerySession ( heavyai::shared_lock< heavyai::shared_mutex > &  read_lock)

Definition at line 4986 of file Execute.cpp.

References current_query_session_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeWorkUnitImpl(), fetchChunks(), and fetchUnionChunks().

4987  {
4988  return current_query_session_;
4989 }
QuerySessionId current_query_session_
Definition: Execute.h:1576

+ Here is the caller graph for this function:

Data_Namespace::DataMgr* Executor::getDataMgr ( ) const
inline

Definition at line 623 of file Execute.h.

References CHECK, and data_mgr_.

Referenced by getDeviceTypeForTargets(), logSystemCPUMemoryStatus(), and logSystemGPUMemoryStatus().

623  {
624  CHECK(data_mgr_);
625  return data_mgr_;
626  }
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

heavyai::shared_mutex & Executor::getDataRecyclerLock ( )

Definition at line 4970 of file Execute.cpp.

References recycler_mutex_.

4970  {
4971  return recycler_mutex_;
4972 }
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
ExecutorDeviceType Executor::getDeviceTypeForTargets ( const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  requested_device_type 
)
private

Definition at line 2575 of file Execute.cpp.

References CPU, g_bigint_count, get_target_info(), getDataMgr(), RelAlgExecutionUnit::groupby_exprs, isArchPascalOrLater(), kAVG, kDOUBLE, kSUM, kSUM_IF, and RelAlgExecutionUnit::target_exprs.

Referenced by executeWorkUnitImpl().

2577  {
2578  if (!getDataMgr()->gpusPresent()) {
2579  return ExecutorDeviceType::CPU;
2580  }
2581  for (const auto target_expr : ra_exe_unit.target_exprs) {
2582  const auto agg_info = get_target_info(target_expr, g_bigint_count);
2583  if (!ra_exe_unit.groupby_exprs.empty() &&
2584  !isArchPascalOrLater(requested_device_type)) {
2585  if ((agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM ||
2586  agg_info.agg_kind == kSUM_IF) &&
2587  agg_info.agg_arg_type.get_type() == kDOUBLE) {
2588  return ExecutorDeviceType::CPU;
2589  }
2590  }
2591  if (dynamic_cast<const Analyzer::RegexpExpr*>(target_expr)) {
2592  return ExecutorDeviceType::CPU;
2593  }
2594  }
2595  return requested_device_type;
2596 }
std::vector< Analyzer::Expr * > target_exprs
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:872
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
bool g_bigint_count
Definition: sqldefs.h:80
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:623
Definition: sqldefs.h:77

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< Executor > Executor::getExecutor ( const ExecutorId  id,
const std::string &  debug_dir = "",
const std::string &  debug_file = "",
const SystemParameters system_parameters = SystemParameters() 
)
static

Definition at line 513 of file Execute.cpp.

References CHECK, SystemParameters::cuda_block_size, SystemParameters::cuda_grid_size, executors_, executors_cache_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Catalog_Namespace::SysCatalog::instance(), and SystemParameters::max_gpu_slab_size.

Referenced by ResultSetReductionJIT::codegen(), GpuReductionHelperJIT::codegen(), ColumnarResults::ColumnarResults(), Parser::OptimizeTableStmt::execute(), Parser::CopyTableStmt::execute(), Parser::InsertValuesStmt::execute(), DBHandler::execute_rel_alg(), QueryRunner::QueryRunner::extractQueryPlanDag(), StubGenerator::generateStub(), DBHandler::get_queries_info(), QueryRunner::QueryRunner::getCalcitePlan(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), QueryRunner::QueryRunner::getExecutor(), CgenState::getExecutor(), Parser::LocalQueryConnector::getOuterFragmentCount(), QueryRunner::QueryRunner::getParsedGlobalQueryHints(), QueryRunner::QueryRunner::getParsedQueryHint(), QueryRunner::QueryRunner::getParsedQueryHints(), DBHandler::getQueries(), QueryRunner::QueryRunner::getQueryInfoForDataRecyclerTest(), QueryRunner::QueryRunner::getRaExecutionSequence(), QueryRunner::QueryRunner::getRelAlgDag(), QueryRunner::QueryRunner::getRootNodeFromParsedQuery(), DBHandler::import_table(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), DBHandler::importGeoTableSingle(), DBHandler::interrupt(), DBHandler::interruptQuery(), DBHandler::invalidate_cur_session(), anonymous_namespace{DBHandler.cpp}::log_cache_size(), migrations::MigrationMgr::migrateDateInDaysMetadata(), Parser::InsertIntoTableAsSelectStmt::populateData(), Parser::LocalQueryConnector::query(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), QueryRunner::anonymous_namespace{QueryRunner.cpp}::run_select_query_with_filter_push_down(), QueryRunner::QueryRunner::runSelectQuery(), QueryRunner::QueryRunner::runSQLWithAllowingInterrupt(), DBHandler::set_cur_session(), DBHandler::sql_execute_impl(), and anonymous_namespace{DdlCommandExecutor.cpp}::vacuum_table_if_required().

517  {
518  heavyai::unique_lock<heavyai::shared_mutex> write_lock(executors_cache_mutex_);
519  auto it = executors_.find(executor_id);
520  if (it != executors_.end()) {
521  return it->second;
522  }
523  auto& data_mgr = Catalog_Namespace::SysCatalog::instance().getDataMgr();
524  auto executor = std::make_shared<Executor>(executor_id,
525  &data_mgr,
526  system_parameters.cuda_block_size,
527  system_parameters.cuda_grid_size,
528  system_parameters.max_gpu_slab_size,
529  debug_dir,
530  debug_file);
531  CHECK(executors_.insert(std::make_pair(executor_id, executor)).second);
532  return executor;
533 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:234
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::unique_lock< T > unique_lock
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1581
#define CHECK(condition)
Definition: Logger.h:291
static heavyai::shared_mutex executors_cache_mutex_
Definition: Execute.h:1602

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
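
A minimal usage sketch: fetch (or lazily construct) an executor for a given id and read back a couple of its properties. The executor id and SystemParameters values below are placeholders for illustration, not recommended settings.

  // Sketch only: obtain an executor and query it. Assumes the system catalog and data
  // manager have already been initialized by the server startup path.
  SystemParameters system_parameters;
  auto executor = Executor::getExecutor(/*executor_id=*/0,
                                        /*debug_dir=*/"",
                                        /*debug_file=*/"",
                                        system_parameters);
  const unsigned grid_size = executor->gridSize();  // 0 when no CUDA devices are present
  auto* data_mgr = executor->getDataMgr();
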

ExecutorId Executor::getExecutorId ( ) const
inline

Definition at line 1332 of file Execute.h.

References executor_id_.

Referenced by launchKernelsViaResourceMgr().

1332 { return executor_id_; };
const ExecutorId executor_id_
Definition: Execute.h:1476

+ Here is the caller graph for this function:

const std::vector< size_t > Executor::getExecutorIdsRunningQuery ( const QuerySessionId interrupt_session) const

Definition at line 5347 of file Execute.cpp.

References executor_session_mutex_, queries_session_map_, and run_benchmark_import::res.

5348  {
5349  std::vector<size_t> res;
5350  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(executor_session_mutex_);
5351  auto it = queries_session_map_.find(interrupt_session);
5352  if (it != queries_session_map_.end()) {
5353  for (auto& kv : it->second) {
5354  if (kv.second.getQueryStatus() ==
5355  QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL) {
5356  res.push_back(kv.second.getExecutorId());
5357  }
5358  }
5359  }
5360  return res;
5361 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
std::shared_lock< T > shared_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
const SQLTypeInfo Executor::getFirstOrderColTypeInfo ( WindowFunctionContext window_func_context) const
private

Definition at line 732 of file WindowFunctionIR.cpp.

References Analyzer::WindowFunction::getOrderKeys(), and WindowFunctionContext::getWindowFunction().

733  {
734  const auto window_func = window_func_context->getWindowFunction();
735  return window_func->getOrderKeys().front()->get_type_info();
736 }
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

std::vector< size_t > Executor::getFragmentCount ( const FragmentsList selected_fragments,
const size_t  scan_idx,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3760 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, and plan_state_.

Referenced by buildSelectedFragsMapping().

3762  {
3763  if ((ra_exe_unit.input_descs.size() > size_t(2) || !ra_exe_unit.join_quals.empty()) &&
3764  scan_idx > 0 &&
3765  !plan_state_->join_info_.sharded_range_table_indices_.count(scan_idx) &&
3766  !selected_fragments[scan_idx].fragment_ids.empty()) {
3767  // Fetch all fragments
3768  return {size_t(0)};
3769  }
3770 
3771  return selected_fragments[scan_idx].fragment_ids;
3772 }
std::vector< InputDescriptor > input_descs
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532

+ Here is the caller graph for this function:

std::string Executor::getFramingFuncName ( const std::string &  bound_type,
const std::string &  order_col_type,
const std::string &  op_type,
bool  for_timestamp_type 
) const
private

Definition at line 847 of file WindowFunctionIR.cpp.

850  {
851  auto target_val_type = for_timestamp_type ? "int64_t" : order_col_type;
852  auto null_type = for_timestamp_type ? "int64_t" : order_col_type;
853  return "range_mode_" + target_val_type + "_" + order_col_type + "_" + null_type + "_" +
854  op_type + "_frame_" + bound_type + "_bound";
855 }
std::unordered_map< shared::TableKey, const Analyzer::BinOper * > Executor::getInnerTabIdToJoinCond ( ) const
private

Definition at line 2882 of file Execute.cpp.

References CHECK_EQ, and plan_state_.

2882  {
2883  std::unordered_map<shared::TableKey, const Analyzer::BinOper*> id_to_cond;
2884  const auto& join_info = plan_state_->join_info_;
2885  CHECK_EQ(join_info.equi_join_tautologies_.size(), join_info.join_hash_tables_.size());
2886  for (size_t i = 0; i < join_info.join_hash_tables_.size(); ++i) {
2887  const auto& inner_table_key = join_info.join_hash_tables_[i]->getInnerTableId();
2888  id_to_cond.insert(
2889  std::make_pair(inner_table_key, join_info.equi_join_tautologies_[i].get()));
2890  }
2891  return id_to_cond;
2892 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
std::vector< int8_t * > Executor::getJoinHashTablePtrs ( const ExecutorDeviceType  device_type,
const int  device_id 
)
private

Definition at line 4253 of file Execute.cpp.

References CHECK, GPU, and plan_state_.

Referenced by executePlanWithGroupBy(), and executePlanWithoutGroupBy().

4254  {
4255  std::vector<int8_t*> table_ptrs;
4256  const auto& join_hash_tables = plan_state_->join_info_.join_hash_tables_;
4257  for (auto hash_table : join_hash_tables) {
4258  if (!hash_table) {
4259  CHECK(table_ptrs.empty());
4260  return {};
4261  }
4262  table_ptrs.push_back(hash_table->getJoinHashBuffer(
4263  device_type, device_type == ExecutorDeviceType::GPU ? device_id : 0));
4264  }
4265  return table_ptrs;
4266 }
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

const StringDictionaryProxy::IdMap * Executor::getJoinIntersectionStringProxyTranslationMap ( const StringDictionaryProxy source_proxy,
StringDictionaryProxy dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  source_string_op_infos,
const std::vector< StringOps_Namespace::StringOpInfo > &  dest_source_string_op_infos,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
) const

Definition at line 621 of file Execute.cpp.

References CHECK, and str_dict_mutex_.

626  {
627  CHECK(row_set_mem_owner);
628  std::lock_guard<std::mutex> lock(
629  str_dict_mutex_); // TODO: can we use RowSetMemOwner state mutex here?
630  // First translate lhs onto itself if there are string ops
631  if (!dest_string_op_infos.empty()) {
632  row_set_mem_owner->addStringProxyUnionTranslationMap(
633  dest_proxy, dest_proxy, dest_string_op_infos);
634  }
635  return row_set_mem_owner->addStringProxyIntersectionTranslationMap(
636  source_proxy, dest_proxy, source_string_op_infos);
637 }
std::mutex str_dict_mutex_
Definition: Execute.h:1545
#define CHECK(condition)
Definition: Logger.h:291
const QueryPlanDAG Executor::getLatestQueryPlanDagExtracted ( ) const

Definition at line 5382 of file Execute.cpp.

References latest_query_plan_extracted_, and recycler_mutex_.

5382  {
5383  heavyai::shared_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5384  return latest_query_plan_extracted_;
5385 }
std::shared_lock< T > shared_lock
static QueryPlanDAG latest_query_plan_extracted_
Definition: Execute.h:1612
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
size_t Executor::getNumBytesForFetchedRow ( const std::set< shared::TableKey > &  table_keys_to_fetch) const
size_t Executor::getNumBytesForFetchedRow ( const std::set< int > &  table_ids_to_fetch) const
size_t Executor::getNumCurentSessionsEnrolled ( ) const

Definition at line 5115 of file Execute.cpp.

References executor_session_mutex_, and queries_session_map_.

5115  {
5116  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(executor_session_mutex_);
5117  return queries_session_map_.size();
5118 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
std::shared_lock< T > shared_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
size_t Executor::getOrderKeySize ( WindowFunctionContext window_func_context) const
private

Definition at line 738 of file WindowFunctionIR.cpp.

738  {
739  const auto order_key_size = getFirstOrderColTypeInfo(window_func_context).get_size();
740  return order_key_size;
741 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
const SQLTypeInfo getFirstOrderColTypeInfo(WindowFunctionContext *window_func_context) const
const std::string Executor::getOrderKeyTypeName ( WindowFunctionContext window_func_context) const
private

Definition at line 743 of file WindowFunctionIR.cpp.

References CHECK, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), Analyzer::WindowFunction::getOrderKeys(), and WindowFunctionContext::getWindowFunction().

744  {
745  auto const order_key_size = getOrderKeySize(window_func_context);
746  auto const order_key_ptr =
747  window_func_context->getWindowFunction()->getOrderKeys().front();
748  CHECK(order_key_ptr);
749  return get_col_type_name_by_size(order_key_size,
750  order_key_ptr->get_type_info().is_fp());
751 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const

+ Here is the call graph for this function:

const ColumnDescriptor * Executor::getPhysicalColumnDescriptor ( const Analyzer::ColumnVar col_var,
int  n 
) const

Definition at line 716 of file Execute.cpp.

References shared::ColumnKey::column_id, get_column_descriptor_maybe(), getColumnDescriptor(), Analyzer::ColumnVar::getColumnKey(), and anonymous_namespace{Utm.h}::n.

718  {
719  const auto cd = getColumnDescriptor(col_var);
720  if (!cd || n > cd->columnType.get_physical_cols()) {
721  return nullptr;
722  }
723  auto column_key = col_var->getColumnKey();
724  column_key.column_id += n;
725  return get_column_descriptor_maybe(column_key);
726 }
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:241
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:711
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
constexpr double n
Definition: Utm.h:38

+ Here is the call graph for this function:
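
A sketch of enumerating the physical columns (for example, the coordinate columns behind a geo column) that back a logical column; the helper name physical_columns_of is hypothetical. It relies on the nullptr return once n exceeds the column's physical column count, as in the definition above.

  #include <vector>

  // Sketch only: collect every physical column descriptor behind a logical column.
  std::vector<const ColumnDescriptor*> physical_columns_of(
      const Executor& executor, const Analyzer::ColumnVar* col_var) {
    std::vector<const ColumnDescriptor*> result;
    for (int n = 1; ; ++n) {
      const auto* cd = executor.getPhysicalColumnDescriptor(col_var, n);
      if (!cd) {
        break;
      }
      result.push_back(cd);
    }
    return result;
  }
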

PlanState* Executor::getPlanStatePtr ( ) const
inline

Definition at line 1415 of file Execute.h.

References plan_state_.

1415 { return plan_state_.get(); }
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
QueryPlanDagCache & Executor::getQueryPlanDagCache ( )

Definition at line 4974 of file Execute.cpp.

References query_plan_dag_cache_.

4974  {
4975  return query_plan_dag_cache_;
4976 }
static QueryPlanDagCache query_plan_dag_cache_
Definition: Execute.h:1604
std::vector< QuerySessionStatus > Executor::getQuerySessionInfo ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5329 of file Execute.cpp.

References queries_session_map_.

5331  {
5332  if (!queries_session_map_.empty() && queries_session_map_.count(query_session)) {
5333  auto& query_infos = queries_session_map_.at(query_session);
5334  std::vector<QuerySessionStatus> ret;
5335  for (auto& info : query_infos) {
5336  ret.emplace_back(query_session,
5337  info.second.getExecutorId(),
5338  info.second.getQueryStr(),
5339  info.second.getQuerySubmittedTime(),
5340  info.second.getQueryStatus());
5341  }
5342  return ret;
5343  }
5344  return {};
5345 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
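
A short sketch of the expected locking pattern: the caller obtains a shared (read) lock on the executor's session mutex via getSessionLock() and passes it through. It assumes an Executor pointer and a QuerySessionId are in scope.

  // Sketch only: read per-query status entries for one session under a shared lock.
  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(executor->getSessionLock());
  const auto session_statuses =
      executor->getQuerySessionInfo(query_session, session_read_lock);
  for (const auto& status : session_statuses) {
    // ... inspect executor id, query string, submission time, and status ...
  }
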
QuerySessionStatus::QueryStatus Executor::getQuerySessionStatus ( const QuerySessionId candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5001 of file Execute.cpp.

References queries_session_map_.

5003  {
5004  if (queries_session_map_.count(candidate_query_session) &&
5005  !queries_session_map_.at(candidate_query_session).empty()) {
5006  return queries_session_map_.at(candidate_query_session)
5007  .begin()
5008  ->second.getQueryStatus();
5009  }
5010  return QuerySessionStatus::QueryStatus::UNDEFINED;
5011 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
ResultSetRecyclerHolder & Executor::getResultSetRecyclerHolder ( )

Definition at line 4978 of file Execute.cpp.

References resultset_recycler_holder_.

4978  {
4979  return resultset_recycler_holder_;
4980 }
static ResultSetRecyclerHolder resultset_recycler_holder_
Definition: Execute.h:1608
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > Executor::getRowCountAndOffsetForAllFrags ( const RelAlgExecutionUnit ra_exe_unit,
const CartesianProduct< std::vector< std::vector< size_t >>> &  frag_ids_crossjoin,
const std::vector< InputDescriptor > &  input_descs,
const std::map< shared::TableKey, const TableFragments * > &  all_tables_fragments 
)
private

Definition at line 3367 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, get_table_id_to_frag_offsets(), RelAlgExecutionUnit::join_quals, plan_state_, and RelAlgExecutionUnit::union_all.

Referenced by fetchChunks(), and fetchUnionChunks().

3371  {
3372  std::vector<std::vector<int64_t>> all_num_rows;
3373  std::vector<std::vector<uint64_t>> all_frag_offsets;
3374  const auto tab_id_to_frag_offsets =
3375  get_table_id_to_frag_offsets(input_descs, all_tables_fragments);
3376  std::unordered_map<size_t, size_t> outer_id_to_num_row_idx;
3377  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
3378  std::vector<int64_t> num_rows;
3379  std::vector<uint64_t> frag_offsets;
3380  if (!ra_exe_unit.union_all) {
3381  CHECK_EQ(selected_frag_ids.size(), input_descs.size());
3382  }
3383  for (size_t tab_idx = 0; tab_idx < input_descs.size(); ++tab_idx) {
3384  const auto frag_id = ra_exe_unit.union_all ? 0 : selected_frag_ids[tab_idx];
3385  const auto fragments_it =
3386  all_tables_fragments.find(input_descs[tab_idx].getTableKey());
3387  CHECK(fragments_it != all_tables_fragments.end());
3388  const auto& fragments = *fragments_it->second;
3389  if (ra_exe_unit.join_quals.empty() || tab_idx == 0 ||
3390  plan_state_->join_info_.sharded_range_table_indices_.count(tab_idx)) {
3391  const auto& fragment = fragments[frag_id];
3392  num_rows.push_back(fragment.getNumTuples());
3393  } else {
3394  size_t total_row_count{0};
3395  for (const auto& fragment : fragments) {
3396  total_row_count += fragment.getNumTuples();
3397  }
3398  num_rows.push_back(total_row_count);
3399  }
3400  const auto frag_offsets_it =
3401  tab_id_to_frag_offsets.find(input_descs[tab_idx].getTableKey());
3402  CHECK(frag_offsets_it != tab_id_to_frag_offsets.end());
3403  const auto& offsets = frag_offsets_it->second;
3404  CHECK_LT(frag_id, offsets.size());
3405  frag_offsets.push_back(offsets[frag_id]);
3406  }
3407  all_num_rows.push_back(num_rows);
3408  // Fragment offsets of outer table should be ONLY used by rowid for now.
3409  all_frag_offsets.push_back(frag_offsets);
3410  }
3411  return {all_num_rows, all_frag_offsets};
3412 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::map< shared::TableKey, std::vector< uint64_t > > get_table_id_to_frag_offsets(const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:3348
const std::optional< bool > union_all
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::shared_ptr< RowSetMemoryOwner > Executor::getRowSetMemoryOwner ( ) const

Definition at line 728 of file Execute.cpp.

References row_set_mem_owner_.

Referenced by executeTableFunction(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

728  {
729  return row_set_mem_owner_;
730 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533

+ Here is the caller graph for this function:

heavyai::shared_mutex & Executor::getSessionLock ( )

Definition at line 4982 of file Execute.cpp.

References executor_session_mutex_.

4982  {
4983  return executor_session_mutex_;
4984 }
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
StringDictionaryProxy* Executor::getStringDictionaryProxy ( const shared::StringDictKey dict_key,
const bool  with_generation 
) const
inline

Returns a string dictionary proxy using the currently active row set memory owner.

Definition at line 578 of file Execute.h.

References CHECK, and row_set_mem_owner_.

Referenced by addTransientStringLiterals(), and serializeLiterals().

579  {
580  CHECK(row_set_mem_owner_);
581  return getStringDictionaryProxy(dict_key, row_set_mem_owner_, with_generation);
582  }
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:578
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

StringDictionaryProxy* Executor::getStringDictionaryProxy ( const shared::StringDictKey dict_key,
const std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  with_generation 
) const
const StringDictionaryProxy::TranslationMap< Datum > * Executor::getStringProxyNumericTranslationMap ( const shared::StringDictKey source_dict_key,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  with_generation 
) const

Definition at line 640 of file Execute.cpp.

References CHECK, and str_dict_mutex_.

644  {
645  CHECK(row_set_mem_owner);
646  std::lock_guard<std::mutex> lock(
647  str_dict_mutex_); // TODO: can we use RowSetMemOwner state mutex here?
648  return row_set_mem_owner->getOrAddStringProxyNumericTranslationMap(
649  source_dict_key, with_generation, string_op_infos);
650 }
std::mutex str_dict_mutex_
Definition: Execute.h:1545
#define CHECK(condition)
Definition: Logger.h:291
const StringDictionaryProxy::IdMap * Executor::getStringProxyTranslationMap ( const shared::StringDictKey source_dict_key,
const shared::StringDictKey dest_dict_key,
const RowSetMemoryOwner::StringTranslationType  translation_type,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  with_generation 
) const

Definition at line 606 of file Execute.cpp.

References CHECK, and str_dict_mutex_.

Referenced by TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

612  {
613  CHECK(row_set_mem_owner);
614  std::lock_guard<std::mutex> lock(
615  str_dict_mutex_); // TODO: can we use RowSetMemOwner state mutex here?
616  return row_set_mem_owner->getOrAddStringProxyTranslationMap(
617  source_dict_key, dest_dict_key, with_generation, translation_type, string_op_infos);
618 }
std::mutex str_dict_mutex_
Definition: Execute.h:1545
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::vector< size_t > Executor::getTableFragmentIndices ( const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  device_type,
const size_t  table_idx,
const size_t  outer_frag_idx,
std::map< shared::TableKey, const TableFragments * > &  selected_tables_fragments,
const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &  inner_table_id_to_join_condition 
)
private

Definition at line 3236 of file Execute.cpp.

References CHECK, CHECK_LT, RelAlgExecutionUnit::input_descs, and skipFragmentPair().

3243  {
3244  const auto& table_key = ra_exe_unit.input_descs[table_idx].getTableKey();
3245  auto table_frags_it = selected_tables_fragments.find(table_key);
3246  CHECK(table_frags_it != selected_tables_fragments.end());
3247  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3248  const auto outer_table_fragments_it =
3249  selected_tables_fragments.find(outer_input_desc.getTableKey());
3250  const auto outer_table_fragments = outer_table_fragments_it->second;
3251  CHECK(outer_table_fragments_it != selected_tables_fragments.end());
3252  CHECK_LT(outer_frag_idx, outer_table_fragments->size());
3253  if (!table_idx) {
3254  return {outer_frag_idx};
3255  }
3256  const auto& outer_fragment_info = (*outer_table_fragments)[outer_frag_idx];
3257  auto& inner_frags = table_frags_it->second;
3258  CHECK_LT(size_t(1), ra_exe_unit.input_descs.size());
3259  std::vector<size_t> all_frag_ids;
3260  for (size_t inner_frag_idx = 0; inner_frag_idx < inner_frags->size();
3261  ++inner_frag_idx) {
3262  const auto& inner_frag_info = (*inner_frags)[inner_frag_idx];
3263  if (skipFragmentPair(outer_fragment_info,
3264  inner_frag_info,
3265  table_idx,
3266  inner_table_id_to_join_condition,
3267  ra_exe_unit,
3268  device_type)) {
3269  continue;
3270  }
3271  all_frag_ids.push_back(inner_frag_idx);
3272  }
3273  return all_frag_ids;
3274 }
std::vector< InputDescriptor > input_descs
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:3278

+ Here is the call graph for this function:

const TableGeneration & Executor::getTableGeneration ( const shared::TableKey table_key) const

Definition at line 741 of file Execute.cpp.

References TableGenerations::getGeneration(), and table_generations_.

Referenced by skipFragment().

742  {
743  return table_generations_.getGeneration(table_key);
744 }
const TableGeneration & getGeneration(const shared::TableKey &table_key) const
TableGenerations table_generations_
Definition: Execute.h:1573

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Fragmenter_Namespace::TableInfo Executor::getTableInfo ( const shared::TableKey table_key) const

Definition at line 736 of file Execute.cpp.

References InputTableInfoCache::getTableInfo(), and input_table_info_cache_.

Referenced by computeColRangesCache(), and computeTableGenerations().

737  {
738  return input_table_info_cache_.getTableInfo(table_key);
739 }
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1571
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const TemporaryTables* Executor::getTemporaryTables ( )
inline

Returns a pointer to the intermediate tables vector currently stored by this executor.

Definition at line 573 of file Execute.h.

References temporary_tables_.

Referenced by skipFragmentPair().

573 { return temporary_tables_; }
const TemporaryTables * temporary_tables_
Definition: Execute.h:1559

+ Here is the caller graph for this function:

const TemporaryTables* Executor::getTemporaryTables ( ) const
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > Executor::getUniqueThreadSharedResultSets ( const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  results_per_device) const
private

Definition at line 1624 of file Execute.cpp.

References gpu_enabled::accumulate(), and run_benchmark_import::result.

Referenced by reduceMultiDeviceResults().

1626  {
1627  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>> unique_thread_results;
1628  if (results_per_device.empty()) {
1629  return unique_thread_results;
1630  }
1631  auto max_ti = [](int acc, auto& e) { return std::max(acc, e.first->getThreadIdx()); };
1632  int const max_thread_idx =
1633  std::accumulate(results_per_device.begin(), results_per_device.end(), -1, max_ti);
1634  std::vector<bool> seen_thread_idxs(max_thread_idx + 1, false);
1635  for (const auto& result : results_per_device) {
1636  const int32_t result_thread_idx = result.first->getThreadIdx();
1637  if (!seen_thread_idxs[result_thread_idx]) {
1638  seen_thread_idxs[result_thread_idx] = true;
1639  unique_thread_results.emplace_back(result);
1640  }
1641  }
1642  return unique_thread_results;
1643 }
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

unsigned Executor::gridSize ( ) const

Definition at line 4352 of file Execute.cpp.

References CHECK, data_mgr_, Data_Namespace::DataMgr::getCudaMgr(), and grid_size_x_.

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

4352  {
4353  CHECK(data_mgr_);
4354  const auto cuda_mgr = data_mgr_->getCudaMgr();
4355  if (!cuda_mgr) {
4356  return 0;
4357  }
4358  return grid_size_x_ ? grid_size_x_ : 2 * cuda_mgr->getMinNumMPsForAllDevices();
4359 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
unsigned grid_size_x_
Definition: Execute.h:1553
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Executor::GroupColLLVMValue Executor::groupByColumnCodegen ( Analyzer::Expr group_by_col,
const size_t  col_width,
const CompilationOptions co,
const bool  translate_null_val,
const int64_t  translated_null_val,
DiamondCodegen diamond_codegen,
std::stack< llvm::BasicBlock * > &  array_loops,
const bool  thread_mem_shared 
)
private

Definition at line 1384 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GE, CodeGenerator::codegen(), CompilationOptions::device_type, get_int_type(), Analyzer::Expr::get_type_info(), kDOUBLE, kUNNEST, log2_bytes(), need_patch_unnest_double(), numeric_type_name(), DiamondCodegen::orig_cond_false_, CodeGenerator::posArg(), and DiamondCodegen::setFalseTarget().

1392  {
1393  AUTOMATIC_IR_METADATA(cgen_state_.get());
1394  CHECK_GE(col_width, sizeof(int32_t));
1395  CodeGenerator code_generator(this);
1396  auto group_key = code_generator.codegen(group_by_col, true, co).front();
1397  auto key_to_cache = group_key;
1398  if (dynamic_cast<Analyzer::UOper*>(group_by_col) &&
1399  static_cast<Analyzer::UOper*>(group_by_col)->get_optype() == kUNNEST) {
1400  auto preheader = cgen_state_->ir_builder_.GetInsertBlock();
1401  auto array_loop_head = llvm::BasicBlock::Create(cgen_state_->context_,
1402  "array_loop_head",
1403  cgen_state_->current_func_,
1404  preheader->getNextNode());
1405  diamond_codegen.setFalseTarget(array_loop_head);
1406  const auto ret_ty = get_int_type(32, cgen_state_->context_);
1407  auto array_idx_ptr = cgen_state_->ir_builder_.CreateAlloca(ret_ty);
1408  CHECK(array_idx_ptr);
1409  cgen_state_->ir_builder_.CreateStore(cgen_state_->llInt(int32_t(0)), array_idx_ptr);
1410  const auto arr_expr = static_cast<Analyzer::UOper*>(group_by_col)->get_operand();
1411  const auto& array_ti = arr_expr->get_type_info();
1412  CHECK(array_ti.is_array());
1413  const auto& elem_ti = array_ti.get_elem_type();
1414  auto array_len =
1415  (array_ti.get_size() > 0)
1416  ? cgen_state_->llInt(array_ti.get_size() / elem_ti.get_size())
1417  : cgen_state_->emitExternalCall(
1418  "array_size",
1419  ret_ty,
1420  {group_key,
1421  code_generator.posArg(arr_expr),
1422  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1423  cgen_state_->ir_builder_.CreateBr(array_loop_head);
1424  cgen_state_->ir_builder_.SetInsertPoint(array_loop_head);
1425  CHECK(array_len);
1426  auto array_idx = cgen_state_->ir_builder_.CreateLoad(
1427  array_idx_ptr->getType()->getPointerElementType(), array_idx_ptr);
1428  auto bound_check = cgen_state_->ir_builder_.CreateICmp(
1429  llvm::ICmpInst::ICMP_SLT, array_idx, array_len);
1430  auto array_loop_body = llvm::BasicBlock::Create(
1431  cgen_state_->context_, "array_loop_body", cgen_state_->current_func_);
1432  cgen_state_->ir_builder_.CreateCondBr(
1433  bound_check,
1434  array_loop_body,
1435  array_loops.empty() ? diamond_codegen.orig_cond_false_ : array_loops.top());
1436  cgen_state_->ir_builder_.SetInsertPoint(array_loop_body);
1437  cgen_state_->ir_builder_.CreateStore(
1438  cgen_state_->ir_builder_.CreateAdd(array_idx, cgen_state_->llInt(int32_t(1))),
1439  array_idx_ptr);
1440  auto array_at_fname = "array_at_" + numeric_type_name(elem_ti);
1441  if (array_ti.get_size() < 0) {
1442  if (array_ti.get_notnull()) {
1443  array_at_fname = "notnull_" + array_at_fname;
1444  }
1445  array_at_fname = "varlen_" + array_at_fname;
1446  }
1447  const auto ar_ret_ty =
1448  elem_ti.is_fp()
1449  ? (elem_ti.get_type() == kDOUBLE
1450  ? llvm::Type::getDoubleTy(cgen_state_->context_)
1451  : llvm::Type::getFloatTy(cgen_state_->context_))
1452  : get_int_type(elem_ti.get_logical_size() * 8, cgen_state_->context_);
1453  group_key = cgen_state_->emitExternalCall(
1454  array_at_fname,
1455  ar_ret_ty,
1456  {group_key, code_generator.posArg(arr_expr), array_idx});
1457  if (need_patch_unnest_double(
1458  elem_ti, isArchMaxwell(co.device_type), thread_mem_shared)) {
1459  key_to_cache = spillDoubleElement(group_key, ar_ret_ty);
1460  } else {
1461  key_to_cache = group_key;
1462  }
1463  CHECK(array_loop_head);
1464  array_loops.push(array_loop_head);
1465  }
1466  cgen_state_->group_by_expr_cache_.push_back(key_to_cache);
1467  llvm::Value* orig_group_key{nullptr};
1468  if (translate_null_val) {
1469  const std::string translator_func_name(
1470  col_width == sizeof(int32_t) ? "translate_null_key_i32_" : "translate_null_key_");
1471  const auto& ti = group_by_col->get_type_info();
1472  const auto key_type = get_int_type(ti.get_logical_size() * 8, cgen_state_->context_);
1473  orig_group_key = group_key;
1474  group_key = cgen_state_->emitCall(
1475  translator_func_name + numeric_type_name(ti),
1476  {group_key,
1477  static_cast<llvm::Value*>(
1478  llvm::ConstantInt::get(key_type, inline_int_null_val(ti))),
1479  static_cast<llvm::Value*>(llvm::ConstantInt::get(
1480  llvm::Type::getInt64Ty(cgen_state_->context_), translated_null_val))});
1481  }
1482  group_key = cgen_state_->ir_builder_.CreateBitCast(
1483  cgen_state_->castToTypeIn(group_key, col_width * 8),
1484  get_int_type(col_width * 8, cgen_state_->context_));
1485  if (orig_group_key) {
1486  orig_group_key = cgen_state_->ir_builder_.CreateBitCast(
1487  cgen_state_->castToTypeIn(orig_group_key, col_width * 8),
1488  get_int_type(col_width * 8, cgen_state_->context_));
1489  }
1490  return {group_key, orig_group_key};
1491 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
bool need_patch_unnest_double(const SQLTypeInfo &ti, const bool is_maxwell, const bool mem_shared)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
void setFalseTarget(llvm::BasicBlock *cond_false)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::BasicBlock * orig_cond_false_
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
ExecutorDeviceType device_type
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:198
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:230
bool isArchMaxwell(const ExecutorDeviceType dt) const

+ Here is the call graph for this function:
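
A conceptual sketch of the control flow that the IR emitted above implements for an UNNEST group-by key: loop over the array elements of the current row, produce one group key per element, and optionally translate the null value. The functions declared below are hypothetical stand-ins for the runtime helpers invoked via emitExternalCall() ("array_size", "array_at_<type>", "translate_null_key_<type>"), not the actual signatures.

  #include <cstdint>

  extern int32_t array_size(const int8_t* array_col, int64_t pos, uint32_t elem_log2_bytes);
  extern int64_t array_at(const int8_t* array_col, int64_t pos, int32_t idx);
  extern int64_t translate_null_key(int64_t key, int64_t null_val, int64_t translated_null_val);

  // Sketch only: the scalar equivalent of the generated array loop.
  void group_by_unnested_column(const int8_t* array_col,
                                const int64_t pos,
                                const uint32_t elem_log2_bytes,
                                const bool translate_null_val,
                                const int64_t null_val,
                                const int64_t translated_null_val) {
    for (int32_t idx = 0; idx < array_size(array_col, pos, elem_log2_bytes); ++idx) {
      int64_t group_key = array_at(array_col, pos, idx);
      if (translate_null_val) {
        group_key = translate_null_key(group_key, null_val, translated_null_val);
      }
      // ... aggregate the current row under group_key, then continue with the next element ...
    }
  }
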

bool Executor::has_extension_module ( ExtModuleKinds  kind) const
inlineprivate

Definition at line 1513 of file Execute.h.

References extension_modules_.

Referenced by has_geos_module(), has_libdevice_module(), has_rt_module(), has_rt_udf_module(), and has_udf_module().

1513  {
1514  return extension_modules_.find(kind) != extension_modules_.end();
1515  }
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1517

+ Here is the caller graph for this function:

bool Executor::has_geos_module ( ) const
inline

Definition at line 563 of file Execute.h.

References has_extension_module(), and rt_geos_module.

563  {
564  return has_extension_module(ExtModuleKinds::rt_geos_module);
565  }
bool has_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1513

+ Here is the call graph for this function:

bool Executor::has_libdevice_module ( ) const
inline

Definition at line 566 of file Execute.h.

References has_extension_module(), and rt_libdevice_module.

566  {
567  return has_extension_module(ExtModuleKinds::rt_libdevice_module);
568  }
bool has_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1513

+ Here is the call graph for this function:

bool Executor::has_rt_module ( ) const
inline

Definition at line 552 of file Execute.h.

References has_extension_module(), and template_module.

552  {
553  return has_extension_module(ExtModuleKinds::template_module);
554  }
bool has_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1513

+ Here is the call graph for this function:

bool Executor::has_rt_udf_module ( bool  is_gpu = false) const
inline

Definition at line 559 of file Execute.h.

References has_extension_module(), rt_udf_cpu_module, and rt_udf_gpu_module.

+ Here is the call graph for this function:

bool Executor::has_udf_module ( bool  is_gpu = false) const
inline

Definition at line 555 of file Execute.h.

References has_extension_module(), udf_cpu_module, and udf_gpu_module.

+ Here is the call graph for this function:
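
The has_* predicates mirror the get_* accessors, so a caller can avoid touching the empty module reference that get_extension_module() returns when a kind was never loaded. A minimal sketch, assuming an Executor pointer is in scope:

  // Sketch only: guard module access with the corresponding has_* check.
  if (executor->has_udf_module(/*is_gpu=*/false)) {
    const std::unique_ptr<llvm::Module>& udf_module =
        executor->get_udf_module(/*is_gpu=*/false);
    // ... udf_module is expected to be non-null here ...
  }
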

bool Executor::hasLazyFetchColumns ( const std::vector< Analyzer::Expr * > &  target_exprs) const

Definition at line 1006 of file Execute.cpp.

References CHECK, and plan_state_.

1007  {
1008  CHECK(plan_state_);
1009  for (const auto target_expr : target_exprs) {
1010  if (plan_state_->isLazyFetchColumn(target_expr)) {
1011  return true;
1012  }
1013  }
1014  return false;
1015 }
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK(condition)
Definition: Logger.h:291
void Executor::init_resource_mgr ( const size_t  num_cpu_slots,
const size_t  num_gpu_slots,
const size_t  cpu_result_mem,
const size_t  cpu_buffer_pool_mem,
const size_t  gpu_buffer_pool_mem,
const double  per_query_max_cpu_slots_ratio,
const double  per_query_max_cpu_result_mem_ratio,
const bool  allow_cpu_kernel_concurrency,
const bool  allow_cpu_gpu_kernel_concurrency,
const bool  allow_cpu_slot_oversubscription_concurrency,
const bool  allow_cpu_result_mem_oversubscription,
const double  max_available_resource_use_ratio 
)
static

Definition at line 5387 of file Execute.cpp.

References executor_resource_mgr_, and ExecutorResourceMgr_Namespace::generate_executor_resource_mgr().

Referenced by DBHandler::init_executor_resource_mgr(), and QueryRunner::QueryRunner::QueryRunner().

5399  {
5400  const double per_query_max_pinned_cpu_buffer_pool_mem_ratio{1.0};
5401  const double per_query_max_pageable_cpu_buffer_pool_mem_ratio{0.5};
5402  executor_resource_mgr_ = ExecutorResourceMgr_Namespace::generate_executor_resource_mgr(
5403  num_cpu_slots,
5404  num_gpu_slots,
5405  cpu_result_mem,
5406  cpu_buffer_pool_mem,
5407  gpu_buffer_pool_mem,
5408  per_query_max_cpu_slots_ratio,
5409  per_query_max_cpu_result_mem_ratio,
5410  per_query_max_pinned_cpu_buffer_pool_mem_ratio,
5411  per_query_max_pageable_cpu_buffer_pool_mem_ratio,
5412  allow_cpu_kernel_concurrency,
5413  allow_cpu_gpu_kernel_concurrency,
5414  allow_cpu_slot_oversubscription_concurrency,
5415  true, // allow_gpu_slot_oversubscription
5416  allow_cpu_result_mem_oversubscription_concurrency,
5417  max_available_resource_use_ratio);
5418 }
std::shared_ptr< ExecutorResourceMgr > generate_executor_resource_mgr(const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const double per_query_max_pinned_cpu_buffer_pool_mem_ratio, const double per_query_max_pageable_cpu_buffer_pool_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_gpu_slot_oversubscription, const bool allow_cpu_result_mem_oversubscription_concurrency, const double max_available_resource_use_ratio)
Convenience factory-esque method that allows us to use the same logic to generate an ExecutorResource...
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
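For illustration, a call could look like the following sketch; the values are made up and would normally come from server configuration (compare DBHandler::init_executor_resource_mgr):

  Executor::init_resource_mgr(/*num_cpu_slots=*/16,
                              /*num_gpu_slots=*/2,
                              /*cpu_result_mem=*/8UL << 30,        // 8 GB
                              /*cpu_buffer_pool_mem=*/32UL << 30,  // 32 GB
                              /*gpu_buffer_pool_mem=*/16UL << 30,  // 16 GB
                              /*per_query_max_cpu_slots_ratio=*/0.9,
                              /*per_query_max_cpu_result_mem_ratio=*/0.8,
                              /*allow_cpu_kernel_concurrency=*/true,
                              /*allow_cpu_gpu_kernel_concurrency=*/true,
                              /*allow_cpu_slot_oversubscription_concurrency=*/false,
                              /*allow_cpu_result_mem_oversubscription=*/false,
                              /*max_available_resource_use_ratio=*/0.8);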

void Executor::initialize_extension_module_sources ( )
static

Definition at line 298 of file Execute.cpp.

References CHECK, extension_module_sources, get_cuda_libdevice_dir(), heavyai::get_root_abs_path(), LOG, rt_geos_module, rt_libdevice_module, template_module, and logger::WARNING.

Referenced by input_table_info_cache_().

298  {
299  if (Executor::extension_module_sources.find(
300          Executor::ExtModuleKinds::template_module) ==
301      Executor::extension_module_sources.end()) {
302  auto root_path = heavyai::get_root_abs_path();
303  auto template_path = root_path + "/QueryEngine/RuntimeFunctions.bc";
304  CHECK(boost::filesystem::exists(template_path));
305  extension_module_sources[ExtModuleKinds::template_module] =
306  template_path;
307 #ifdef ENABLE_GEOS
308  auto rt_geos_path = root_path + "/QueryEngine/GeosRuntime.bc";
309  CHECK(boost::filesystem::exists(rt_geos_path));
310  extension_module_sources[ExtModuleKinds::rt_geos_module] =
311  rt_geos_path;
312 #endif
313 #ifdef HAVE_CUDA
314  auto rt_libdevice_path = get_cuda_libdevice_dir() + "/libdevice.10.bc";
315  if (boost::filesystem::exists(rt_libdevice_path)) {
316  extension_module_sources[ExtModuleKinds::rt_libdevice_module] =
317  rt_libdevice_path;
318  } else {
319  LOG(WARNING) << "File " << rt_libdevice_path
320  << " does not exist; support for some UDF "
321  "functions might not be available.";
322  }
323 #endif
324  }
325 }
std::string get_cuda_libdevice_dir(void)
Definition: CudaMgr.cpp:612
std::string get_root_abs_path()
#define LOG(tag)
Definition: Logger.h:285
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::initializeNVPTXBackend ( ) const
private

Definition at line 1545 of file NativeCodegen.cpp.

1545  {
1546  if (nvptx_target_machine_) {
1547  return;
1548  }
1549  const auto arch = cudaMgr()->getDeviceArch();
1550  nvptx_target_machine_ = CodeGenerator::initializeNVPTXBackend(arch);
1551 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1547
NvidiaDeviceArch getDeviceArch() const
Definition: CudaMgr.h:186
static std::unique_ptr< llvm::TargetMachine > initializeNVPTXBackend(const CudaMgr_Namespace::NvidiaDeviceArch arch)
std::vector< llvm::Value * > Executor::inlineHoistedLiterals ( )
private

Definition at line 2373 of file NativeCodegen.cpp.

2373  {
2374  AUTOMATIC_IR_METADATA(cgen_state_.get());
2375 
2376  std::vector<llvm::Value*> hoisted_literals;
2377 
2378  // row_func_ is using literals whose defs have been hoisted up to the query_func_,
2379  // extend row_func_ signature to include extra args to pass these literal values.
2380  std::vector<llvm::Type*> row_process_arg_types;
2381 
2382  for (llvm::Function::arg_iterator I = cgen_state_->row_func_->arg_begin(),
2383  E = cgen_state_->row_func_->arg_end();
2384  I != E;
2385  ++I) {
2386  row_process_arg_types.push_back(I->getType());
2387  }
2388 
2389  for (auto& element : cgen_state_->query_func_literal_loads_) {
2390  for (auto value : element.second) {
2391  row_process_arg_types.push_back(value->getType());
2392  }
2393  }
2394 
2395  auto ft = llvm::FunctionType::get(
2396  get_int_type(32, cgen_state_->context_), row_process_arg_types, false);
2397  auto row_func_with_hoisted_literals =
2398  llvm::Function::Create(ft,
2399  llvm::Function::ExternalLinkage,
2400  "row_func_hoisted_literals",
2401  cgen_state_->row_func_->getParent());
2402 
2403  auto row_func_arg_it = row_func_with_hoisted_literals->arg_begin();
2404  for (llvm::Function::arg_iterator I = cgen_state_->row_func_->arg_begin(),
2405  E = cgen_state_->row_func_->arg_end();
2406  I != E;
2407  ++I) {
2408  if (I->hasName()) {
2409  row_func_arg_it->setName(I->getName());
2410  }
2411  ++row_func_arg_it;
2412  }
2413 
2414  decltype(row_func_with_hoisted_literals) filter_func_with_hoisted_literals{nullptr};
2415  decltype(row_func_arg_it) filter_func_arg_it{nullptr};
2416  if (cgen_state_->filter_func_) {
2417  // filter_func_ is using literals whose defs have been hoisted up to the row_func_,
2418  // extend filter_func_ signature to include extra args to pass these literal values.
2419  std::vector<llvm::Type*> filter_func_arg_types;
2420 
2421  for (llvm::Function::arg_iterator I = cgen_state_->filter_func_->arg_begin(),
2422  E = cgen_state_->filter_func_->arg_end();
2423  I != E;
2424  ++I) {
2425  filter_func_arg_types.push_back(I->getType());
2426  }
2427 
2428  for (auto& element : cgen_state_->query_func_literal_loads_) {
2429  for (auto value : element.second) {
2430  filter_func_arg_types.push_back(value->getType());
2431  }
2432  }
2433 
2434  auto ft2 = llvm::FunctionType::get(
2435  get_int_type(32, cgen_state_->context_), filter_func_arg_types, false);
2436  filter_func_with_hoisted_literals =
2437  llvm::Function::Create(ft2,
2438  llvm::Function::ExternalLinkage,
2439  "filter_func_hoisted_literals",
2440  cgen_state_->filter_func_->getParent());
2441 
2442  filter_func_arg_it = filter_func_with_hoisted_literals->arg_begin();
2443  for (llvm::Function::arg_iterator I = cgen_state_->filter_func_->arg_begin(),
2444  E = cgen_state_->filter_func_->arg_end();
2445  I != E;
2446  ++I) {
2447  if (I->hasName()) {
2448  filter_func_arg_it->setName(I->getName());
2449  }
2450  ++filter_func_arg_it;
2451  }
2452  }
2453 
2454  std::unordered_map<int, std::vector<llvm::Value*>>
2455  query_func_literal_loads_function_arguments,
2456  query_func_literal_loads_function_arguments2;
2457 
2458  for (auto& element : cgen_state_->query_func_literal_loads_) {
2459  std::vector<llvm::Value*> argument_values, argument_values2;
2460 
2461  for (auto value : element.second) {
2462  hoisted_literals.push_back(value);
2463  argument_values.push_back(&*row_func_arg_it);
2464  if (cgen_state_->filter_func_) {
2465  argument_values2.push_back(&*filter_func_arg_it);
2466  cgen_state_->filter_func_args_[&*row_func_arg_it] = &*filter_func_arg_it;
2467  }
2468  if (value->hasName()) {
2469  row_func_arg_it->setName("arg_" + value->getName());
2470  if (cgen_state_->filter_func_) {
2471  filter_func_arg_it->getContext();
2472  filter_func_arg_it->setName("arg_" + value->getName());
2473  }
2474  }
2475  ++row_func_arg_it;
2476  ++filter_func_arg_it;
2477  }
2478 
2479  query_func_literal_loads_function_arguments[element.first] = argument_values;
2480  query_func_literal_loads_function_arguments2[element.first] = argument_values2;
2481  }
2482 
2483  // copy the row_func function body over
2484  // see
2485  // https://stackoverflow.com/questions/12864106/move-function-body-avoiding-full-cloning/18751365
2486  row_func_with_hoisted_literals->getBasicBlockList().splice(
2487  row_func_with_hoisted_literals->begin(),
2488  cgen_state_->row_func_->getBasicBlockList());
2489 
2490  // also replace row_func arguments with the arguments from row_func_hoisted_literals
2491  for (llvm::Function::arg_iterator I = cgen_state_->row_func_->arg_begin(),
2492  E = cgen_state_->row_func_->arg_end(),
2493  I2 = row_func_with_hoisted_literals->arg_begin();
2494  I != E;
2495  ++I) {
2496  I->replaceAllUsesWith(&*I2);
2497  I2->takeName(&*I);
2498  cgen_state_->filter_func_args_.replace(&*I, &*I2);
2499  ++I2;
2500  }
2501 
2502  cgen_state_->row_func_ = row_func_with_hoisted_literals;
2503 
2504  // and finally replace literal placeholders
2505  std::vector<llvm::Instruction*> placeholders;
2506  std::string prefix("__placeholder__literal_");
2507  for (auto it = llvm::inst_begin(row_func_with_hoisted_literals),
2508  e = llvm::inst_end(row_func_with_hoisted_literals);
2509  it != e;
2510  ++it) {
2511  if (it->hasName() && it->getName().startswith(prefix)) {
2512  auto offset_and_index_entry =
2513  cgen_state_->row_func_hoisted_literals_.find(llvm::dyn_cast<llvm::Value>(&*it));
2514  CHECK(offset_and_index_entry != cgen_state_->row_func_hoisted_literals_.end());
2515 
2516  int lit_off = offset_and_index_entry->second.offset_in_literal_buffer;
2517  int lit_idx = offset_and_index_entry->second.index_of_literal_load;
2518 
2519  it->replaceAllUsesWith(
2520  query_func_literal_loads_function_arguments[lit_off][lit_idx]);
2521  placeholders.push_back(&*it);
2522  }
2523  }
2524  for (auto placeholder : placeholders) {
2525  placeholder->removeFromParent();
2526  }
2527 
2528  if (cgen_state_->filter_func_) {
2529  // copy the filter_func function body over
2530  // see
2531  // https://stackoverflow.com/questions/12864106/move-function-body-avoiding-full-cloning/18751365
2532  filter_func_with_hoisted_literals->getBasicBlockList().splice(
2533  filter_func_with_hoisted_literals->begin(),
2534  cgen_state_->filter_func_->getBasicBlockList());
2535 
2536  // also replace filter_func arguments with the arguments from
2537  // filter_func_hoisted_literals
2538  for (llvm::Function::arg_iterator I = cgen_state_->filter_func_->arg_begin(),
2539  E = cgen_state_->filter_func_->arg_end(),
2540  I2 = filter_func_with_hoisted_literals->arg_begin();
2541  I != E;
2542  ++I) {
2543  I->replaceAllUsesWith(&*I2);
2544  I2->takeName(&*I);
2545  ++I2;
2546  }
2547 
2548  cgen_state_->filter_func_ = filter_func_with_hoisted_literals;
2549 
2550  // and finally replace literal placeholders
2551  std::vector<llvm::Instruction*> placeholders;
2552  std::string prefix("__placeholder__literal_");
2553  for (auto it = llvm::inst_begin(filter_func_with_hoisted_literals),
2554  e = llvm::inst_end(filter_func_with_hoisted_literals);
2555  it != e;
2556  ++it) {
2557  if (it->hasName() && it->getName().startswith(prefix)) {
2558  auto offset_and_index_entry = cgen_state_->row_func_hoisted_literals_.find(
2559  llvm::dyn_cast<llvm::Value>(&*it));
2560  CHECK(offset_and_index_entry != cgen_state_->row_func_hoisted_literals_.end());
2561 
2562  int lit_off = offset_and_index_entry->second.offset_in_literal_buffer;
2563  int lit_idx = offset_and_index_entry->second.index_of_literal_load;
2564 
2565  it->replaceAllUsesWith(
2566  query_func_literal_loads_function_arguments2[lit_off][lit_idx]);
2567  placeholders.push_back(&*it);
2568  }
2569  }
2570  for (auto placeholder : placeholders) {
2571  placeholder->removeFromParent();
2572  }
2573  }
2574 
2575  return hoisted_literals;
2576 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
void Executor::insertErrorCodeChecker ( llvm::Function *  query_func,
unsigned const  error_code_idx,
bool  hoist_literals,
bool  allow_runtime_query_interrupt 
)
private

Definition at line 3242 of file NativeCodegen.cpp.

3245  {
3246  auto query_stub_func_name =
3247  "query_stub" + std::string(hoist_literals ? "_hoisted_literals" : "");
3248  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
3249  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
3250  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
3251  continue;
3252  }
3253  auto& row_func_call = llvm::cast<llvm::CallInst>(*inst_it);
3254  auto const row_func_name = CodegenUtil::getCalledFunctionName(row_func_call);
3255  if (row_func_name && *row_func_name == query_stub_func_name) {
3256  auto next_inst_it = inst_it;
3257  ++next_inst_it;
3258  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
3259  auto& br_instr = bb_it->back();
3260  llvm::IRBuilder<> ir_builder(&br_instr);
3261  llvm::Value* err_lv = &*inst_it;
3262  auto error_check_bb =
3263  bb_it->splitBasicBlock(llvm::BasicBlock::iterator(br_instr), ".error_check");
3264  // query_func does not have parameter names assigned.
3265  llvm::Value* const error_code_arg = get_arg_by_index(query_func, error_code_idx);
3266  CHECK(error_code_arg) << error_code_idx << '/' << query_func->arg_size();
3267  llvm::Value* err_code = nullptr;
3268  if (allow_runtime_query_interrupt) {
3269  // decide the final error code, taking the interrupt status into account
3270  auto& check_interrupt_br_instr = bb_it->back();
3271  auto interrupt_check_bb = llvm::BasicBlock::Create(
3272  cgen_state_->context_, ".interrupt_check", query_func, error_check_bb);
3273  llvm::IRBuilder<> interrupt_checker_ir_builder(interrupt_check_bb);
3274  auto detected_interrupt = interrupt_checker_ir_builder.CreateCall(
3275  cgen_state_->module_->getFunction("check_interrupt"), {});
3276  auto detected_error = interrupt_checker_ir_builder.CreateCall(
3277  cgen_state_->module_->getFunction("get_error_code"),
3278  std::vector<llvm::Value*>{error_code_arg});
3279  err_code = interrupt_checker_ir_builder.CreateSelect(
3280  detected_interrupt,
3281  cgen_state_->llInt(int32_t(ErrorCode::INTERRUPTED)),
3282  detected_error);
3283  interrupt_checker_ir_builder.CreateBr(error_check_bb);
3284  llvm::ReplaceInstWithInst(&check_interrupt_br_instr,
3285  llvm::BranchInst::Create(interrupt_check_bb));
3286  ir_builder.SetInsertPoint(&br_instr);
3287  } else {
3288  // use the error code returned from row_func and skip the interrupt status check
3289  ir_builder.SetInsertPoint(&br_instr);
3290  err_code =
3291  ir_builder.CreateCall(cgen_state_->module_->getFunction("get_error_code"),
3292  std::vector<llvm::Value*>{error_code_arg});
3293  }
3294  err_lv = ir_builder.CreateICmp(
3295  llvm::ICmpInst::ICMP_NE, err_code, cgen_state_->llInt(0));
3296  auto error_bb = llvm::BasicBlock::Create(
3297  cgen_state_->context_, ".error_exit", query_func, new_bb);
3298  llvm::CallInst::Create(cgen_state_->module_->getFunction("record_error_code"),
3299  std::vector<llvm::Value*>{err_code, error_code_arg},
3300  "",
3301  error_bb);
3302  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
3303  llvm::ReplaceInstWithInst(&br_instr,
3304  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
3305  break;
3306  }
3307  }
3308  }
3309 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * get_arg_by_index(llvm::Function *func, unsigned const index)
Definition: Execute.h:178
void Executor::interrupt ( const QuerySessionId query_session = "",
const QuerySessionId interrupt_session = "" 
)

Definition at line 42 of file GpuInterrupt.cpp.

References CHECK, CHECK_EQ, CHECK_GE, check_interrupt_init(), checkCudaErrors(), data_mgr_(), DW_ABORT, dw_abort, dynamic_watchdog_init(), g_enable_dynamic_watchdog, g_enable_non_kernel_time_query_interrupt, g_enable_runtime_query_interrupt, INT_ABORT, runtime_interrupt_flag, to_string(), and VLOG.

43  {
44  const auto allow_interrupt =
45      g_enable_runtime_query_interrupt || g_enable_non_kernel_time_query_interrupt;
46  if (allow_interrupt) {
47  bool is_running_query = false;
48  {
49  // here we check that the requested query session is valid (i.e., already enrolled);
50  // if not, we skip the interrupt request
51  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(
52      executor_session_mutex_);
53  if (!checkIsQuerySessionEnrolled(query_session, session_read_lock)) {
54  VLOG(1) << "Skip the interrupt request (no query has been submitted from the "
55  "given query session)";
56  return;
57  }
58  if (checkIsQuerySessionInterrupted(query_session, session_read_lock)) {
59  VLOG(1) << "Skip the interrupt request (already interrupted query session)";
60  return;
61  }
62  // if the query is still pending, we just need to turn the interrupt flag for the
63  // session on (not send an interrupt signal to the "RUNNING" kernel; see the code below)
64  is_running_query = checkCurrentQuerySession(query_session, session_read_lock);
65  }
66  {
67  // We have to cover an interrupt request from *any* session because we don't know
68  // whether the request is for the running query, a pending query,
69  // or a non-kernel-time interrupt
70  // (or just a false alarm caused by an unregistered session in the queue).
71  // So we mark the session as interrupted once we confirm
72  // the session has been enrolled and is not already interrupted
73  heavyai::unique_lock<heavyai::shared_mutex> session_write_lock(
74      executor_session_mutex_);
75  setQuerySessionAsInterrupted(query_session, session_write_lock);
76  }
77  if (!is_running_query) {
78  return;
79  }
80  // mark the interrupted status of this executor
81  interrupted_.store(true);
82  }
83 
84  // for both GPU and CPU kernel execution, the interrupt flag that a running kernel
85  // accesses is a global variable from the Executors' point of view,
86  // but that is okay for now since we hold a kernel_lock when starting query execution;
87  // this means we should revisit this logic when we start using multi-query
88  // execution to support per-kernel interrupts
89  bool CPU_execution_mode = true;
90 
91 #ifdef HAVE_CUDA
92  // The below code is basically for runtime query interrupt for GPU.
93  // It is also possible that the user forces CPU mode even though GPU(s) are present.
94  // In this case, we should not execute the code below, to avoid a runtime failure
96  auto cuda_mgr = data_mgr_->getCudaMgr();
97  if (cuda_mgr && (g_enable_dynamic_watchdog || allow_interrupt)) {
98  // we additionally allow sending an interrupt signal when
99  // `g_enable_non_kernel_time_query_interrupt` is set, especially for CTAS/ITAS queries:
100  // data population happens on CPU but the select_query can be processed via GPU
101  CHECK_GE(cuda_mgr->getDeviceCount(), 1);
102  std::lock_guard<std::mutex> lock(gpu_active_modules_mutex_);
103  CUcontext old_cu_context;
104  checkCudaErrors(cuCtxGetCurrent(&old_cu_context));
105  for (int device_id = 0; device_id < max_gpu_count; device_id++) {
106  if (gpu_active_modules_device_mask_ & (1 << device_id)) {
107  void* llvm_module = gpu_active_modules_[device_id];
108  auto cu_module = static_cast<CUmodule>(llvm_module);
109  if (!cu_module) {
110  continue;
111  } else {
112  VLOG(1) << "Try to interrupt the running query on GPU assigned to Executor "
113  << executor_id_;
114  CPU_execution_mode = false;
115  }
116  cuda_mgr->setContext(device_id);
117 
118  // Create high priority non-blocking communication stream
119  CUstream cu_stream1;
120  checkCudaErrors(
121      cuStreamCreateWithPriority(&cu_stream1, CU_STREAM_NON_BLOCKING, 1));
122 
123  CUevent start, stop;
124  cuEventCreate(&start, 0);
125  cuEventCreate(&stop, 0);
126  cuEventRecord(start, cu_stream1);
127 
128  if (g_enable_dynamic_watchdog) {
129  CUdeviceptr dw_abort;
130  size_t dw_abort_size;
131  if (cuModuleGetGlobal(&dw_abort, &dw_abort_size, cu_module, "dw_abort") ==
132  CUDA_SUCCESS) {
133  CHECK_EQ(dw_abort_size, sizeof(uint32_t));
134  int32_t abort_val = 1;
135  checkCudaErrors(cuMemcpyHtoDAsync(dw_abort,
136  reinterpret_cast<void*>(&abort_val),
137  sizeof(int32_t),
138  cu_stream1));
139 
140  if (device_id == 0) {
141  VLOG(1) << "GPU: Async Abort submitted to Device "
142  << std::to_string(device_id);
143  }
144  }
145  }
146 
147  if (allow_interrupt) {
148  CUdeviceptr runtime_interrupt_flag;
149  size_t runtime_interrupt_flag_size;
150  auto status = cuModuleGetGlobal(&runtime_interrupt_flag,
151  &runtime_interrupt_flag_size,
152  cu_module,
153  "runtime_interrupt_flag");
154  if (status == CUDA_SUCCESS) {
155  VLOG(1) << "Executor " << executor_id_
156  << " retrieves interrupt status from GPU " << device_id;
157  CHECK_EQ(runtime_interrupt_flag_size, sizeof(uint32_t));
158  int32_t abort_val = 1;
159  checkCudaErrors(cuMemcpyHtoDAsync(runtime_interrupt_flag,
160  reinterpret_cast<void*>(&abort_val),
161  sizeof(int32_t),
162  cu_stream1));
163  if (device_id == 0) {
164  VLOG(1) << "GPU: send interrupt signal from Executor " << executor_id_
165  << " to Device " << std::to_string(device_id);
166  }
167  } else if (status == CUDA_ERROR_NOT_FOUND) {
168  std::runtime_error(
169  "Runtime query interrupt on Executor " + std::to_string(executor_id_) +
170  " has failed: an interrupt flag on the GPU could "
171  "not be initialized (CUDA_ERROR_CODE: CUDA_ERROR_NOT_FOUND)");
172  } else {
173  // if we reach here, the runtime query interrupt has failed due to
174  // one of the following errors: CUDA_ERROR_NOT_INITIALIZED,
175  // CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_INVALID_CONTEXT, or
176  // CUDA_ERROR_INVALID_VALUE. All of those error codes indicate device failure.
177  const char* error_ret_str = nullptr;
178  cuGetErrorName(status, &error_ret_str);
179  if (!error_ret_str) {
180  error_ret_str = "UNKNOWN";
181  }
182  std::string error_str(error_ret_str);
183  std::runtime_error(
184  "Runtime interrupt on Executor " + std::to_string(executor_id_) +
185  " has failed due to a device " + std::to_string(device_id) +
186  "'s issue "
187  "(CUDA_ERROR_CODE: " +
188  error_str + ")");
189  }
190 
191  cuEventRecord(stop, cu_stream1);
192  cuEventSynchronize(stop);
193  float milliseconds = 0;
194  cuEventElapsedTime(&milliseconds, start, stop);
195  VLOG(1) << "Device " << std::to_string(device_id)
196  << ": submitted async interrupt request from Executor " << executor_id_
197  << " : SUCCESS: " << std::to_string(milliseconds) << " ms";
198  checkCudaErrors(cuStreamDestroy(cu_stream1));
199  }
200  }
201  checkCudaErrors(cuCtxSetCurrent(old_cu_context));
202  }
203  }
204 #endif
205  if (g_enable_dynamic_watchdog) {
206  dynamic_watchdog_init(static_cast<unsigned>(DW_ABORT));
207  }
208 
209  if (allow_interrupt && CPU_execution_mode) {
210  // turn interrupt flag on for CPU mode
211  VLOG(1) << "Try to interrupt the running query on CPU from Executor " << executor_id_;
212  check_interrupt_init(static_cast<unsigned>(INT_ABORT));
213  }
214 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
std::atomic< bool > interrupted_
Definition: Execute.h:1543
int CUcontext
Definition: nocuda.h:22
static const int max_gpu_count
Definition: Execute.h:1535
void * CUstream
Definition: nocuda.h:23
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
unsigned long long CUdeviceptr
Definition: nocuda.h:28
#define CHECK_GE(x, y)
Definition: Logger.h:306
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1541
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1540
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:138
bool checkCurrentQuerySession(const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4991
std::string to_string(char const *&&v)
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1476
std::unique_lock< T > unique_lock
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5265
__device__ int32_t runtime_interrupt_flag
Definition: cuda_mapd_rt.cu:95
RUNTIME_EXPORT uint64_t dynamic_watchdog_init(unsigned ms_budget)
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
#define CHECK(condition)
Definition: Logger.h:291
RUNTIME_EXPORT bool check_interrupt_init(unsigned command)
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5243
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:94
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
#define VLOG(n)
Definition: Logger.h:388
void * CUmodule
Definition: nocuda.h:24
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1539

+ Here is the call graph for this function:
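A hedged usage sketch (executor and session_id are assumed to exist on the caller's side; QuerySessionId is a string-like session identifier):

  // Request interruption of whatever query was submitted under session_id.
  // If that session is only pending, just its interrupt flag is set; if it is the
  // currently running session, a CPU and/or GPU interrupt signal is also delivered.
  executor->interrupt(session_id, /*interrupt_session=*/session_id);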

void Executor::invalidateCardinalityCacheForTable ( const shared::TableKey table_key)
static

Definition at line 5316 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, and recycler_mutex_.

Referenced by clearExternalCaches().

5316  {
5317  heavyai::unique_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5318  if (g_use_estimator_result_cache) {
5319  for (auto it = cardinality_cache_.begin(); it != cardinality_cache_.end();) {
5320  if (it->first.containsTableKey(table_key)) {
5321  it = cardinality_cache_.erase(it);
5322  } else {
5323  it++;
5324  }
5325  }
5326  }
5327 }
std::unique_lock< T > unique_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139

+ Here is the caller graph for this function:

void Executor::invalidateRunningQuerySession ( heavyai::unique_lock< heavyai::shared_mutex > &  write_lock)

Definition at line 5013 of file Execute.cpp.

References current_query_session_.

Referenced by clearQuerySessionStatus().

5014  {
5015  current_query_session_ = "";
5016 }
QuerySessionId current_query_session_
Definition: Execute.h:1576

+ Here is the caller graph for this function:

bool Executor::isArchMaxwell ( const ExecutorDeviceType  dt) const

Definition at line 25 of file MaxwellCodegenPatch.cpp.

References GPU.

25  {
26  return dt == ExecutorDeviceType::GPU && cudaMgr()->isArchMaxwell();
27 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
bool isArchMaxwell() const
Definition: CudaMgr.h:147
bool Executor::isArchPascalOrLater ( const ExecutorDeviceType  dt) const
inlineprivate

Definition at line 872 of file Execute.h.

References cudaMgr(), GPU, and CudaMgr_Namespace::CudaMgr::isArchPascalOrLater().

Referenced by getDeviceTypeForTargets().

872  {
873  if (dt == ExecutorDeviceType::GPU) {
874  return cudaMgr()->isArchPascalOrLater();
875  }
876  return false;
877  }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
bool isArchPascalOrLater() const
Definition: CudaMgr.h:156

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::isCPUOnly ( ) const

Definition at line 706 of file Execute.cpp.

References CHECK, data_mgr_, and Data_Namespace::DataMgr::getCudaMgr().

706  {
707  CHECK(data_mgr_);
708  return !data_mgr_->getCudaMgr();
709 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

bool Executor::isFragmentFullyDeleted ( const InputDescriptor table_desc,
const Fragmenter_Namespace::FragmentInfo fragment 
)
private

Definition at line 4561 of file Execute.cpp.

References CHECK, extract_max_stat_int_type(), extract_min_stat_int_type(), Catalog_Namespace::SysCatalog::getCatalog(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), InputDescriptor::getTableKey(), Catalog_Namespace::SysCatalog::instance(), and Fragmenter_Namespace::FragmentInfo::physicalTableId.

Referenced by skipFragment().

4563  {
4564  // Skip temporary tables
4565  const auto& table_key = table_desc.getTableKey();
4566  if (table_key.table_id < 0) {
4567  return false;
4568  }
4569 
4570  const auto catalog =
4571  Catalog_Namespace::SysCatalog::instance().getCatalog(table_key.db_id);
4572  CHECK(catalog);
4573  const auto td = catalog->getMetadataForTable(fragment.physicalTableId);
4574  CHECK(td);
4575  const auto deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
4576  if (!deleted_cd) {
4577  return false;
4578  }
4579 
4580  const auto& chunk_type = deleted_cd->columnType;
4581  CHECK(chunk_type.is_boolean());
4582 
4583  const auto deleted_col_id = deleted_cd->columnId;
4584  auto chunk_meta_it = fragment.getChunkMetadataMap().find(deleted_col_id);
4585  if (chunk_meta_it != fragment.getChunkMetadataMap().end()) {
4586  const int64_t chunk_min =
4587  extract_min_stat_int_type(chunk_meta_it->second->chunkStats, chunk_type);
4588  const int64_t chunk_max =
4589  extract_max_stat_int_type(chunk_meta_it->second->chunkStats, chunk_type);
4590  if (chunk_min == 1 && chunk_max == 1) { // Delete chunk if metadata says full bytemap
4591  // is true (signifying all rows deleted)
4592  return true;
4593  }
4594  }
4595  return false;
4596 }
int64_t extract_max_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
static SysCatalog & instance()
Definition: SysCatalog.h:343
int64_t extract_min_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const shared::TableKey & getTableKey() const
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::launchKernelsImpl ( SharedKernelContext shared_context,
std::vector< std::unique_ptr< ExecutionKernel >> &&  kernels,
const ExecutorDeviceType  device_type,
const size_t  requested_num_threads 
)
private

Launches execution kernels created by createKernels asynchronously using a thread pool.

Definition at line 3040 of file Execute.cpp.

References SharedKernelContext::addDeviceResults(), auto_num_threads, CHECK, CPU, cpu_threads(), DEBUG_TIMER_NEW_THREAD, RelAlgExecutionUnit::estimator, logger::EXECUTOR, g_enable_cpu_sub_tasks, LOG, threading_std::task_group::run(), SharedKernelContext::setNumAllocatedThreads(), logger::thread_local_ids(), VLOG, and threading_std::task_group::wait().

Referenced by launchKernelsLocked(), and launchKernelsViaResourceMgr().

3043  {
3044 #ifdef HAVE_TBB
3045  const size_t num_threads =
3046  requested_num_threads == Executor::auto_num_threads
3047  ? std::min(kernels.size(), static_cast<size_t>(cpu_threads()))
3048  : requested_num_threads;
3049  tbb::task_arena local_arena(num_threads);
3050 #else
3051  const size_t num_threads = cpu_threads();
3052 #endif
3053  shared_context.setNumAllocatedThreads(num_threads);
3054  LOG(EXECUTOR) << "Launching query step with " << num_threads << " threads.";
3055  threading::task_group tg;
3056  // A hack to have unused unit for results collection.
3057  const RelAlgExecutionUnit* ra_exe_unit =
3058  kernels.empty() ? nullptr : &kernels[0]->ra_exe_unit_;
3059 
3060 #ifdef HAVE_TBB
3061  if (g_enable_cpu_sub_tasks && device_type == ExecutorDeviceType::CPU) {
3062  shared_context.setThreadPool(&tg);
3063  }
3064  ScopeGuard pool_guard([&shared_context]() { shared_context.setThreadPool(nullptr); });
3065 #endif // HAVE_TBB
3066 
3067  VLOG(1) << "Launching " << kernels.size() << " kernels for query on "
3068  << (device_type == ExecutorDeviceType::CPU ? "CPU"s : "GPU"s)
3069  << " using pool of " << num_threads << " threads.";
3070  size_t kernel_idx = 1;
3071 
3072  for (auto& kernel : kernels) {
3073  CHECK(kernel.get());
3074 #ifdef HAVE_TBB
3075  local_arena.execute([&] {
3076 #endif
3077  tg.run([this,
3078  &kernel,
3079  &shared_context,
3080  parent_thread_local_ids = logger::thread_local_ids(),
3081  num_threads,
3082  crt_kernel_idx = kernel_idx++] {
3083  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
3084  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
3085  // Keep monotonicity of thread_idx by kernel launch time, so that optimizations
3086  // such as launching kernels with data already in pool first become possible
3087 #ifdef HAVE_TBB
3088  const size_t old_thread_idx = crt_kernel_idx % num_threads;
3089  const size_t thread_idx = tbb::this_task_arena::current_thread_index();
3090  LOG(EXECUTOR) << "Thread idx: " << thread_idx
3091  << " Old thread idx: " << old_thread_idx;
3092 #else
3093  const size_t thread_idx = crt_kernel_idx % num_threads;
3094 #endif
3095  kernel->run(this, thread_idx, shared_context);
3096  });
3097 #ifdef HAVE_TBB
3098  }); // local_arena.execute[&]
3099 #endif
3100  }
3101 #ifdef HAVE_TBB
3102  local_arena.execute([&] { tg.wait(); });
3103 #else
3104  tg.wait();
3105 #endif
3106 
3107  for (auto& exec_ctx : shared_context.getTlsExecutionContext()) {
3108  // The first arg is used for GPU only, it's not our case.
3109  // TODO: add QueryExecutionContext::getRowSet() interface
3110  // for our case.
3111  if (exec_ctx) {
3112  ResultSetPtr results;
3113  if (ra_exe_unit->estimator) {
3114  results = std::shared_ptr<ResultSet>(exec_ctx->estimator_result_set_.release());
3115  } else {
3116  results = exec_ctx->getRowSet(*ra_exe_unit, exec_ctx->query_mem_desc_);
3117  }
3118  shared_context.addDeviceResults(std::move(results), {});
3119  }
3120  }
3121 }
#define LOG(tag)
Definition: Logger.h:285
void addDeviceResults(ResultSetPtr &&device_results, std::vector< size_t > outer_table_fragment_ids)
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:417
std::shared_ptr< ResultSet > ResultSetPtr
const std::shared_ptr< Analyzer::Estimator > estimator
static const size_t auto_num_threads
Definition: Execute.h:1536
#define CHECK(condition)
Definition: Logger.h:291
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:89
void setNumAllocatedThreads(size_t num_threads)
int cpu_threads()
Definition: thread_count.h:25
ThreadLocalIds thread_local_ids()
Definition: Logger.cpp:882
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::launchKernelsLocked ( SharedKernelContext shared_context,
std::vector< std::unique_ptr< ExecutionKernel >> &&  kernels,
const ExecutorDeviceType  device_type 
)
private

Definition at line 3123 of file Execute.cpp.

References auto_num_threads, kernel_mutex_, kernel_queue_time_ms_, launchKernelsImpl(), timer_start(), and timer_stop().

Referenced by executeWorkUnitImpl().

3126  {
3127  auto clock_begin = timer_start();
3128  std::lock_guard<std::mutex> kernel_lock(kernel_mutex_);
3129  kernel_queue_time_ms_ += timer_stop(clock_begin);
3130 
3131  launchKernelsImpl(
3132      shared_context, std::move(kernels), device_type, Executor::auto_num_threads);
3133 }
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
void launchKernelsImpl(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
Definition: Execute.cpp:3040
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
static std::mutex kernel_mutex_
Definition: Execute.h:1624
static const size_t auto_num_threads
Definition: Execute.h:1536
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::launchKernelsViaResourceMgr ( SharedKernelContext shared_context,
std::vector< std::unique_ptr< ExecutionKernel >> &&  kernels,
const ExecutorDeviceType  device_type,
const std::vector< InputDescriptor > &  input_descs,
const QueryMemoryDescriptor query_mem_desc 
)
private

Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr.

This function first calculates the CPU slots, GPU slots, result-set memory, and buffer-pool memory necessary for the query, and then requests those resources from ExecutorResourceMgr. The query thread is conditionally put into a wait state until there are enough resources to execute the query, which may or may not happen concurrently with other query steps, depending on the resource grant policies in place and on the resources needed by this thread's query step and by all other in-flight queries requesting resources. Once ExecutorResourceMgr gives the thread the green light, it calls launchKernelsImpl, which does the actual work of launching the kernels (see the sketch after the parameter list below).

Parameters
shared_context - used to obtain the InputTableInfo vector (query_infos) used for input chunk calculation
kernels - vector of kernels that will be launched, one per fragment for CPU execution, but possibly multi-fragment (one per device) for GPU execution
device_type - specifies whether the query step should run on CPU or GPU
input_descs - necessary to get the input table and column ids for a query, for input chunk calculation
query_mem_desc - necessary to get the result set size per kernel
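A condensed sketch of the gating flow described above, simplified from the definition below (the assumption that the resource grant is released when resource_handle goes out of scope is not shown in this listing):

  // build the resource request: CPU/GPU slots, result memory, and chunk footprint
  const auto resource_request_info = gen_resource_request_info();
  // may block this thread until ExecutorResourceMgr grants the request
  auto resource_handle = executor_resource_mgr_->request_resources(resource_request_info);
  const auto num_cpu_threads = resource_handle->get_resource_grant().cpu_slots;
  // with the grant in hand, do the actual kernel launches
  launchKernelsImpl(shared_context, std::move(kernels), device_type, num_cpu_threads);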

Definition at line 3135 of file Execute.cpp.

References ExecutorResourceMgr_Namespace::CPU_SLOTS, executor_resource_mgr_, QueryMemoryDescriptor::getBufferSizeBytes(), getChunkRequestInfo(), getExecutorId(), SharedKernelContext::getQueryInfos(), GPU, ExecutorResourceMgr_Namespace::GPU_SLOTS, kernel_queue_time_ms_, launchKernelsImpl(), query_mem_desc, QueryMemoryDescriptor::threadsCanReuseGroupByBuffers(), timer_start(), timer_stop(), and VLOG.

Referenced by executeWorkUnitImpl().

3140  {
3141  // CPU queries in general, plus some GPU queries, i.e. certain types of top-k sorts,
3142  // can generate more kernels than cores/GPU devices, so allow handling this for now
3143  // by capping the number of requested slots at the number of actual devices
3144  const size_t num_kernels = kernels.size();
3145  constexpr bool cap_slots = false;
3146  const size_t num_compute_slots =
3147  cap_slots
3148  ? std::min(num_kernels,
3149  executor_resource_mgr_
3150  ->get_resource_info(
3151  device_type == ExecutorDeviceType::GPU
3152  ? ExecutorResourceMgr_Namespace::ResourceType::GPU_SLOTS
3153  : ExecutorResourceMgr_Namespace::ResourceType::CPU_SLOTS)
3154  .second)
3155  : num_kernels;
3156  const size_t cpu_result_mem_bytes_per_kernel =
3157  query_mem_desc.getBufferSizeBytes(device_type);
3158 
3159  std::vector<std::pair<int32_t, FragmentsList>> kernel_fragments_list;
3160  kernel_fragments_list.reserve(num_kernels);
3161  for (auto& kernel : kernels) {
3162  const auto device_id = kernel->get_chosen_device_id();
3163  const auto frag_list = kernel->get_fragment_list();
3164  if (!frag_list.empty()) {
3165  kernel_fragments_list.emplace_back(std::make_pair(device_id, frag_list));
3166  }
3167  }
3168  const auto chunk_request_info = getChunkRequestInfo(
3169  device_type, input_descs, shared_context.getQueryInfos(), kernel_fragments_list);
3170 
3171  auto gen_resource_request_info = [device_type,
3172  num_compute_slots,
3173  cpu_result_mem_bytes_per_kernel,
3174  &chunk_request_info,
3175  &query_mem_desc]() {
3176  if (device_type == ExecutorDeviceType::GPU) {
3178  device_type,
3179  static_cast<size_t>(0), // priority_level
3180  static_cast<size_t>(0), // cpu_slots
3181  static_cast<size_t>(0), // min_cpu_slots,
3182  num_compute_slots, // gpu_slots
3183  num_compute_slots, // min_gpu_slots
3184  cpu_result_mem_bytes_per_kernel * num_compute_slots, // cpu_result_mem,
3185  cpu_result_mem_bytes_per_kernel * num_compute_slots, // min_cpu_result_mem,
3186  chunk_request_info, // chunks needed
3187  false); // output_buffers_reusable_intra_thread
3188  } else {
3189  const size_t min_cpu_slots{1};
3190  const size_t min_cpu_result_mem =
3191  query_mem_desc.threadsCanReuseGroupByBuffers()
3192  ? cpu_result_mem_bytes_per_kernel * min_cpu_slots
3193  : cpu_result_mem_bytes_per_kernel * num_compute_slots;
3195  device_type,
3196  static_cast<size_t>(0), // priority_level
3197  num_compute_slots, // cpu_slots
3198  min_cpu_slots, // min_cpu_slots
3199  size_t(0), // gpu_slots
3200  size_t(0), // min_gpu_slots
3201  cpu_result_mem_bytes_per_kernel * num_compute_slots, // cpu_result_mem
3202  min_cpu_result_mem, // min_cpu_result_mem
3203  chunk_request_info, // chunks needed
3204  query_mem_desc
3205  .threadsCanReuseGroupByBuffers()); // output_buffers_reusable_intra_thread
3206  }
3207  };
3208 
3209  const auto resource_request_info = gen_resource_request_info();
3210 
3211  auto clock_begin = timer_start();
3212  const bool is_empty_request =
3213  resource_request_info.cpu_slots == 0UL && resource_request_info.gpu_slots == 0UL;
3214  auto resource_handle =
3215  is_empty_request ? nullptr
3216  : executor_resource_mgr_->request_resources(resource_request_info);
3217  const auto num_cpu_threads =
3218  is_empty_request ? 0UL : resource_handle->get_resource_grant().cpu_slots;
3219  if (device_type == ExecutorDeviceType::GPU) {
3220  const auto num_gpu_slots =
3221  is_empty_request ? 0UL : resource_handle->get_resource_grant().gpu_slots;
3222  VLOG(1) << "In Executor::LaunchKernels executor " << getExecutorId() << " requested "
3223  << "between " << resource_request_info.min_gpu_slots << " and "
3224  << resource_request_info.gpu_slots << " GPU slots, and was granted "
3225  << num_gpu_slots << " GPU slots.";
3226  } else {
3227  VLOG(1) << "In Executor::LaunchKernels executor " << getExecutorId() << " requested "
3228  << "between " << resource_request_info.min_cpu_slots << " and "
3229  << resource_request_info.cpu_slots << " CPU slots, and was granted "
3230  << num_cpu_threads << " CPU slots.";
3231  }
3232  kernel_queue_time_ms_ += timer_stop(clock_begin);
3233  launchKernelsImpl(shared_context, std::move(kernels), device_type, num_cpu_threads);
3234 }
A container to store requested and minimum necessary resource requests across all resource types cur...
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
void launchKernelsImpl(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
Definition: Execute.cpp:3040
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
ExecutorResourceMgr_Namespace::ChunkRequestInfo getChunkRequestInfo(const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos, const std::vector< std::pair< int32_t, FragmentsList >> &device_fragment_lists) const
Determines a unique list of chunks and their associated byte sizes for a given query plan...
Definition: Execute.cpp:877
bool threadsCanReuseGroupByBuffers() const
const std::vector< InputTableInfo > & getQueryInfos() const
ExecutorId getExecutorId() const
Definition: Execute.h:1332
#define VLOG(n)
Definition: Logger.h:388
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::logSystemCPUMemoryStatus ( std::string const &  tag,
size_t const  thread_idx 
) const

Definition at line 765 of file Execute.cpp.

References executor_id_, g_allow_memory_status_log, getDataMgr(), Data_Namespace::DataMgr::getSystemMemoryUsage(), anonymous_namespace{Execute.cpp}::log_system_memory_info_impl(), timer_start(), and timer_stop().

766  {
767  if (g_allow_memory_status_log && getDataMgr()) {
768  auto timer = timer_start();
769  std::ostringstream oss;
770  oss << getDataMgr()->getSystemMemoryUsage();
771  log_system_memory_info_impl(
772  oss.str(), executor_id_, timer_stop(timer), log_tag, thread_idx);
773  }
774 }
void log_system_memory_info_impl(std::string const &mem_log, size_t executor_id, size_t log_time_ms, std::string const &log_tag, size_t const thread_idx)
Definition: Execute.cpp:752
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:131
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
const ExecutorId executor_id_
Definition: Execute.h:1476
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:623
bool g_allow_memory_status_log
Definition: Execute.cpp:200
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

void Executor::logSystemGPUMemoryStatus ( std::string const &  tag,
size_t const  thread_idx 
) const

Definition at line 776 of file Execute.cpp.

References executor_id_, g_allow_memory_status_log, Data_Namespace::DataMgr::getCudaMgr(), getDataMgr(), anonymous_namespace{Execute.cpp}::log_system_memory_info_impl(), timer_start(), and timer_stop().

777  {
778 #ifdef HAVE_CUDA
779  if (g_allow_memory_status_log && getDataMgr() && getDataMgr()->gpusPresent() &&
780  getDataMgr()->getCudaMgr()) {
781  auto timer = timer_start();
782  auto mem_log = getDataMgr()->getCudaMgr()->getCudaMemoryUsageInString();
783  log_system_memory_info_impl(
784  mem_log, executor_id_, timer_stop(timer), log_tag, thread_idx);
785  }
786 #endif
787 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
void log_system_memory_info_impl(std::string const &mem_log, size_t executor_id, size_t log_time_ms, std::string const &log_tag, size_t const thread_idx)
Definition: Execute.cpp:752
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
const ExecutorId executor_id_
Definition: Execute.h:1476
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:623
bool g_allow_memory_status_log
Definition: Execute.cpp:200
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

size_t Executor::maxGpuSlabSize ( ) const

Definition at line 4392 of file Execute.cpp.

References max_gpu_slab_size_.

4392  {
4393  return max_gpu_slab_size_;
4394 }
const size_t max_gpu_slab_size_
Definition: Execute.h:1554
bool Executor::needFetchAllFragments ( const InputColDescriptor col_desc,
const RelAlgExecutionUnit ra_exe_unit,
const FragmentsList selected_fragments 
) const
private

Definition at line 3416 of file Execute.cpp.

References CHECK_EQ, CHECK_LT, InputDescriptor::getNestLevel(), InputColDescriptor::getScanDesc(), InputDescriptor::getSourceType(), InputDescriptor::getTableKey(), RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, plan_state_, and TABLE.

Referenced by fetchChunks(), and fetchUnionChunks().

3418  {
3419  const auto& input_descs = ra_exe_unit.input_descs;
3420  const int nest_level = inner_col_desc.getScanDesc().getNestLevel();
3421  if (nest_level < 1 ||
3422  inner_col_desc.getScanDesc().getSourceType() != InputSourceType::TABLE ||
3423  ra_exe_unit.join_quals.empty() || input_descs.size() < 2 ||
3424  (ra_exe_unit.join_quals.empty() &&
3425  plan_state_->isLazyFetchColumn(inner_col_desc))) {
3426  return false;
3427  }
3428  const auto& table_key = inner_col_desc.getScanDesc().getTableKey();
3429  CHECK_LT(static_cast<size_t>(nest_level), selected_fragments.size());
3430  CHECK_EQ(table_key, selected_fragments[nest_level].table_key);
3431  const auto& fragments = selected_fragments[nest_level].fragment_ids;
3432  return fragments.size() > 1;
3433 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< InputDescriptor > input_descs
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::needLinearizeAllFragments ( const ColumnDescriptor cd,
const InputColDescriptor inner_col_desc,
const RelAlgExecutionUnit ra_exe_unit,
const FragmentsList selected_fragments,
const Data_Namespace::MemoryLevel  memory_level 
) const
private

Definition at line 3435 of file Execute.cpp.

References CHECK_EQ, CHECK_LT, ColumnDescriptor::columnType, InputDescriptor::getNestLevel(), InputColDescriptor::getScanDesc(), InputDescriptor::getTableKey(), SQLTypeInfo::is_array(), SQLTypeInfo::is_dict_encoded_type(), and SQLTypeInfo::is_string().

Referenced by fetchChunks().

3440  {
3441  const int nest_level = inner_col_desc.getScanDesc().getNestLevel();
3442  const auto& table_key = inner_col_desc.getScanDesc().getTableKey();
3443  CHECK_LT(static_cast<size_t>(nest_level), selected_fragments.size());
3444  CHECK_EQ(table_key, selected_fragments[nest_level].table_key);
3445  const auto& fragments = selected_fragments[nest_level].fragment_ids;
3446  auto need_linearize =
3447  cd->columnType.is_array() ||
3448  (cd->columnType.is_string() && !cd->columnType.is_dict_encoded_type());
3449  return table_key.table_id > 0 && need_linearize && fragments.size() > 1;
3450 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
int32_t getNestLevel() const
bool is_dict_encoded_type() const
Definition: sqltypes.h:655
const shared::TableKey & getTableKey() const
#define CHECK_LT(x, y)
Definition: Logger.h:303
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:561
const InputDescriptor & getScanDesc() const
bool is_array() const
Definition: sqltypes.h:585

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void Executor::nukeCacheOfExecutors ( )
inlinestatic

Definition at line 505 of file Execute.h.

References execute_mutex_, executors_, and executors_cache_mutex_.

505  {
506  heavyai::unique_lock<heavyai::shared_mutex> flush_lock(
507  execute_mutex_); // don't want native code to vanish while executing
508  heavyai::unique_lock<heavyai::shared_mutex> lock(executors_cache_mutex_);
509  executors_.clear();
510  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
std::unique_lock< T > unique_lock
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1581
static heavyai::shared_mutex executors_cache_mutex_
Definition: Execute.h:1602
void Executor::nukeOldState ( const bool  allow_lazy_fetch,
const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 4268 of file Execute.cpp.

References cgen_state_, compilation_queue_time_ms_, RelAlgExecutionUnit::join_quals, kernel_queue_time_ms_, LEFT, and plan_state_.

4271  {
4272  kernel_queue_time_ms_ = 0;
4273  compilation_queue_time_ms_ = 0;
4274  const bool contains_left_deep_outer_join =
4275  ra_exe_unit && std::find_if(ra_exe_unit->join_quals.begin(),
4276  ra_exe_unit->join_quals.end(),
4277  [](const JoinCondition& join_condition) {
4278  return join_condition.type == JoinType::LEFT;
4279  }) != ra_exe_unit->join_quals.end();
4280  cgen_state_.reset(
4281  new CgenState(query_infos.size(), contains_left_deep_outer_join, this));
4282  plan_state_.reset(new PlanState(allow_lazy_fetch && !contains_left_deep_outer_join,
4283  query_infos,
4284  deleted_cols_map,
4285  this));
4286 }
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
int64_t compilation_queue_time_ms_
Definition: Execute.h:1563
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
unsigned Executor::numBlocksPerMP ( ) const

Definition at line 4361 of file Execute.cpp.

References shared::ceil_div(), cudaMgr(), and grid_size_x_.

4361  {
4362  return std::max((unsigned)2,
4363  shared::ceil_div(grid_size_x_, cudaMgr()->getMinNumMPsForAllDevices()));
4364 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
unsigned ceil_div(unsigned const dividend, unsigned const divisor)
Definition: misc.h:338
unsigned grid_size_x_
Definition: Execute.h:1553

+ Here is the call graph for this function:

std::shared_ptr< CompilationContext > Executor::optimizeAndCodegenCPU ( llvm::Function *  query_func,
llvm::Function *  multifrag_query_func,
const std::unordered_set< llvm::Function * > &  live_funcs,
const CompilationOptions co 
)
private

Definition at line 487 of file NativeCodegen.cpp.

References QueryEngine::getInstance(), logger::INFO, CodeGenerator::link_udf_module(), LOG, serialize_llvm_object(), and to_string().

491  {
492  CodeCacheKey key{serialize_llvm_object(query_func),
493  serialize_llvm_object(cgen_state_->row_func_)};
494 
495  llvm::Module* M = query_func->getParent();
496  auto* flag = llvm::mdconst::extract_or_null<llvm::ConstantInt>(
497  M->getModuleFlag("manage_memory_buffer"));
498  if (flag and flag->getZExtValue() == 1 and M->getFunction("allocate_varlen_buffer") and
499  M->getFunction("register_buffer_with_executor_rsm")) {
500  LOG(INFO) << "including executor addr to cache key\n";
501  key.push_back(std::to_string(reinterpret_cast<int64_t>(this)));
502  }
503  if (cgen_state_->filter_func_) {
504  key.push_back(serialize_llvm_object(cgen_state_->filter_func_));
505  }
506  for (const auto helper : cgen_state_->helper_functions_) {
507  key.push_back(serialize_llvm_object(helper));
508  }
509  auto cached_code = QueryEngine::getInstance()->cpu_code_accessor->get_value(key);
510  if (cached_code) {
511  return cached_code;
512  }
513 
514  if (cgen_state_->needs_geos_) {
515 #ifdef ENABLE_GEOS
516  auto llvm_module = multifrag_query_func->getParent();
517  load_geos_dynamic_library();
518 
519  // Read geos runtime module and bind GEOS API function references to GEOS library
520  auto rt_geos_module_copy = llvm::CloneModule(
521  *get_geos_module(), cgen_state_->vmap_, [](const llvm::GlobalValue* gv) {
522  auto func = llvm::dyn_cast<llvm::Function>(gv);
523  if (!func) {
524  return true;
525  }
526  switch (func->getLinkage()) {
527  case llvm::GlobalValue::LinkageTypes::InternalLinkage:
528  case llvm::GlobalValue::LinkageTypes::PrivateLinkage:
529  case llvm::GlobalValue::LinkageTypes::ExternalLinkage:
530  case llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage:
531  return true;
532  default:
533  return false;
534  }
535  });
536  CodeGenerator::link_udf_module(rt_geos_module_copy,
537  *llvm_module,
538  cgen_state_.get(),
539  llvm::Linker::Flags::LinkOnlyNeeded);
540 #else
541  throw std::runtime_error("GEOS is disabled in this build");
542 #endif
543  }
544 
545  auto execution_engine =
546  CodeGenerator::generateNativeCPUCode(query_func, live_funcs, co);
547  auto cpu_compilation_context =
548  std::make_shared<CpuCompilationContext>(std::move(execution_engine));
549  cpu_compilation_context->setFunctionPointer(multifrag_query_func);
550  QueryEngine::getInstance()->cpu_code_accessor->put(key, cpu_compilation_context);
551  return std::dynamic_pointer_cast<CompilationContext>(cpu_compilation_context);
552 }
const std::unique_ptr< llvm::Module > & get_geos_module() const
Definition: Execute.h:545
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:24
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
std::string to_string(char const *&&v)
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
std::string serialize_llvm_object(const T *llvm_obj)
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89

+ Here is the call graph for this function:

std::shared_ptr< CompilationContext > Executor::optimizeAndCodegenGPU ( llvm::Function *  query_func,
llvm::Function *  multifrag_query_func,
std::unordered_set< llvm::Function * > &  live_funcs,
const bool  no_inline,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  is_gpu_smem_used,
const CompilationOptions co 
)
private

Definition at line 1395 of file NativeCodegen.cpp.

1402  {
1403 #ifdef HAVE_CUDA
1404  auto timer = DEBUG_TIMER(__func__);
1405 
1406  CHECK(cuda_mgr);
1407  CodeCacheKey key{serialize_llvm_object(query_func),
1408  serialize_llvm_object(cgen_state_->row_func_)};
1409  if (cgen_state_->filter_func_) {
1410  key.push_back(serialize_llvm_object(cgen_state_->filter_func_));
1411  }
1412  for (const auto helper : cgen_state_->helper_functions_) {
1413  key.push_back(serialize_llvm_object(helper));
1414  }
1415  auto cached_code = QueryEngine::getInstance()->gpu_code_accessor->get_value(key);
1416  if (cached_code) {
1417  return cached_code;
1418  }
1419 
1420  bool row_func_not_inlined = false;
1421  if (no_inline) {
1422  for (auto it = llvm::inst_begin(cgen_state_->row_func_),
1423  e = llvm::inst_end(cgen_state_->row_func_);
1424  it != e;
1425  ++it) {
1426  if (llvm::isa<llvm::CallInst>(*it)) {
1427  auto& get_gv_call = llvm::cast<llvm::CallInst>(*it);
1428  auto const func_name = CodegenUtil::getCalledFunctionName(get_gv_call);
1429  if (func_name &&
1430  (*func_name == "array_size" || *func_name == "linear_probabilistic_count")) {
1432  row_func_not_inlined = true;
1433  break;
1434  }
1435  }
1436  }
1437  }
1438 
1440  CodeGenerator::GPUTarget gpu_target{
1441  nvptx_target_machine_.get(), cuda_mgr, cgen_state_.get(), row_func_not_inlined};
1442  std::shared_ptr<GpuCompilationContext> compilation_context;
1443 
1444  try {
1445  compilation_context = CodeGenerator::generateNativeGPUCode(this,
1446  query_func,
1447  multifrag_query_func,
1448  live_funcs,
1449  is_gpu_smem_used,
1450  co,
1451  gpu_target);
1452  } catch (CudaMgr_Namespace::CudaErrorException& cuda_error) {
1453  if (cuda_error.getStatus() == CUDA_ERROR_OUT_OF_MEMORY) {
 1454  // Thrown when memory could not be allocated on the GPU.
 1455  // Retry once after evicting a portion of the code cache.
1456  auto& code_cache_accessor = QueryEngine::getInstance()->gpu_code_accessor;
1457  auto const num_entries_to_evict =
1458  code_cache_accessor->computeNumEntriesToEvict(g_fraction_code_cache_to_evict);
1459  code_cache_accessor->evictEntries(num_entries_to_evict);
1460  compilation_context = CodeGenerator::generateNativeGPUCode(this,
1461  query_func,
1462  multifrag_query_func,
1463  live_funcs,
1464  is_gpu_smem_used,
1465  co,
1466  gpu_target);
1467  } else {
1468  throw;
1469  }
1470  }
1471  QueryEngine::getInstance()->gpu_code_accessor->put(key, compilation_context);
1472  return std::dynamic_pointer_cast<CompilationContext>(compilation_context);
1473 #else
1474  return nullptr;
1475 #endif
1476 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1547
void mark_function_never_inline(llvm::Function *func)
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:24
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
void initializeNVPTXBackend() const
std::string serialize_llvm_object(const T *llvm_obj)
static std::shared_ptr< GpuCompilationContext > generateNativeGPUCode(Executor *executor, llvm::Function *func, llvm::Function *wrapper_func, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co, const GPUTarget &gpu_target)
float g_fraction_code_cache_to_evict
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89
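
The CUDA out-of-memory handling above follows a retry-once policy: evict a fraction of the GPU code cache, then compile a second time, and let any other CUDA error propagate. A schematic sketch of that control flow, assuming the CudaMgr_Namespace::CudaErrorException and CUDA_ERROR_OUT_OF_MEMORY status used above; compile and evict_some stand in for the CodeGenerator::generateNativeGPUCode call and the code-cache accessor:

// Sketch only: retry GPU code generation once after evicting code-cache entries.
template <typename CompileFn, typename EvictFn>
auto compile_with_one_retry(CompileFn compile, EvictFn evict_some) {
  try {
    return compile();  // first attempt
  } catch (const CudaMgr_Namespace::CudaErrorException& e) {
    if (e.getStatus() != CUDA_ERROR_OUT_OF_MEMORY) {
      throw;  // only OOM is retried
    }
    evict_some();      // free entries from the GPU code cache
    return compile();  // second and final attempt
  }
}
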
void Executor::pause_executor_queue ( )
static

Definition at line 5420 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

Referenced by anonymous_namespace{DBHandler.cpp}::pause_and_resume_executor_queue(), and DBHandler::pause_executor_queue().

5420  {
5422  throw std::runtime_error(
5423  "Executor queue cannot be paused as it requires Executor Resource Manager to be "
5424  "enabled");
5425  }
5426  executor_resource_mgr_->pause_process_queue();
5427 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178

+ Here is the caller graph for this function:

void Executor::preloadFragOffsets ( const std::vector< InputDescriptor > &  input_descs,
const std::vector< InputTableInfo > &  query_infos 
)
private

Definition at line 4288 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK_LT, and get_arg_by_name().

4289  {
4291  const auto ld_count = input_descs.size();
4292  auto frag_off_ptr = get_arg_by_name(cgen_state_->row_func_, "frag_row_off");
4293  for (size_t i = 0; i < ld_count; ++i) {
4294  CHECK_LT(i, query_infos.size());
4295  const auto frag_count = query_infos[i].info.fragments.size();
4296  if (i > 0) {
4297  cgen_state_->frag_offsets_.push_back(nullptr);
4298  } else {
4299  if (frag_count > 1) {
4300  cgen_state_->frag_offsets_.push_back(cgen_state_->ir_builder_.CreateLoad(
4301  frag_off_ptr->getType()->getPointerElementType(), frag_off_ptr));
4302  } else {
4303  cgen_state_->frag_offsets_.push_back(nullptr);
4304  }
4305  }
4306  }
4307 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the call graph for this function:
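
The body above looks up the row function's frag_row_off argument by name and, for the first multi-fragment input, emits a load through that pointer (later inputs get a nullptr placeholder). A minimal sketch of the same IRBuilder pattern; the helper below is a hypothetical stand-in for get_arg_by_name() and follows the older getPointerElementType() API used by this code base:

#include <llvm/IR/Function.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Value.h>

// Find the "frag_row_off" argument of `row_func` and load through it.
llvm::Value* load_first_frag_offset(llvm::Function* row_func,
                                    llvm::IRBuilder<>& ir_builder) {
  for (auto& arg : row_func->args()) {
    if (arg.getName() == "frag_row_off") {
      return ir_builder.CreateLoad(arg.getType()->getPointerElementType(), &arg);
    }
  }
  return nullptr;  // no such argument
}
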

std::vector< llvm::Value * > Executor::prepareRangeModeFuncArgs ( bool  for_start_bound,
const Analyzer::WindowFrame frame_bound,
bool  is_timestamp_type_frame,
llvm::Value *  order_key_null_val,
const WindowFrameBoundFuncArgs frame_args 
) const
private

Definition at line 875 of file WindowFunctionIR.cpp.

References WindowFrameBoundFuncArgs::current_col_value_lv, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, Analyzer::WindowFrame::isCurrentRowBound(), WindowFrameBoundFuncArgs::null_end_pos_lv, WindowFrameBoundFuncArgs::null_start_pos_lv, WindowFrameBoundFuncArgs::nulls_first_lv, WindowFrameBoundFuncArgs::num_elem_current_partition_lv, WindowFrameBoundFuncArgs::order_key_buf_ptr_lv, WindowFrameBoundFuncArgs::target_partition_rowid_ptr_lv, and WindowFrameBoundFuncArgs::target_partition_sorted_rowid_ptr_lv.

880  {
881  llvm::Value* bound_expr_lv =
882  for_start_bound ? args.frame_start_bound_expr_lv : args.frame_end_bound_expr_lv;
883  llvm::Value* target_val_lv =
884  frame_bound->isCurrentRowBound() || !is_timestamp_type_frame
885  ? args.current_col_value_lv
886  : bound_expr_lv;
887  llvm::Value* frame_bound_val_lv =
888  frame_bound->isCurrentRowBound() || is_timestamp_type_frame
889  ? args.int64_t_zero_val_lv
890  : bound_expr_lv;
891  std::vector<llvm::Value*> frame_args{args.num_elem_current_partition_lv,
892  target_val_lv,
893  args.order_key_buf_ptr_lv,
894  args.target_partition_rowid_ptr_lv,
895  args.target_partition_sorted_rowid_ptr_lv,
896  frame_bound_val_lv,
897  order_key_null_val,
898  args.nulls_first_lv,
899  args.null_start_pos_lv,
900  args.null_end_pos_lv};
901  return frame_args;
902 }
bool isCurrentRowBound() const
Definition: Analyzer.h:2841

+ Here is the call graph for this function:
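
For RANGE-mode frames, the two ternaries above decide which values the runtime frame-bound search receives: the current row's ordering value versus the computed bound expression, and the bound expression versus zero, depending on whether the bound is CURRENT ROW and whether the frame is defined over a timestamp-typed ordering column. A compact restatement of that selection (sketch only; the llvm::Value* parameters stand for the code-generated values carried in WindowFrameBoundFuncArgs):

#include <llvm/IR/Value.h>

struct RangeFrameValues {
  llvm::Value* target_val_lv;       // value the frame-bound search compares against
  llvm::Value* frame_bound_val_lv;  // offset applied during the search
};

RangeFrameValues select_range_frame_values(const bool is_current_row_bound,
                                           const bool is_timestamp_type_frame,
                                           llvm::Value* current_col_value_lv,
                                           llvm::Value* bound_expr_lv,
                                           llvm::Value* int64_t_zero_val_lv) {
  RangeFrameValues vals;
  vals.target_val_lv = (is_current_row_bound || !is_timestamp_type_frame)
                           ? current_col_value_lv
                           : bound_expr_lv;
  vals.frame_bound_val_lv = (is_current_row_bound || is_timestamp_type_frame)
                                ? int64_t_zero_val_lv
                                : bound_expr_lv;
  return vals;
}
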

std::vector< llvm::Value * > Executor::prepareRowModeFuncArgs ( bool  for_start_bound,
SqlWindowFrameBoundType  bound_type,
const WindowFrameBoundFuncArgs args 
) const
private

Definition at line 857 of file WindowFunctionIR.cpp.

References WindowFrameBoundFuncArgs::current_partition_start_offset_lv, CURRENT_ROW, WindowFrameBoundFuncArgs::current_row_pos_lv, EXPR_FOLLOWING, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, and WindowFrameBoundFuncArgs::num_elem_current_partition_lv.

860  {
861  std::vector<llvm::Value*> frame_args{args.current_row_pos_lv,
863  if (bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
864  frame_args.push_back(args.int64_t_zero_val_lv);
865  } else {
866  frame_args.push_back(for_start_bound ? args.frame_start_bound_expr_lv
867  : args.frame_end_bound_expr_lv);
868  if (bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
869  frame_args.push_back(args.num_elem_current_partition_lv);
870  }
871  }
872  return frame_args;
873 }
llvm::Value * num_elem_current_partition_lv
llvm::Value * current_row_pos_lv
llvm::Value * frame_end_bound_expr_lv
llvm::Value * current_partition_start_offset_lv
llvm::Value * int64_t_zero_val_lv
llvm::Value * frame_start_bound_expr_lv
void Executor::redeclareFilterFunction ( )
private

Definition at line 1087 of file IRCodegen.cpp.

References CHECK, CHECK_EQ, get_int_type(), and to_string().

1087  {
1088  if (!cgen_state_->filter_func_) {
1089  return;
1090  }
1091 
1092  // Loop over all the instructions used in the filter func.
1093  // The filter func instructions were generated as if for row func.
1094  // Remap any values used by those instructions to filter func args
1095  // and remember to forward them through the call in the row func.
1096  for (auto bb_it = cgen_state_->filter_func_->begin();
1097  bb_it != cgen_state_->filter_func_->end();
1098  ++bb_it) {
1099  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
1100  size_t i = 0;
1101  for (auto op_it = instr_it->value_op_begin(); op_it != instr_it->value_op_end();
1102  ++op_it, ++i) {
1103  llvm::Value* v = *op_it;
1104 
1105  // The last LLVM operand on a call instruction is the function to be called. Never
1106  // remap it.
1107  if (llvm::dyn_cast<const llvm::CallInst>(instr_it) &&
1108  op_it == instr_it->value_op_end() - 1) {
1109  continue;
1110  }
1111 
1112  CHECK(v);
1113  if (auto* instr = llvm::dyn_cast<llvm::Instruction>(v);
1114  instr && instr->getParent() &&
1115  instr->getParent()->getParent() == cgen_state_->row_func_) {
1116  // Remember that this filter func arg is needed.
1117  cgen_state_->filter_func_args_[v] = nullptr;
1118  } else if (auto* argum = llvm::dyn_cast<llvm::Argument>(v);
1119  argum && argum->getParent() == cgen_state_->row_func_) {
1120  // Remember that this filter func arg is needed.
1121  cgen_state_->filter_func_args_[v] = nullptr;
1122  }
1123  }
1124  }
1125  }
1126 
1127  // Create filter_func2 with parameters only for those row func values that are known to
1128  // be used in the filter func code.
1129  std::vector<llvm::Type*> filter_func_arg_types;
1130  filter_func_arg_types.reserve(cgen_state_->filter_func_args_.v_.size());
1131  for (auto& arg : cgen_state_->filter_func_args_.v_) {
1132  filter_func_arg_types.push_back(arg->getType());
1133  }
1134  auto ft = llvm::FunctionType::get(
1135  get_int_type(32, cgen_state_->context_), filter_func_arg_types, false);
1136  cgen_state_->filter_func_->setName("old_filter_func");
1137  auto filter_func2 = llvm::Function::Create(ft,
1138  llvm::Function::ExternalLinkage,
1139  "filter_func",
1140  cgen_state_->filter_func_->getParent());
1141  CHECK_EQ(filter_func2->arg_size(), cgen_state_->filter_func_args_.v_.size());
1142  auto arg_it = cgen_state_->filter_func_args_.begin();
1143  size_t i = 0;
1144  for (llvm::Function::arg_iterator I = filter_func2->arg_begin(),
1145  E = filter_func2->arg_end();
1146  I != E;
1147  ++I, ++arg_it) {
1148  arg_it->second = &*I;
1149  if (arg_it->first->hasName()) {
1150  I->setName(arg_it->first->getName());
1151  } else {
1152  I->setName("extra" + std::to_string(i++));
1153  }
1154  }
1155 
1156  // copy the filter_func function body over
1157  // see
1158  // https://stackoverflow.com/questions/12864106/move-function-body-avoiding-full-cloning/18751365
1159  filter_func2->getBasicBlockList().splice(
1160  filter_func2->begin(), cgen_state_->filter_func_->getBasicBlockList());
1161 
1162  if (cgen_state_->current_func_ == cgen_state_->filter_func_) {
1163  cgen_state_->current_func_ = filter_func2;
1164  }
1165  cgen_state_->filter_func_ = filter_func2;
1166 
1167  // loop over all the operands in the filter func
1168  for (auto bb_it = cgen_state_->filter_func_->begin();
1169  bb_it != cgen_state_->filter_func_->end();
1170  ++bb_it) {
1171  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
1172  size_t i = 0;
1173  for (auto op_it = instr_it->op_begin(); op_it != instr_it->op_end(); ++op_it, ++i) {
1174  llvm::Value* v = op_it->get();
1175  if (auto arg_it = cgen_state_->filter_func_args_.find(v);
1176  arg_it != cgen_state_->filter_func_args_.end()) {
1177  // replace row func value with a filter func arg
1178  llvm::Use* use = &*op_it;
1179  use->set(arg_it->second);
1180  }
1181  }
1182  }
1183  }
1184 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:
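
The redeclaration above does not clone the filter function: it creates a new function whose parameters are exactly the row-function values the filter body uses, moves the basic blocks over with splice(), and then rewrites operands to point at the new arguments. A stripped-down sketch of the splice step, using the same LLVM API as the code above (function names are illustrative):

#include <llvm/IR/Function.h>

// Move (not clone) every basic block of `old_fn` into the freshly created `new_fn`.
// Afterwards `old_fn` is an empty shell, and any operands that referenced values
// from the row function still need to be remapped, as the final loop above does.
void move_function_body(llvm::Function* old_fn, llvm::Function* new_fn) {
  new_fn->getBasicBlockList().splice(new_fn->begin(), old_fn->getBasicBlockList());
}
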

ResultSetPtr Executor::reduceMultiDeviceResults ( const RelAlgExecutionUnit ra_exe_unit,
std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  all_fragment_results,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const QueryMemoryDescriptor query_mem_desc 
) const
private

Definition at line 1589 of file Execute.cpp.

References blockSize(), CPU, DEBUG_TIMER, RelAlgExecutionUnit::estimator, ResultSet::fixupQueryMemoryDescriptor(), getUniqueThreadSharedResultSets(), gridSize(), QueryMemoryDescriptor, reduce_estimator_results(), reduceMultiDeviceResultSets(), RelAlgExecutionUnit::target_exprs, and QueryMemoryDescriptor::threadsCanReuseGroupByBuffers().

Referenced by collectAllDeviceResults().

1593  {
1594  auto timer = DEBUG_TIMER(__func__);
1595  if (ra_exe_unit.estimator) {
1596  return reduce_estimator_results(ra_exe_unit, results_per_device);
1597  }
1598 
1599  if (results_per_device.empty()) {
1600  auto const targets = shared::transform<std::vector<TargetInfo>>(
1601  ra_exe_unit.target_exprs, GetTargetInfo{});
1602  return std::make_shared<ResultSet>(targets,
1605  nullptr,
1606  blockSize(),
1607  gridSize());
1608  }
1609 
1610  if (query_mem_desc.threadsCanReuseGroupByBuffers()) {
1611  auto unique_results = getUniqueThreadSharedResultSets(results_per_device);
1613  unique_results,
1614  row_set_mem_owner,
1615  ResultSet::fixupQueryMemoryDescriptor(query_mem_desc));
1616  }
1618  results_per_device,
1619  row_set_mem_owner,
1620  ResultSet::fixupQueryMemoryDescriptor(query_mem_desc));
1621 }
std::vector< Analyzer::Expr * > target_exprs
bool threadsCanReuseGroupByBuffers() const
friend class QueryMemoryDescriptor
Definition: Execute.h:1641
const std::shared_ptr< Analyzer::Estimator > estimator
unsigned gridSize() const
Definition: Execute.cpp:4352
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:766
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1664
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > getUniqueThreadSharedResultSets(const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device) const
Definition: Execute.cpp:1624
#define DEBUG_TIMER(name)
Definition: Logger.h:412
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
unsigned blockSize() const
Definition: Execute.cpp:4366

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::reduceMultiDeviceResultSets ( std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  all_fragment_results,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const QueryMemoryDescriptor query_mem_desc 
) const
private

Definition at line 1664 of file Execute.cpp.

References gpu_enabled::accumulate(), blockSize(), CHECK, CPU, DEBUG_TIMER, executor_id_, anonymous_namespace{Execute.cpp}::get_reduction_code(), QueryMemoryDescriptor::getQueryDescriptionType(), gridSize(), heavyai::GroupByBaselineHash, logger::init(), plan_state_, query_mem_desc, and QueryMemoryDescriptor::setEntryCount().

Referenced by reduceMultiDeviceResults().

1667  {
1668  auto timer = DEBUG_TIMER(__func__);
1669  std::shared_ptr<ResultSet> reduced_results;
1670 
1671  const auto& first = results_per_device.front().first;
1672 
1673  if (query_mem_desc.getQueryDescriptionType() ==
1675  results_per_device.size() > 1) {
1676  const auto total_entry_count = std::accumulate(
1677  results_per_device.begin(),
1678  results_per_device.end(),
1679  size_t(0),
1680  [](const size_t init, const std::pair<ResultSetPtr, std::vector<size_t>>& rs) {
1681  const auto& r = rs.first;
1682  return init + r->getQueryMemDesc().getEntryCount();
1683  });
1684  CHECK(total_entry_count);
1685  auto query_mem_desc = first->getQueryMemDesc();
1686  query_mem_desc.setEntryCount(total_entry_count);
1687  reduced_results = std::make_shared<ResultSet>(first->getTargetInfos(),
1690  row_set_mem_owner,
1691  blockSize(),
1692  gridSize());
1693  auto result_storage = reduced_results->allocateStorage(plan_state_->init_agg_vals_);
1694  reduced_results->initializeStorage();
1695  switch (query_mem_desc.getEffectiveKeyWidth()) {
1696  case 4:
1697  first->getStorage()->moveEntriesToBuffer<int32_t>(
1698  result_storage->getUnderlyingBuffer(), query_mem_desc.getEntryCount());
1699  break;
1700  case 8:
1701  first->getStorage()->moveEntriesToBuffer<int64_t>(
1702  result_storage->getUnderlyingBuffer(), query_mem_desc.getEntryCount());
1703  break;
1704  default:
1705  CHECK(false);
1706  }
1707  } else {
1708  reduced_results = first;
1709  }
1710 
1711  int64_t compilation_queue_time = 0;
1712  const auto reduction_code =
1713  get_reduction_code(executor_id_, results_per_device, &compilation_queue_time);
1714 
1715  for (size_t i = 1; i < results_per_device.size(); ++i) {
1716  reduced_results->getStorage()->reduce(
1717  *(results_per_device[i].first->getStorage()), {}, reduction_code, executor_id_);
1718  }
1719  reduced_results->addCompilationQueueTime(compilation_queue_time);
1720  reduced_results->invalidateCachedRowCount();
1721  return reduced_results;
1722 }
void setEntryCount(const size_t val)
const ExecutorId executor_id_
Definition: Execute.h:1476
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
QueryDescriptionType getQueryDescriptionType() const
ReductionCode get_reduction_code(const size_t executor_id, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device, int64_t *compilation_queue_time)
Definition: Execute.cpp:1647
unsigned gridSize() const
Definition: Execute.cpp:4352
GroupByBaselineHash
Definition: enums.h:58
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
unsigned blockSize() const
Definition: Execute.cpp:4366

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
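
For baseline-hash group by with more than one per-device result, the reduction above first sizes the output buffer by summing the entry counts of every per-device result set before moving entries into it. A small sketch of that accumulation, with ResultSetPtr and the per-device pair type as in the signature above:

#include <numeric>
#include <utility>
#include <vector>

// Sum entry counts across per-device result sets (sketch).
size_t total_entry_count(
    const std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device) {
  return std::accumulate(
      results_per_device.begin(),
      results_per_device.end(),
      size_t(0),
      [](const size_t running_total,
         const std::pair<ResultSetPtr, std::vector<size_t>>& rs) {
        return running_total + rs.first->getQueryMemDesc().getEntryCount();
      });
}
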

std::pair< int64_t, int32_t > Executor::reduceResults ( const SQLAgg  agg,
const SQLTypeInfo ti,
const int64_t  agg_init_val,
const int8_t  out_byte_width,
const int64_t *  out_vec,
const size_t  out_vec_sz,
const bool  is_group_by,
const bool  float_argument_input 
)
static

Definition at line 1337 of file Execute.cpp.

References agg_max_double_skip_val(), agg_max_float_skip_val(), agg_max_skip_val(), agg_min_double_skip_val(), agg_min_float_skip_val(), agg_min_skip_val(), agg_sum_double_skip_val(), agg_sum_float_skip_val(), agg_sum_skip_val(), CHECK, float_to_double_bin(), SQLTypeInfo::get_notnull(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_decimal(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), kAVG, kCOUNT, kCOUNT_IF, kMAX, kMIN, kSAMPLE, kSINGLE_VALUE, kSUM, kSUM_IF, and UNREACHABLE.

Referenced by executePlanWithoutGroupBy().

1344  {
1345  switch (agg) {
1346  case kAVG:
1347  case kSUM:
1348  case kSUM_IF:
1349  if (0 != agg_init_val) {
1350  if (ti.is_integer() || ti.is_decimal() || ti.is_time() || ti.is_boolean()) {
1351  int64_t agg_result = agg_init_val;
1352  for (size_t i = 0; i < out_vec_sz; ++i) {
1353  agg_sum_skip_val(&agg_result, out_vec[i], agg_init_val);
1354  }
1355  return {agg_result, 0};
1356  } else {
1357  CHECK(ti.is_fp());
1358  switch (out_byte_width) {
1359  case 4: {
1360  int agg_result = static_cast<int32_t>(agg_init_val);
1361  for (size_t i = 0; i < out_vec_sz; ++i) {
1363  &agg_result,
1364  *reinterpret_cast<const float*>(may_alias_ptr(&out_vec[i])),
1365  *reinterpret_cast<const float*>(may_alias_ptr(&agg_init_val)));
1366  }
1367  const int64_t converted_bin =
1368  float_argument_input
1369  ? static_cast<int64_t>(agg_result)
1370  : float_to_double_bin(static_cast<int32_t>(agg_result), true);
1371  return {converted_bin, 0};
1372  break;
1373  }
1374  case 8: {
1375  int64_t agg_result = agg_init_val;
1376  for (size_t i = 0; i < out_vec_sz; ++i) {
1378  &agg_result,
1379  *reinterpret_cast<const double*>(may_alias_ptr(&out_vec[i])),
1380  *reinterpret_cast<const double*>(may_alias_ptr(&agg_init_val)));
1381  }
1382  return {agg_result, 0};
1383  break;
1384  }
1385  default:
1386  CHECK(false);
1387  }
1388  }
1389  }
1390  if (ti.is_integer() || ti.is_decimal() || ti.is_time()) {
1391  int64_t agg_result = 0;
1392  for (size_t i = 0; i < out_vec_sz; ++i) {
1393  agg_result += out_vec[i];
1394  }
1395  return {agg_result, 0};
1396  } else {
1397  CHECK(ti.is_fp());
1398  switch (out_byte_width) {
1399  case 4: {
1400  float r = 0.;
1401  for (size_t i = 0; i < out_vec_sz; ++i) {
1402  r += *reinterpret_cast<const float*>(may_alias_ptr(&out_vec[i]));
1403  }
1404  const auto float_bin = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
1405  const int64_t converted_bin =
1406  float_argument_input ? float_bin : float_to_double_bin(float_bin, true);
1407  return {converted_bin, 0};
1408  }
1409  case 8: {
1410  double r = 0.;
1411  for (size_t i = 0; i < out_vec_sz; ++i) {
1412  r += *reinterpret_cast<const double*>(may_alias_ptr(&out_vec[i]));
1413  }
1414  return {*reinterpret_cast<const int64_t*>(may_alias_ptr(&r)), 0};
1415  }
1416  default:
1417  CHECK(false);
1418  }
1419  }
1420  break;
1421  case kCOUNT:
1422  case kCOUNT_IF: {
1423  uint64_t agg_result = 0;
1424  for (size_t i = 0; i < out_vec_sz; ++i) {
1425  const uint64_t out = static_cast<uint64_t>(out_vec[i]);
1426  agg_result += out;
1427  }
1428  return {static_cast<int64_t>(agg_result), 0};
1429  }
1430  case kMIN: {
1431  if (ti.is_integer() || ti.is_decimal() || ti.is_time() || ti.is_boolean()) {
1432  int64_t agg_result = agg_init_val;
1433  for (size_t i = 0; i < out_vec_sz; ++i) {
1434  agg_min_skip_val(&agg_result, out_vec[i], agg_init_val);
1435  }
1436  return {agg_result, 0};
1437  } else {
1438  switch (out_byte_width) {
1439  case 4: {
1440  int32_t agg_result = static_cast<int32_t>(agg_init_val);
1441  for (size_t i = 0; i < out_vec_sz; ++i) {
1443  &agg_result,
1444  *reinterpret_cast<const float*>(may_alias_ptr(&out_vec[i])),
1445  *reinterpret_cast<const float*>(may_alias_ptr(&agg_init_val)));
1446  }
1447  const int64_t converted_bin =
1448  float_argument_input
1449  ? static_cast<int64_t>(agg_result)
1450  : float_to_double_bin(static_cast<int32_t>(agg_result), true);
1451  return {converted_bin, 0};
1452  }
1453  case 8: {
1454  int64_t agg_result = agg_init_val;
1455  for (size_t i = 0; i < out_vec_sz; ++i) {
1457  &agg_result,
1458  *reinterpret_cast<const double*>(may_alias_ptr(&out_vec[i])),
1459  *reinterpret_cast<const double*>(may_alias_ptr(&agg_init_val)));
1460  }
1461  return {agg_result, 0};
1462  }
1463  default:
1464  CHECK(false);
1465  }
1466  }
1467  }
1468  case kMAX:
1469  if (ti.is_integer() || ti.is_decimal() || ti.is_time() || ti.is_boolean()) {
1470  int64_t agg_result = agg_init_val;
1471  for (size_t i = 0; i < out_vec_sz; ++i) {
1472  agg_max_skip_val(&agg_result, out_vec[i], agg_init_val);
1473  }
1474  return {agg_result, 0};
1475  } else {
1476  switch (out_byte_width) {
1477  case 4: {
1478  int32_t agg_result = static_cast<int32_t>(agg_init_val);
1479  for (size_t i = 0; i < out_vec_sz; ++i) {
1481  &agg_result,
1482  *reinterpret_cast<const float*>(may_alias_ptr(&out_vec[i])),
1483  *reinterpret_cast<const float*>(may_alias_ptr(&agg_init_val)));
1484  }
1485  const int64_t converted_bin =
1486  float_argument_input ? static_cast<int64_t>(agg_result)
1487  : float_to_double_bin(agg_result, !ti.get_notnull());
1488  return {converted_bin, 0};
1489  }
1490  case 8: {
1491  int64_t agg_result = agg_init_val;
1492  for (size_t i = 0; i < out_vec_sz; ++i) {
1494  &agg_result,
1495  *reinterpret_cast<const double*>(may_alias_ptr(&out_vec[i])),
1496  *reinterpret_cast<const double*>(may_alias_ptr(&agg_init_val)));
1497  }
1498  return {agg_result, 0};
1499  }
1500  default:
1501  CHECK(false);
1502  }
1503  }
1504  case kSINGLE_VALUE: {
1505  int64_t agg_result = agg_init_val;
1506  for (size_t i = 0; i < out_vec_sz; ++i) {
1507  if (out_vec[i] != agg_init_val) {
1508  if (agg_result == agg_init_val) {
1509  agg_result = out_vec[i];
1510  } else if (out_vec[i] != agg_result) {
1511  return {agg_result, int32_t(ErrorCode::SINGLE_VALUE_FOUND_MULTIPLE_VALUES)};
1512  }
1513  }
1514  }
1515  return {agg_result, 0};
1516  }
1517  case kSAMPLE: {
1518  int64_t agg_result = agg_init_val;
1519  for (size_t i = 0; i < out_vec_sz; ++i) {
1520  if (out_vec[i] != agg_init_val) {
1521  agg_result = out_vec[i];
1522  break;
1523  }
1524  }
1525  return {agg_result, 0};
1526  }
1527  default:
1528  UNREACHABLE() << "Unsupported SQLAgg: " << agg;
1529  }
1530  abort();
1531 }
int64_t float_to_double_bin(int32_t val, bool nullable=false)
bool is_fp() const
Definition: sqltypes.h:573
#define UNREACHABLE()
Definition: Logger.h:338
bool is_time() const
Definition: sqltypes.h:579
RUNTIME_EXPORT void agg_sum_float_skip_val(int32_t *agg, const float val, const float skip_val)
Definition: sqldefs.h:78
RUNTIME_EXPORT void agg_sum_double_skip_val(int64_t *agg, const double val, const double skip_val)
bool is_integer() const
Definition: sqltypes.h:567
RUNTIME_EXPORT void agg_max_double_skip_val(int64_t *agg, const double val, const double skip_val)
Definition: sqldefs.h:80
bool is_boolean() const
Definition: sqltypes.h:582
RUNTIME_EXPORT void agg_min_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
Definition: sqldefs.h:81
RUNTIME_EXPORT void agg_min_double_skip_val(int64_t *agg, const double val, const double skip_val)
#define CHECK(condition)
Definition: Logger.h:291
RUNTIME_EXPORT void agg_max_float_skip_val(int32_t *agg, const float val, const float skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
RUNTIME_EXPORT void agg_min_float_skip_val(int32_t *agg, const float val, const float skip_val)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
Definition: sqldefs.h:79
RUNTIME_EXPORT void agg_max_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
bool is_decimal() const
Definition: sqltypes.h:570
Definition: sqldefs.h:77

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
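
The per-aggregate loops above rely on the *_skip_val helpers: the aggregate's initialization value doubles as the NULL sentinel, so inputs equal to it are ignored and the first non-sentinel input replaces the accumulator. A hypothetical scalar sketch of that convention for SUM (not the RUNTIME_EXPORT agg_sum_skip_val used above, just an illustration of its semantics):

#include <cstdint>

// Accumulate `val` into `*agg` unless it equals the skip (NULL sentinel) value.
inline void sum_with_skip_val(int64_t* agg, const int64_t val, const int64_t skip_val) {
  if (val == skip_val) {
    return;  // NULL input: nothing to add
  }
  if (*agg == skip_val) {
    *agg = val;  // first non-NULL value seen so far
  } else {
    *agg += val;
  }
}
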

ResultSetPtr Executor::reduceSpeculativeTopN ( const RelAlgExecutionUnit ra_exe_unit,
std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  all_fragment_results,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const QueryMemoryDescriptor query_mem_desc 
) const
private

Definition at line 1724 of file Execute.cpp.

References SpeculativeTopNMap::asRows(), CHECK, CHECK_EQ, SortInfo::limit, SortInfo::offset, SortInfo::order_entries, SpeculativeTopNMap::reduce(), run_benchmark_import::result, report::rows, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by collectAllDeviceResults().

1728  {
1729  if (results_per_device.size() == 1) {
1730  return std::move(results_per_device.front().first);
1731  }
1732  const auto top_n =
1733  ra_exe_unit.sort_info.limit.value_or(0) + ra_exe_unit.sort_info.offset;
1735  for (const auto& result : results_per_device) {
1736  auto rows = result.first;
1737  CHECK(rows);
1738  if (!rows) {
1739  continue;
1740  }
1741  SpeculativeTopNMap that(
1742  *rows,
1743  ra_exe_unit.target_exprs,
1744  std::max(size_t(10000 * std::max(1, static_cast<int>(log(top_n)))), top_n));
1745  m.reduce(that);
1746  }
1747  CHECK_EQ(size_t(1), ra_exe_unit.sort_info.order_entries.size());
1748  const auto desc = ra_exe_unit.sort_info.order_entries.front().is_desc;
1749  return m.asRows(ra_exe_unit, row_set_mem_owner, query_mem_desc, this, top_n, desc);
1750 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void reduce(SpeculativeTopNMap &that)
tuple rows
Definition: report.py:114
std::optional< size_t > limit
std::list< Analyzer::OrderEntry > order_entries
std::shared_ptr< ResultSet > asRows(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const QueryMemoryDescriptor &query_mem_desc, const Executor *executor, const size_t top_n, const bool desc) const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
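
The speculative top-N reduction above merges per-device maps sized relative to top_n = limit + offset; for example, with LIMIT 100 OFFSET 20 the expression yields max(10000 * max(1, (int)log(120)), 120) = 40000 entries per map. A small sketch of that sizing rule (the helper name is illustrative):

#include <algorithm>
#include <cmath>
#include <cstddef>

// Sizing rule applied to each per-device SpeculativeTopNMap above.
size_t speculative_map_size(const size_t top_n) {
  return std::max(size_t(10000 * std::max(1, static_cast<int>(std::log(top_n)))), top_n);
}
// speculative_map_size(120) == 40000 (LIMIT 100 OFFSET 20);
// speculative_map_size(2)   == 10000, since the 10000-entry floor dominates small limits.
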

void Executor::registerActiveModule ( void *  module,
const int  device_id 
)
static

Definition at line 20 of file GpuInterrupt.cpp.

References CHECK_LT, to_string(), and VLOG.

20  {
21 #ifdef HAVE_CUDA
22  std::lock_guard<std::mutex> lock(gpu_active_modules_mutex_);
23  CHECK_LT(device_id, max_gpu_count);
24  gpu_active_modules_device_mask_ |= (1 << device_id);
25  gpu_active_modules_[device_id] = module;
26  VLOG(1) << "Registered module " << module << " on device " << std::to_string(device_id);
27 #endif
28 }
static const int max_gpu_count
Definition: Execute.h:1535
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1541
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1540
std::string to_string(char const *&&v)
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define VLOG(n)
Definition: Logger.h:388
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1539

+ Here is the call graph for this function:
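
Active GPU modules are tracked with one slot per device plus a bitmask with one bit per device, so the interrupt path can see at a glance which devices currently hold a module. A tiny sketch of that bookkeeping; the array size and the clear path are assumptions based on how the mask is set here and on the unregisterActiveModule counterpart referenced elsewhere:

#include <cstdint>

constexpr int kMaxGpuCount = 16;  // illustrative; the real max_gpu_count lives in Execute.h
void* active_modules[kMaxGpuCount] = {};
uint32_t active_modules_device_mask = 0;

void register_module(void* module, const int device_id) {
  active_modules_device_mask |= (1u << device_id);  // mark device as holding a module
  active_modules[device_id] = module;
}

void unregister_module(const int device_id) {
  active_modules_device_mask &= ~(1u << device_id);  // clear the device's bit (assumed inverse)
  active_modules[device_id] = nullptr;
}
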

template<typename F >
static void Executor::registerExtensionFunctions ( register_extension_functions)
inlinestatic

Definition at line 470 of file Execute.h.

References execute_mutex_, executors_, executors_cache_mutex_, register_runtime_extension_functions_mutex_, and update_after_registration().

Referenced by DBHandler::register_runtime_extension_functions().

470  {
471  // Don't want native code to vanish while executing:
473  // Blocks Executor::getExecutor:
475  // Lock registration to avoid
476  // java.util.ConcurrentModificationException from calcite server
477  // when client registrations arrive too fast. Also blocks
478  // Executor::get_rt_udf_module for retrieving runtime UDF/UDTF
 480  // module until this registration has rebuilt it via
480  // Executor::update_after_registration:
481  std::lock_guard<std::mutex> register_lock(
483 
484  // Reset all executors:
485  for (auto& executor_item : Executor::executors_) {
486  executor_item.second->reset(/*discard_runtime_modules_only=*/true);
487  }
488  // Call registration worker, see
489  // DBHandler::register_runtime_extension_functions for details. In
490  // short, updates Executor::extension_module_sources,
491  // table_functions::TableFunctionsFactory, and registers runtime
492  // extension functions with Calcite:
493  register_extension_functions();
494 
495  // Update executors with registered LLVM modules:
496  update_after_registration(/*update_runtime_modules_only=*/true);
497  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
std::unique_lock< T > unique_lock
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1581
static void update_after_registration(bool update_runtime_modules_only=false)
Definition: Execute.h:1420
static std::mutex register_runtime_extension_functions_mutex_
Definition: Execute.h:1623
static heavyai::shared_mutex executors_cache_mutex_
Definition: Execute.h:1602

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::registerExtractedQueryPlanDag ( const QueryPlanDAG query_plan_dag)

Definition at line 5376 of file Execute.cpp.

References latest_query_plan_extracted_.

5376  {
5377  // this function is called under the recycler lock
5378  // e.g., QueryPlanDagExtractor::extractQueryPlanDagImpl()
5379  latest_query_plan_extracted_ = query_plan_dag;
5380 }
static QueryPlanDAG latest_query_plan_extracted_
Definition: Execute.h:1612
bool Executor::removeFromQuerySessionList ( const QuerySessionId query_session,
const std::string &  submitted_time_str,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5209 of file Execute.cpp.

References executor_id_, interrupted_, queries_interrupt_flag_, and queries_session_map_.

Referenced by clearQuerySessionStatus().

5212  {
5213  if (query_session.empty()) {
5214  return false;
5215  }
5216  if (queries_session_map_.count(query_session)) {
5217  auto& storage = queries_session_map_.at(query_session);
5218  if (storage.size() > 1) {
5219  // in this case we only remove query executor info
5220  for (auto it = storage.begin(); it != storage.end(); it++) {
5221  auto target_submitted_t_str = it->second.getQuerySubmittedTime();
 5222  // same submitted time and same executor id --> found the target query
5223  if (it->second.getExecutorId() == executor_id_ &&
5224  submitted_time_str.compare(target_submitted_t_str) == 0) {
5225  storage.erase(it);
5226  return true;
5227  }
5228  }
5229  } else if (storage.size() == 1) {
 5230  // This session only has a single query executor,
 5231  // so we clear both the executor info and its interrupt flag.
5232  queries_session_map_.erase(query_session);
5233  queries_interrupt_flag_.erase(query_session);
5234  if (interrupted_.load()) {
5235  interrupted_.store(false);
5236  }
5237  return true;
5238  }
5239  }
5240  return false;
5241 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
std::atomic< bool > interrupted_
Definition: Execute.h:1543
const ExecutorId executor_id_
Definition: Execute.h:1476
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578

+ Here is the caller graph for this function:

void Executor::reset ( bool  discard_runtime_modules_only = false)

Definition at line 327 of file Execute.cpp.

References QueryEngine::getInstance(), rt_udf_cpu_module, and rt_udf_gpu_module.

327  {
328  // TODO: keep cached results that do not depend on runtime UDF/UDTFs
329  auto qe = QueryEngine::getInstance();
330  qe->s_code_accessor->clear();
331  qe->s_stubs_accessor->clear();
332  qe->cpu_code_accessor->clear();
333  qe->gpu_code_accessor->clear();
334  qe->tf_code_accessor->clear();
335 
336  if (discard_runtime_modules_only) {
338 #ifdef HAVE_CUDA
340 #endif
341  cgen_state_->module_ = nullptr;
342  } else {
343  extension_modules_.clear();
344  cgen_state_.reset();
345  context_.reset(new llvm::LLVMContext());
346  cgen_state_.reset(new CgenState({}, false, this));
347  }
348 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1517

+ Here is the call graph for this function:

void Executor::resetBlockSize ( )

Definition at line 4388 of file Execute.cpp.

References block_size_x_.

4388  {
4389  block_size_x_ = 0;
4390 }
unsigned block_size_x_
Definition: Execute.h:1552
void Executor::resetGridSize ( )

Definition at line 4380 of file Execute.cpp.

References grid_size_x_.

4380  {
4381  grid_size_x_ = 0;
4382 }
unsigned grid_size_x_
Definition: Execute.h:1553
void Executor::resetInterrupt ( )

Definition at line 216 of file GpuInterrupt.cpp.

References check_interrupt_init(), DW_RESET, dynamic_watchdog_init(), g_enable_dynamic_watchdog, g_enable_non_kernel_time_query_interrupt, g_enable_runtime_query_interrupt, INT_RESET, unregisterActiveModule(), and VLOG.

Referenced by clearQuerySessionStatus().

216  {
217  const auto allow_interrupt =
220  dynamic_watchdog_init(static_cast<unsigned>(DW_RESET));
221  } else if (allow_interrupt) {
222 #ifdef HAVE_CUDA
223  for (int device_id = 0; device_id < max_gpu_count; device_id++) {
225  }
226 #endif
227  VLOG(1) << "Reset interrupt flag for CPU execution kernel on Executor "
228  << executor_id_;
229  check_interrupt_init(static_cast<unsigned>(INT_RESET));
230  }
231 
232  if (interrupted_.load()) {
233  VLOG(1) << "RESET Executor " << executor_id_
234  << " that had previously been interrupted";
235  interrupted_.store(false);
236  }
237 }
std::atomic< bool > interrupted_
Definition: Execute.h:1543
static const int max_gpu_count
Definition: Execute.h:1535
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:138
const ExecutorId executor_id_
Definition: Execute.h:1476
static void unregisterActiveModule(const int device_id)
RUNTIME_EXPORT uint64_t dynamic_watchdog_init(unsigned ms_budget)
RUNTIME_EXPORT bool check_interrupt_init(unsigned command)
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::resultsUnion ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 1563 of file Execute.cpp.

References blockSize(), CHECK_GE, CPU, DEBUG_TIMER, anonymous_namespace{Execute.cpp}::get_merged_result(), SharedKernelContext::getFragmentResults(), gridSize(), QueryMemoryDescriptor, row_set_mem_owner_, gpu_enabled::sort(), and RelAlgExecutionUnit::target_exprs.

Referenced by executeWorkUnitImpl().

1564  {
1565  auto timer = DEBUG_TIMER(__func__);
1566  auto& results_per_device = shared_context.getFragmentResults();
1567  auto const targets = shared::transform<std::vector<TargetInfo>>(
1568  ra_exe_unit.target_exprs, GetTargetInfo{});
1569  if (results_per_device.empty()) {
1570  return std::make_shared<ResultSet>(targets,
1574  blockSize(),
1575  gridSize());
1576  }
1577  using IndexedResultSet = std::pair<ResultSetPtr, std::vector<size_t>>;
1578  std::sort(results_per_device.begin(),
1579  results_per_device.end(),
1580  [](const IndexedResultSet& lhs, const IndexedResultSet& rhs) {
1581  CHECK_GE(lhs.second.size(), size_t(1));
1582  CHECK_GE(rhs.second.size(), size_t(1));
1583  return lhs.second.front() < rhs.second.front();
1584  });
1585 
1586  return get_merged_result(results_per_device, targets);
1587 }
std::vector< Analyzer::Expr * > target_exprs
ResultSetPtr get_merged_result(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device, std::vector< TargetInfo > const &targets)
Definition: Execute.cpp:1535
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
friend class QueryMemoryDescriptor
Definition: Execute.h:1641
unsigned gridSize() const
Definition: Execute.cpp:4352
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:412
unsigned blockSize() const
Definition: Execute.cpp:4366

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
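
Before merging, the per-device results are ordered by the first fragment index each result covers, so the union preserves fragment order. A small sketch of that comparator, with ResultSetPtr and IndexedResultSet as aliased above:

#include <algorithm>
#include <utility>
#include <vector>

using IndexedResultSet = std::pair<ResultSetPtr, std::vector<size_t>>;

// Order per-device results by the first fragment id each one covers (sketch).
void order_by_first_fragment(std::vector<IndexedResultSet>& results_per_device) {
  std::sort(results_per_device.begin(),
            results_per_device.end(),
            [](const IndexedResultSet& lhs, const IndexedResultSet& rhs) {
              return lhs.second.front() < rhs.second.front();
            });
}
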

void Executor::resume_executor_queue ( )
static

Definition at line 5429 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

Referenced by anonymous_namespace{DBHandler.cpp}::pause_and_resume_executor_queue(), and DBHandler::resume_executor_queue().

5429  {
5431  throw std::runtime_error(
5432  "Executor queue cannot be resumed as it requires Executor Resource Manager to be "
5433  "enabled");
5434  }
5435  executor_resource_mgr_->resume_process_queue();
5436 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178

+ Here is the caller graph for this function:

std::vector< int8_t > Executor::serializeLiterals ( const std::unordered_map< int, CgenState::LiteralValues > &  literals,
const int  device_id 
)
private

Definition at line 1060 of file Execute.cpp.

References CgenState::addAligned(), align(), CHECK, CHECK_EQ, CHECK_LE, g_enable_string_functions, StringDictionaryProxy::getIdOfString(), StringDictionaryProxy::getOrAddTransient(), getStringDictionaryProxy(), CgenState::literalBytes(), and row_set_mem_owner_.

Referenced by executePlanWithGroupBy(), and executePlanWithoutGroupBy().

1062  {
1063  if (literals.empty()) {
1064  return {};
1065  }
1066  const auto dev_literals_it = literals.find(device_id);
1067  CHECK(dev_literals_it != literals.end());
1068  const auto& dev_literals = dev_literals_it->second;
1069  size_t lit_buf_size{0};
1070  std::vector<std::string> real_strings;
1071  std::vector<std::vector<double>> double_array_literals;
1072  std::vector<std::vector<int8_t>> align64_int8_array_literals;
1073  std::vector<std::vector<int32_t>> int32_array_literals;
1074  std::vector<std::vector<int8_t>> align32_int8_array_literals;
1075  std::vector<std::vector<int8_t>> int8_array_literals;
1076  for (const auto& lit : dev_literals) {
1077  lit_buf_size = CgenState::addAligned(lit_buf_size, CgenState::literalBytes(lit));
1078  if (lit.which() == 7) {
1079  const auto p = boost::get<std::string>(&lit);
1080  CHECK(p);
1081  real_strings.push_back(*p);
1082  } else if (lit.which() == 8) {
1083  const auto p = boost::get<std::vector<double>>(&lit);
1084  CHECK(p);
1085  double_array_literals.push_back(*p);
1086  } else if (lit.which() == 9) {
1087  const auto p = boost::get<std::vector<int32_t>>(&lit);
1088  CHECK(p);
1089  int32_array_literals.push_back(*p);
1090  } else if (lit.which() == 10) {
1091  const auto p = boost::get<std::vector<int8_t>>(&lit);
1092  CHECK(p);
1093  int8_array_literals.push_back(*p);
1094  } else if (lit.which() == 11) {
1095  const auto p = boost::get<std::pair<std::vector<int8_t>, int>>(&lit);
1096  CHECK(p);
1097  if (p->second == 64) {
1098  align64_int8_array_literals.push_back(p->first);
1099  } else if (p->second == 32) {
1100  align32_int8_array_literals.push_back(p->first);
1101  } else {
1102  CHECK(false);
1103  }
1104  }
1105  }
1106  if (lit_buf_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
1107  throw TooManyLiterals();
1108  }
1109  int16_t crt_real_str_off = lit_buf_size;
1110  for (const auto& real_str : real_strings) {
1111  CHECK_LE(real_str.size(), static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1112  lit_buf_size += real_str.size();
1113  }
1114  if (double_array_literals.size() > 0) {
1115  lit_buf_size = align(lit_buf_size, sizeof(double));
1116  }
1117  int16_t crt_double_arr_lit_off = lit_buf_size;
1118  for (const auto& double_array_literal : double_array_literals) {
1119  CHECK_LE(double_array_literal.size(),
1120  static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1121  lit_buf_size += double_array_literal.size() * sizeof(double);
1122  }
1123  if (align64_int8_array_literals.size() > 0) {
1124  lit_buf_size = align(lit_buf_size, sizeof(uint64_t));
1125  }
1126  int16_t crt_align64_int8_arr_lit_off = lit_buf_size;
1127  for (const auto& align64_int8_array_literal : align64_int8_array_literals) {
1128  CHECK_LE(align64_int8_array_literals.size(),
1129  static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1130  lit_buf_size += align64_int8_array_literal.size();
1131  }
1132  if (int32_array_literals.size() > 0) {
1133  lit_buf_size = align(lit_buf_size, sizeof(int32_t));
1134  }
1135  int16_t crt_int32_arr_lit_off = lit_buf_size;
1136  for (const auto& int32_array_literal : int32_array_literals) {
1137  CHECK_LE(int32_array_literal.size(),
1138  static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1139  lit_buf_size += int32_array_literal.size() * sizeof(int32_t);
1140  }
1141  if (align32_int8_array_literals.size() > 0) {
1142  lit_buf_size = align(lit_buf_size, sizeof(int32_t));
1143  }
1144  int16_t crt_align32_int8_arr_lit_off = lit_buf_size;
1145  for (const auto& align32_int8_array_literal : align32_int8_array_literals) {
1146  CHECK_LE(align32_int8_array_literals.size(),
1147  static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1148  lit_buf_size += align32_int8_array_literal.size();
1149  }
1150  int16_t crt_int8_arr_lit_off = lit_buf_size;
1151  for (const auto& int8_array_literal : int8_array_literals) {
1152  CHECK_LE(int8_array_literal.size(),
1153  static_cast<size_t>(std::numeric_limits<int16_t>::max()));
1154  lit_buf_size += int8_array_literal.size();
1155  }
1156  unsigned crt_real_str_idx = 0;
1157  unsigned crt_double_arr_lit_idx = 0;
1158  unsigned crt_align64_int8_arr_lit_idx = 0;
1159  unsigned crt_int32_arr_lit_idx = 0;
1160  unsigned crt_align32_int8_arr_lit_idx = 0;
1161  unsigned crt_int8_arr_lit_idx = 0;
1162  std::vector<int8_t> serialized(lit_buf_size);
1163  size_t off{0};
1164  for (const auto& lit : dev_literals) {
1165  const auto lit_bytes = CgenState::literalBytes(lit);
1166  off = CgenState::addAligned(off, lit_bytes);
1167  switch (lit.which()) {
1168  case 0: {
1169  const auto p = boost::get<int8_t>(&lit);
1170  CHECK(p);
1171  serialized[off - lit_bytes] = *p;
1172  break;
1173  }
1174  case 1: {
1175  const auto p = boost::get<int16_t>(&lit);
1176  CHECK(p);
1177  memcpy(&serialized[off - lit_bytes], p, lit_bytes);
1178  break;
1179  }
1180  case 2: {
1181  const auto p = boost::get<int32_t>(&lit);
1182  CHECK(p);
1183  memcpy(&serialized[off - lit_bytes], p, lit_bytes);
1184  break;
1185  }
1186  case 3: {
1187  const auto p = boost::get<int64_t>(&lit);
1188  CHECK(p);
1189  memcpy(&serialized[off - lit_bytes], p, lit_bytes);
1190  break;
1191  }
1192  case 4: {
1193  const auto p = boost::get<float>(&lit);
1194  CHECK(p);
1195  memcpy(&serialized[off - lit_bytes], p, lit_bytes);
1196  break;
1197  }
1198  case 5: {
1199  const auto p = boost::get<double>(&lit);
1200  CHECK(p);
1201  memcpy(&serialized[off - lit_bytes], p, lit_bytes);
1202  break;
1203  }
1204  case 6: {
1205  const auto p = boost::get<std::pair<std::string, shared::StringDictKey>>(&lit);
1206  CHECK(p);
1207  const auto str_id =
1209  ? getStringDictionaryProxy(p->second, row_set_mem_owner_, true)
1210  ->getOrAddTransient(p->first)
1211  : getStringDictionaryProxy(p->second, row_set_mem_owner_, true)
1212  ->getIdOfString(p->first);
1213  memcpy(&serialized[off - lit_bytes], &str_id, lit_bytes);
1214  break;
1215  }
1216  case 7: {
1217  const auto p = boost::get<std::string>(&lit);
1218  CHECK(p);
1219  int32_t off_and_len = crt_real_str_off << 16;
1220  const auto& crt_real_str = real_strings[crt_real_str_idx];
1221  off_and_len |= static_cast<int16_t>(crt_real_str.size());
1222  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1223  memcpy(&serialized[crt_real_str_off], crt_real_str.data(), crt_real_str.size());
1224  ++crt_real_str_idx;
1225  crt_real_str_off += crt_real_str.size();
1226  break;
1227  }
1228  case 8: {
1229  const auto p = boost::get<std::vector<double>>(&lit);
1230  CHECK(p);
1231  int32_t off_and_len = crt_double_arr_lit_off << 16;
1232  const auto& crt_double_arr_lit = double_array_literals[crt_double_arr_lit_idx];
1233  int32_t len = crt_double_arr_lit.size();
1234  CHECK_EQ((len >> 16), 0);
1235  off_and_len |= static_cast<int16_t>(len);
1236  int32_t double_array_bytesize = len * sizeof(double);
1237  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1238  memcpy(&serialized[crt_double_arr_lit_off],
1239  crt_double_arr_lit.data(),
1240  double_array_bytesize);
1241  ++crt_double_arr_lit_idx;
1242  crt_double_arr_lit_off += double_array_bytesize;
1243  break;
1244  }
1245  case 9: {
1246  const auto p = boost::get<std::vector<int32_t>>(&lit);
1247  CHECK(p);
1248  int32_t off_and_len = crt_int32_arr_lit_off << 16;
1249  const auto& crt_int32_arr_lit = int32_array_literals[crt_int32_arr_lit_idx];
1250  int32_t len = crt_int32_arr_lit.size();
1251  CHECK_EQ((len >> 16), 0);
1252  off_and_len |= static_cast<int16_t>(len);
1253  int32_t int32_array_bytesize = len * sizeof(int32_t);
1254  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1255  memcpy(&serialized[crt_int32_arr_lit_off],
1256  crt_int32_arr_lit.data(),
1257  int32_array_bytesize);
1258  ++crt_int32_arr_lit_idx;
1259  crt_int32_arr_lit_off += int32_array_bytesize;
1260  break;
1261  }
1262  case 10: {
1263  const auto p = boost::get<std::vector<int8_t>>(&lit);
1264  CHECK(p);
1265  int32_t off_and_len = crt_int8_arr_lit_off << 16;
1266  const auto& crt_int8_arr_lit = int8_array_literals[crt_int8_arr_lit_idx];
1267  int32_t len = crt_int8_arr_lit.size();
1268  CHECK_EQ((len >> 16), 0);
1269  off_and_len |= static_cast<int16_t>(len);
1270  int32_t int8_array_bytesize = len;
1271  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1272  memcpy(&serialized[crt_int8_arr_lit_off],
1273  crt_int8_arr_lit.data(),
1274  int8_array_bytesize);
1275  ++crt_int8_arr_lit_idx;
1276  crt_int8_arr_lit_off += int8_array_bytesize;
1277  break;
1278  }
1279  case 11: {
1280  const auto p = boost::get<std::pair<std::vector<int8_t>, int>>(&lit);
1281  CHECK(p);
1282  if (p->second == 64) {
1283  int32_t off_and_len = crt_align64_int8_arr_lit_off << 16;
1284  const auto& crt_align64_int8_arr_lit =
1285  align64_int8_array_literals[crt_align64_int8_arr_lit_idx];
1286  int32_t len = crt_align64_int8_arr_lit.size();
1287  CHECK_EQ((len >> 16), 0);
1288  off_and_len |= static_cast<int16_t>(len);
1289  int32_t align64_int8_array_bytesize = len;
1290  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1291  memcpy(&serialized[crt_align64_int8_arr_lit_off],
1292  crt_align64_int8_arr_lit.data(),
1293  align64_int8_array_bytesize);
1294  ++crt_align64_int8_arr_lit_idx;
1295  crt_align64_int8_arr_lit_off += align64_int8_array_bytesize;
1296  } else if (p->second == 32) {
1297  int32_t off_and_len = crt_align32_int8_arr_lit_off << 16;
1298  const auto& crt_align32_int8_arr_lit =
1299  align32_int8_array_literals[crt_align32_int8_arr_lit_idx];
1300  int32_t len = crt_align32_int8_arr_lit.size();
1301  CHECK_EQ((len >> 16), 0);
1302  off_and_len |= static_cast<int16_t>(len);
1303  int32_t align32_int8_array_bytesize = len;
1304  memcpy(&serialized[off - lit_bytes], &off_and_len, lit_bytes);
1305  memcpy(&serialized[crt_align32_int8_arr_lit_off],
1306  crt_align32_int8_arr_lit.data(),
1307  align32_int8_array_bytesize);
1308  ++crt_align32_int8_arr_lit_idx;
1309  crt_align32_int8_arr_lit_off += align32_int8_array_bytesize;
1310  } else {
1311  CHECK(false);
1312  }
1313  break;
1314  }
1315  default:
1316  CHECK(false);
1317  }
1318  }
1319  return serialized;
1320 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:578
static size_t literalBytes(const CgenState::LiteralValue &lit)
Definition: CgenState.h:418
bool g_enable_string_functions
static size_t addAligned(const size_t off_in, const size_t alignment)
Definition: CgenState.h:449
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
int32_t getOrAddTransient(const std::string &)
#define CHECK_LE(x, y)
Definition: Logger.h:304
#define CHECK(condition)
Definition: Logger.h:291
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:1468
int32_t getIdOfString(const std::string &str) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
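
Variable-length literals (strings and arrays) are referenced from their fixed-size slot by a packed 32-bit value: the buffer offset in the upper 16 bits and the length in the lower 16 bits, which is why both are checked against the int16_t maximum above. An illustrative pack/unpack of that encoding; the decode helpers are assumptions mirroring the pack arithmetic, not functions from this code base:

#include <cstddef>
#include <cstdint>

// Pack a buffer offset and a length into one 32-bit slot, as serializeLiterals() does.
// `len` is assumed to fit in 16 bits, as the CHECK_LE calls above enforce.
inline int32_t pack_off_and_len(const int16_t offset, const size_t len) {
  int32_t off_and_len = static_cast<int32_t>(offset) << 16;
  off_and_len |= static_cast<int16_t>(len);
  return off_and_len;
}

inline int16_t unpack_offset(const int32_t off_and_len) {
  return static_cast<int16_t>(off_and_len >> 16);
}

inline int16_t unpack_length(const int32_t off_and_len) {
  return static_cast<int16_t>(off_and_len & 0xFFFF);
}
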

void Executor::set_concurrent_resource_grant_policy ( const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy concurrent_resource_grant_policy)
static

Definition at line 5477 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

5479  {
5481  throw std::runtime_error(
5482  "ExecutorResourceMgr must be enabled to set executor concurrent resource grant "
5483  "policy.");
5484  }
5485  executor_resource_mgr_->set_concurrent_resource_grant_policy(
5486  concurrent_resource_grant_policy);
5487 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
void Executor::set_executor_resource_pool_resource ( const ExecutorResourceMgr_Namespace::ResourceType  resource_type,
const size_t  resource_quantity 
)
static

Definition at line 5456 of file Execute.cpp.

References executor_resource_mgr_, and g_enable_executor_resource_mgr.

5458  {
5460  throw std::runtime_error(
5461  "ExecutorResourceMgr must be enabled to set executor resource pool resource.");
5462  }
5463  executor_resource_mgr_->set_resource(resource_type, resource_quantity);
5464 }
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
void Executor::setBlockSize ( unsigned  block_size)

Definition at line 4384 of file Execute.cpp.

References block_size_x_.

4384  {
4385  block_size_x_ = block_size;
4386 }
unsigned block_size_x_
Definition: Execute.h:1552
void Executor::setColRangeCache ( const AggregatedColRange aggregated_col_range)
inline

Definition at line 1329 of file Execute.h.

References agg_col_range_cache_.

1329  {
1330  agg_col_range_cache_ = aggregated_col_range;
1331  }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
void Executor::setGridSize ( unsigned  grid_size)

Definition at line 4376 of file Execute.cpp.

References grid_size_x_.

4376  {
4377  grid_size_x_ = grid_size;
4378 }
unsigned grid_size_x_
Definition: Execute.h:1553
void Executor::setQuerySessionAsInterrupted ( const QuerySessionId query_session,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5243 of file Execute.cpp.

References queries_interrupt_flag_.

5245  {
5246  if (query_session.empty()) {
5247  return;
5248  }
5249  if (queries_interrupt_flag_.find(query_session) != queries_interrupt_flag_.end()) {
5250  queries_interrupt_flag_[query_session] = true;
5251  }
5252 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
void Executor::setupCaching ( const std::unordered_set< PhysicalInput > &  phys_inputs,
const std::unordered_set< shared::TableKey > &  phys_table_keys 
)

Definition at line 4960 of file Execute.cpp.

References agg_col_range_cache_, computeColRangesCache(), computeStringDictionaryGenerations(), computeTableGenerations(), executor_id_, getArenaBlockSize(), row_set_mem_owner_, and table_generations_.

4961  {
4963  std::make_shared<RowSetMemoryOwner>(Executor::getArenaBlockSize(), executor_id_);
4964  row_set_mem_owner_->setDictionaryGenerations(
4965  computeStringDictionaryGenerations(phys_inputs));
4967  table_generations_ = computeTableGenerations(phys_table_ids);
4968 }
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4894
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
const ExecutorId executor_id_
Definition: Execute.h:1476
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
TableGenerations computeTableGenerations(const std::unordered_set< shared::TableKey > &phys_table_keys)
Definition: Execute.cpp:4948
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4922
TableGenerations table_generations_
Definition: Execute.h:1573
static size_t getArenaBlockSize()
Definition: Execute.cpp:562

+ Here is the call graph for this function:

std::pair< bool, int64_t > Executor::skipFragment ( const InputDescriptor table_desc,
const Fragmenter_Namespace::FragmentInfo frag_info,
const std::list< std::shared_ptr< Analyzer::Expr >> &  simple_quals,
const std::vector< uint64_t > &  frag_offsets,
const size_t  frag_idx 
)
private

Definition at line 4658 of file Execute.cpp.

References canSkipFragmentForFpQual(), CHECK, CodeGenerator::codegenIntConst(), DateTruncateHighPrecisionToDate(), extract_max_stat_int_type(), extract_min_stat_int_type(), get_column_descriptor(), anonymous_namespace{Execute.cpp}::get_hpt_overflow_underflow_safe_scaled_values(), Analyzer::BinOper::get_left_operand(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), getTableGeneration(), InputDescriptor::getTableKey(), INVALID, isFragmentFullyDeleted(), kCAST, kEQ, kGE, kGT, kLE, kLT, kTIME, NOT_SKIPPABLE, Fragmenter_Namespace::FragmentInfo::physicalTableId, SKIPPABLE, to_string(), UNREACHABLE, and VLOG.

Referenced by skipFragmentInnerJoins().

4663  {
4664  // First check to see if the whole fragment is deleted, in which case we know we can skip it
4665  if (isFragmentFullyDeleted(table_desc, fragment)) {
4666  VLOG(2) << "Skipping deleted fragment with table id: " << fragment.physicalTableId
4667  << ", fragment id: " << frag_idx;
4668  return {true, -1};
4669  }
4670 
4671  for (const auto& simple_qual : simple_quals) {
4672  const auto comp_expr =
4673  std::dynamic_pointer_cast<const Analyzer::BinOper>(simple_qual);
4674  if (!comp_expr) {
4675  // is this possible?
4676  return {false, -1};
4677  }
4678  const auto lhs = comp_expr->get_left_operand();
4679  auto lhs_col = dynamic_cast<const Analyzer::ColumnVar*>(lhs);
4680  if (!lhs_col || !lhs_col->getColumnKey().table_id || lhs_col->get_rte_idx()) {
4681  // See if lhs is a simple cast that was allowed through normalize_simple_predicate
4682  auto lhs_uexpr = dynamic_cast<const Analyzer::UOper*>(lhs);
4683  if (lhs_uexpr) {
4684  CHECK(lhs_uexpr->get_optype() ==
4685  kCAST); // We should have only been passed a cast expression
4686  lhs_col = dynamic_cast<const Analyzer::ColumnVar*>(lhs_uexpr->get_operand());
4687  if (!lhs_col || !lhs_col->getColumnKey().table_id || lhs_col->get_rte_idx()) {
4688  continue;
4689  }
4690  } else {
4691  continue;
4692  }
4693  }
4694  const auto rhs = comp_expr->get_right_operand();
4695  const auto rhs_const = dynamic_cast<const Analyzer::Constant*>(rhs);
4696  if (!rhs_const) {
4697  // is this possible?
4698  return {false, -1};
4699  }
4700  if (!lhs->get_type_info().is_integer() && !lhs->get_type_info().is_time() &&
4701  !lhs->get_type_info().is_fp()) {
4702  continue;
4703  }
4704  if (lhs->get_type_info().is_fp()) {
4705  const auto fragment_skip_status =
4706  canSkipFragmentForFpQual(comp_expr.get(), lhs_col, fragment, rhs_const);
4707  switch (fragment_skip_status) {
4708  case FragmentSkipStatus::SKIPPABLE:
4709  return {true, -1};
4710  case FragmentSkipStatus::INVALID:
4711  return {false, -1};
4712  case FragmentSkipStatus::NOT_SKIPPABLE:
4713  continue;
4714  default:
4715  UNREACHABLE();
4716  }
4717  }
4718 
4719  // Everything below is logic for integer and integer-backed timestamps
4720  // TODO: Factor out into separate function per canSkipFragmentForFpQual above
4721 
4722  if (lhs_col->get_type_info().is_timestamp() &&
4723  rhs_const->get_type_info().is_any<kTIME>()) {
4724  // when casting from a timestamp to time it
4725  // is not possible to get a valid range,
4726  // so we can't skip any fragment
4727  continue;
4728  }
4729 
4730  const int col_id = lhs_col->getColumnKey().column_id;
4731  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4732  int64_t chunk_min{0};
4733  int64_t chunk_max{0};
4734  bool is_rowid{false};
4735  size_t start_rowid{0};
4736  const auto& table_key = table_desc.getTableKey();
4737  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4738  auto cd = get_column_descriptor({table_key, col_id});
4739  if (cd->isVirtualCol) {
4740  CHECK(cd->columnName == "rowid");
4741  const auto& table_generation = getTableGeneration(table_key);
4742  start_rowid = table_generation.start_rowid;
4743  chunk_min = frag_offsets[frag_idx] + start_rowid;
4744  chunk_max = frag_offsets[frag_idx + 1] - 1 + start_rowid;
4745  is_rowid = true;
4746  }
4747  } else {
4748  const auto& chunk_type = lhs_col->get_type_info();
4749  chunk_min =
4750  extract_min_stat_int_type(chunk_meta_it->second->chunkStats, chunk_type);
4751  chunk_max =
4752  extract_max_stat_int_type(chunk_meta_it->second->chunkStats, chunk_type);
4753  }
4754  if (chunk_min > chunk_max) {
4755  // invalid metadata range, do not skip fragment
4756  return {false, -1};
4757  }
4758  if (lhs->get_type_info().is_timestamp() &&
4759  (lhs_col->get_type_info().get_dimension() !=
4760  rhs_const->get_type_info().get_dimension()) &&
4761  (lhs_col->get_type_info().is_high_precision_timestamp() ||
4762  rhs_const->get_type_info().is_high_precision_timestamp())) {
4763  // If original timestamp lhs col has different precision,
4764  // column metadata holds value in original precision
4765  // therefore adjust rhs value to match lhs precision
4766 
4767  // Note(Wamsi): We adjust rhs const value instead of lhs value to not
4768  // artificially limit the lhs column range. RHS overflow/underflow has already
4769  // been validated in `TimeGM::get_overflow_underflow_safe_epoch`.
4770  bool is_valid;
4771  std::tie(is_valid, chunk_min, chunk_max) =
4772  get_hpt_overflow_underflow_safe_scaled_values(
4773  chunk_min, chunk_max, lhs_col->get_type_info(), rhs_const->get_type_info());
4774  if (!is_valid) {
4775  VLOG(4) << "Overflow/Underflow detecting in fragments skipping logic.\nChunk min "
4776  "value: "
4777  << std::to_string(chunk_min)
4778  << "\nChunk max value: " << std::to_string(chunk_max)
4779  << "\nLHS col precision is: "
4780  << std::to_string(lhs_col->get_type_info().get_dimension())
4781  << "\nRHS precision is: "
4782  << std::to_string(rhs_const->get_type_info().get_dimension()) << ".";
4783  return {false, -1};
4784  }
4785  }
4786  if (lhs_col->get_type_info().is_timestamp() && rhs_const->get_type_info().is_date()) {
4787  // It is obvious that a cast from timestamp to date is happening here,
4788  // so we have to correct the chunk min and max values to lower the precision as of
4789  // the date
4790  chunk_min = DateTruncateHighPrecisionToDate(
4791  chunk_min, pow(10, lhs_col->get_type_info().get_dimension()));
4792  chunk_max = DateTruncateHighPrecisionToDate(
4793  chunk_max, pow(10, lhs_col->get_type_info().get_dimension()));
4794  }
4795  llvm::LLVMContext local_context;
4796  CgenState local_cgen_state(local_context);
4797  CodeGenerator code_generator(&local_cgen_state, nullptr);
4798 
4799  const auto rhs_val =
4800  CodeGenerator::codegenIntConst(rhs_const, &local_cgen_state)->getSExtValue();
4801 
4802  switch (comp_expr->get_optype()) {
4803  case kGE:
4804  if (chunk_max < rhs_val) {
4805  return {true, -1};
4806  }
4807  break;
4808  case kGT:
4809  if (chunk_max <= rhs_val) {
4810  return {true, -1};
4811  }
4812  break;
4813  case kLE:
4814  if (chunk_min > rhs_val) {
4815  return {true, -1};
4816  }
4817  break;
4818  case kLT:
4819  if (chunk_min >= rhs_val) {
4820  return {true, -1};
4821  }
4822  break;
4823  case kEQ:
4824  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4825  return {true, -1};
4826  } else if (is_rowid) {
4827  return {false, rhs_val - start_rowid};
4828  }
4829  break;
4830  default:
4831  break;
4832  }
4833  }
4834  return {false, -1};
4835 }
Definition: sqltypes.h:76
std::tuple< bool, int64_t, int64_t > get_hpt_overflow_underflow_safe_scaled_values(const int64_t chunk_min, const int64_t chunk_max, const SQLTypeInfo &lhs_type, const SQLTypeInfo &rhs_type)
Definition: Execute.cpp:4526
Definition: sqldefs.h:37
Definition: sqldefs.h:38
#define UNREACHABLE()
Definition: Logger.h:338
Definition: sqldefs.h:51
Definition: sqldefs.h:32
FragmentSkipStatus canSkipFragmentForFpQual(const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
Definition: Execute.cpp:4598
int64_t extract_max_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
std::string to_string(char const *&&v)
int64_t extract_min_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:213
const shared::TableKey & getTableKey() const
Definition: sqldefs.h:36
static llvm::ConstantInt * codegenIntConst(const Analyzer::Constant *constant, CgenState *cgen_state)
Definition: ConstantIR.cpp:89
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t DateTruncateHighPrecisionToDate(const int64_t timeval, const int64_t scale)
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:35
const Expr * get_left_operand() const
Definition: Analyzer.h:455
const TableGeneration & getTableGeneration(const shared::TableKey &table_key) const
Definition: Execute.cpp:741
#define VLOG(n)
Definition: Logger.h:388
bool isFragmentFullyDeleted(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
Definition: Execute.cpp:4561
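Illustrative sketch (not part of the Executor sources): the core decision above compares the qual's constant against the per-chunk min/max metadata. The standalone C++ snippet below mirrors that comparison for a single integer qual; ChunkStatsMinMax and CompareOp are hypothetical stand-ins for the chunk metadata and the kGE/kGT/kLE/kLT/kEQ operator codes handled by skipFragment().

#include <cstdint>

// Hypothetical stand-ins for the chunk min/max metadata and the comparison
// operator extracted from the simple qual.
struct ChunkStatsMinMax { int64_t min; int64_t max; };
enum class CompareOp { GE, GT, LE, LT, EQ };

// Returns true when the fragment can provably contain no matching row,
// mirroring the switch over comp_expr->get_optype() in skipFragment().
bool can_skip_fragment(const ChunkStatsMinMax& stats, CompareOp op, int64_t rhs_val) {
  switch (op) {
    case CompareOp::GE: return stats.max < rhs_val;   // every value is below rhs
    case CompareOp::GT: return stats.max <= rhs_val;
    case CompareOp::LE: return stats.min > rhs_val;
    case CompareOp::LT: return stats.min >= rhs_val;
    case CompareOp::EQ: return stats.min > rhs_val || stats.max < rhs_val;
  }
  return false;
}

int main() {
  // A fragment whose column values all fall in [10, 20] cannot satisfy "col > 25".
  return can_skip_fragment({10, 20}, CompareOp::GT, 25) ? 0 : 1;
}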

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< bool, int64_t > Executor::skipFragmentInnerJoins ( const InputDescriptor table_desc,
const RelAlgExecutionUnit ra_exe_unit,
const Fragmenter_Namespace::FragmentInfo fragment,
const std::vector< uint64_t > &  frag_offsets,
const size_t  frag_idx 
)
private

Definition at line 4861 of file Execute.cpp.

References INNER, RelAlgExecutionUnit::join_quals, qual_to_conjunctive_form(), and skipFragment().

4866  {
4867  std::pair<bool, int64_t> skip_frag{false, -1};
4868  for (auto& inner_join : ra_exe_unit.join_quals) {
4869  if (inner_join.type != JoinType::INNER) {
4870  continue;
4871  }
4872 
4873  // extracting all the conjunctive simple_quals from the quals stored for the inner
4874  // join
4875  std::list<std::shared_ptr<Analyzer::Expr>> inner_join_simple_quals;
4876  for (auto& qual : inner_join.quals) {
4877  auto temp_qual = qual_to_conjunctive_form(qual);
4878  inner_join_simple_quals.insert(inner_join_simple_quals.begin(),
4879  temp_qual.simple_quals.begin(),
4880  temp_qual.simple_quals.end());
4881  }
4882  auto temp_skip_frag = skipFragment(
4883  table_desc, fragment, inner_join_simple_quals, frag_offsets, frag_idx);
4884  if (temp_skip_frag.second != -1) {
4885  skip_frag.second = temp_skip_frag.second;
4886  return skip_frag;
4887  } else {
4888  skip_frag.first = skip_frag.first || temp_skip_frag.first;
4889  }
4890  }
4891  return skip_frag;
4892 }
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
const JoinQualsPerNestingLevel join_quals
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4658

+ Here is the call graph for this function:

bool Executor::skipFragmentPair ( const Fragmenter_Namespace::FragmentInfo outer_fragment_info,
const Fragmenter_Namespace::FragmentInfo inner_fragment_info,
const int  inner_table_id,
const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &  inner_table_id_to_join_condition,
const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  device_type 
)
private

Definition at line 3278 of file Execute.cpp.

References CHECK, CHECK_EQ, get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), getTemporaryTables(), GPU, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, HashJoin::normalizeColumnPairs(), plan_state_, and Fragmenter_Namespace::FragmentInfo::shard.

Referenced by getTableFragmentIndices().

3285  {
3286  if (device_type != ExecutorDeviceType::GPU) {
3287  return false;
3288  }
3289  CHECK(table_idx >= 0 &&
3290  static_cast<size_t>(table_idx) < ra_exe_unit.input_descs.size());
3291  const auto& inner_table_key = ra_exe_unit.input_descs[table_idx].getTableKey();
3292  // Both tables need to be sharded the same way.
3293  if (outer_fragment_info.shard == -1 || inner_fragment_info.shard == -1 ||
3294  outer_fragment_info.shard == inner_fragment_info.shard) {
3295  return false;
3296  }
3297  const Analyzer::BinOper* join_condition{nullptr};
3298  if (ra_exe_unit.join_quals.empty()) {
3299  CHECK(!inner_table_id_to_join_condition.empty());
3300  auto condition_it = inner_table_id_to_join_condition.find(inner_table_key);
3301  CHECK(condition_it != inner_table_id_to_join_condition.end());
3302  join_condition = condition_it->second;
3303  CHECK(join_condition);
3304  } else {
3305  CHECK_EQ(plan_state_->join_info_.equi_join_tautologies_.size(),
3306  plan_state_->join_info_.join_hash_tables_.size());
3307  for (size_t i = 0; i < plan_state_->join_info_.join_hash_tables_.size(); ++i) {
3308  if (plan_state_->join_info_.join_hash_tables_[i]->getInnerTableRteIdx() ==
3309  table_idx) {
3310  CHECK(!join_condition);
3311  join_condition = plan_state_->join_info_.equi_join_tautologies_[i].get();
3312  }
3313  }
3314  }
3315  if (!join_condition) {
3316  return false;
3317  }
3318  // TODO(adb): support fragment skipping based on the bounding box intersect operator
3319  if (join_condition->is_bbox_intersect_oper()) {
3320  return false;
3321  }
3322  size_t shard_count{0};
3323  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
3324  join_condition->get_left_operand())) {
3325  auto inner_outer_pairs =
3326  HashJoin::normalizeColumnPairs(join_condition, getTemporaryTables()).first;
3327  shard_count = BaselineJoinHashTable::getShardCountForCondition(
3328  join_condition, this, inner_outer_pairs);
3329  } else {
3330  shard_count = get_shard_count(join_condition, this);
3331  }
3332  if (shard_count && !ra_exe_unit.join_quals.empty()) {
3333  plan_state_->join_info_.sharded_range_table_indices_.emplace(table_idx);
3334  }
3335  return shard_count;
3336 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< InputDescriptor > input_descs
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
const TemporaryTables * getTemporaryTables()
Definition: Execute.h:573
#define CHECK(condition)
Definition: Logger.h:291
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1015
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1084
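Illustrative sketch (not part of the Executor sources): for a sharded inner equi-join on GPU, an outer fragment on shard i can only match inner fragments of the same shard, so pairs with differing shard ids can be skipped. FragInfo below is a hypothetical stand-in for Fragmenter_Namespace::FragmentInfo, and shard_count stands for the result of get_shard_count()/getShardCountForCondition().

// Hypothetical, simplified fragment descriptor; -1 means "not sharded",
// matching the shard == -1 checks in skipFragmentPair().
struct FragInfo { int shard; };

// True when the pair can be skipped: both fragments are sharded, the join is a
// sharded equi-join (shard_count > 0), and the shard ids differ.
bool skip_fragment_pair(const FragInfo& outer, const FragInfo& inner, size_t shard_count) {
  if (outer.shard == -1 || inner.shard == -1 || outer.shard == inner.shard) {
    return false;
  }
  return shard_count > 0;
}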

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::spillDoubleElement ( llvm::Value *  elem_val,
llvm::Type elem_ty 
)
private

Definition at line 19 of file MaxwellCodegenPatch.cpp.

19  {
20  auto var_ptr = cgen_state_->ir_builder_.CreateAlloca(elem_ty);
21  cgen_state_->ir_builder_.CreateStore(elem_val, var_ptr);
22  return var_ptr;
23 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
void Executor::unregisterActiveModule ( const int  device_id)
static

Definition at line 30 of file GpuInterrupt.cpp.

References CHECK_LT, to_string(), and VLOG.

Referenced by resetInterrupt().

30  {
31 #ifdef HAVE_CUDA
32  std::lock_guard<std::mutex> lock(gpu_active_modules_mutex_);
33  CHECK_LT(device_id, max_gpu_count);
34  if ((gpu_active_modules_device_mask_ & (1 << device_id)) == 0) {
35  return;
36  }
37  gpu_active_modules_device_mask_ ^= (1 << device_id);
38  VLOG(1) << "Unregistered module on device " << std::to_string(device_id);
39 #endif
40 }
static const int max_gpu_count
Definition: Execute.h:1535
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1540
std::string to_string(char const *&&v)
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define VLOG(n)
Definition: Logger.h:388
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1539
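Illustrative sketch (not part of the Executor sources): gpu_active_modules_device_mask_ keeps one bit per device, and the XOR with (1 << device_id) clears a bit that the preceding check has confirmed is set. The standalone snippet below demonstrates the same bit bookkeeping on a plain uint32_t, without the locking performed by the real code.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t active_mask = 0;
  const int device_id = 3;

  active_mask |= (1u << device_id);              // register a module on device 3
  assert((active_mask & (1u << device_id)) != 0);

  if ((active_mask & (1u << device_id)) != 0) {  // same guard as unregisterActiveModule()
    active_mask ^= (1u << device_id);            // XOR clears the known-set bit
  }
  assert(active_mask == 0);
  return 0;
}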

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void Executor::update_after_registration ( bool  update_runtime_modules_only = false)
inlinestatic

Definition at line 1420 of file Execute.h.

References executors_.

Referenced by registerExtensionFunctions().

1420  {
1421  for (auto executor_item : Executor::executors_) {
1422  executor_item.second->update_extension_modules(update_runtime_modules_only);
1423  }
1424  }
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1581

+ Here is the caller graph for this function:

void Executor::update_extension_modules ( bool  update_runtime_modules_only = false)

Definition at line 350 of file Execute.cpp.

References CHECK, extension_module_sources, LOG, read_llvm_module_from_bc_file(), read_llvm_module_from_ir_file(), read_llvm_module_from_ir_string(), rt_geos_module, rt_libdevice_module, rt_udf_cpu_module, rt_udf_gpu_module, template_module, toString(), udf_cpu_module, udf_gpu_module, UNREACHABLE, and logger::WARNING.

350  {
351  auto read_module = [&](Executor::ExtModuleKinds module_kind,
352  const std::string& source) {
353  /*
354  source can be either a filename of a LLVM IR
355  or LLVM BC source, or a string containing
356  LLVM IR code.
357  */
358  CHECK(!source.empty());
359  switch (module_kind) {
360  case Executor::ExtModuleKinds::template_module:
361  case Executor::ExtModuleKinds::rt_geos_module:
362  case Executor::ExtModuleKinds::rt_libdevice_module: {
363  return read_llvm_module_from_bc_file(source, getContext());
364  }
365  case Executor::ExtModuleKinds::udf_cpu_module: {
366  return read_llvm_module_from_ir_file(source, getContext(), false);
367  }
368  case Executor::ExtModuleKinds::udf_gpu_module: {
369  return read_llvm_module_from_ir_file(source, getContext(), true);
370  }
371  case Executor::ExtModuleKinds::rt_udf_cpu_module: {
372  return read_llvm_module_from_ir_string(source, getContext(), false);
373  }
374  case Executor::ExtModuleKinds::rt_udf_gpu_module: {
375  return read_llvm_module_from_ir_string(source, getContext(), true);
376  }
377  default: {
378  UNREACHABLE();
379  return std::unique_ptr<llvm::Module>();
380  }
381  }
382  };
383  auto update_module = [&](Executor::ExtModuleKinds module_kind,
384  bool erase_not_found = false) {
385  auto it = Executor::extension_module_sources.find(module_kind);
386  if (it != Executor::extension_module_sources.end()) {
387  auto llvm_module = read_module(module_kind, it->second);
388  if (llvm_module) {
389  extension_modules_[module_kind] = std::move(llvm_module);
390  } else if (erase_not_found) {
391  extension_modules_.erase(module_kind);
392  } else {
393  if (extension_modules_.find(module_kind) == extension_modules_.end()) {
394  LOG(WARNING) << "Failed to update " << ::toString(module_kind)
395  << " LLVM module. The module will be unavailable.";
396  } else {
397  LOG(WARNING) << "Failed to update " << ::toString(module_kind)
398  << " LLVM module. Using the existing module.";
399  }
400  }
401  } else {
402  if (erase_not_found) {
403  extension_modules_.erase(module_kind);
404  } else {
405  if (extension_modules_.find(module_kind) == extension_modules_.end()) {
406  LOG(WARNING) << "Source of " << ::toString(module_kind)
407  << " LLVM module is unavailable. The module will be unavailable.";
408  } else {
409  LOG(WARNING) << "Source of " << ::toString(module_kind)
410  << " LLVM module is unavailable. Using the existing module.";
411  }
412  }
413  }
414  };
415 
416  if (!update_runtime_modules_only) {
417  // required compile-time modules, their requirements are enforced
418  // by Executor::initialize_extension_module_sources():
419  update_module(Executor::ExtModuleKinds::template_module);
420 #ifdef ENABLE_GEOS
421  update_module(Executor::ExtModuleKinds::rt_geos_module);
422 #endif
423  // load-time modules, these are optional:
424  update_module(Executor::ExtModuleKinds::udf_cpu_module, true);
425 #ifdef HAVE_CUDA
426  update_module(Executor::ExtModuleKinds::udf_gpu_module, true);
427  update_module(Executor::ExtModuleKinds::rt_libdevice_module);
428 #endif
429  }
430  // run-time modules, these are optional and erasable:
431  update_module(Executor::ExtModuleKinds::rt_udf_cpu_module, true);
432 #ifdef HAVE_CUDA
433  update_module(Executor::ExtModuleKinds::rt_udf_gpu_module, true);
434 #endif
435 }
ExtModuleKinds
Definition: Execute.h:518
#define LOG(tag)
Definition: Logger.h:285
#define UNREACHABLE()
Definition: Logger.h:338
std::unique_ptr< llvm::Module > read_llvm_module_from_ir_string(const std::string &udf_ir_string, llvm::LLVMContext &ctx, bool is_gpu=false)
std::unique_ptr< llvm::Module > read_llvm_module_from_ir_file(const std::string &udf_ir_filename, llvm::LLVMContext &ctx, bool is_gpu=false)
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
#define CHECK(condition)
Definition: Logger.h:291
llvm::LLVMContext & getContext()
Definition: Execute.h:1417
std::unique_ptr< llvm::Module > read_llvm_module_from_bc_file(const std::string &udf_ir_filename, llvm::LLVMContext &ctx)
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1517
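Hedged usage sketch (assumptions noted in comments): based on the members documented on this page, replacing the source string for a runtime UDF module in Executor::extension_module_sources and then calling update_extension_modules(true) re-reads only the run-time modules. The real registration path (registerExtensionFunctions() / update_after_registration()) also takes register_runtime_extension_functions_mutex_ and refreshes every executor; that locking and iteration is omitted here.

#include <string>

#include "Execute.h"

// Sketch only: assumes an existing Executor instance and a string holding the
// LLVM IR of a CPU runtime UDF module. Locking done by the real registration
// path is intentionally omitted.
void refresh_runtime_cpu_udfs(Executor& executor, const std::string& llvm_ir) {
  Executor::extension_module_sources[Executor::ExtModuleKinds::rt_udf_cpu_module] =
      llvm_ir;
  // Re-reads only the run-time modules (rt_udf_cpu_module / rt_udf_gpu_module),
  // leaving compile-time and load-time modules untouched.
  executor.update_extension_modules(/*update_runtime_modules_only=*/true);
}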

+ Here is the call graph for this function:

bool Executor::updateQuerySessionExecutorAssignment ( const QuerySessionId query_session,
const std::string &  submitted_time_str,
const size_t  executor_id,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5184 of file Execute.cpp.

References queries_session_map_.

Referenced by attachExecutorToQuerySession().

5188  {
5189  // update the executor id of the query session
5190  if (query_session.empty()) {
5191  return false;
5192  }
5193  if (queries_session_map_.count(query_session)) {
5194  auto storage = queries_session_map_.at(query_session);
5195  for (auto it = storage.begin(); it != storage.end(); it++) {
5196  auto target_submitted_t_str = it->second.getQuerySubmittedTime();
5197  // no time difference --> found the target query status
5198  if (submitted_time_str.compare(target_submitted_t_str) == 0) {
5199  queries_session_map_.at(query_session)
5200  .at(submitted_time_str)
5201  .setExecutorId(executor_id);
5202  return true;
5203  }
5204  }
5205  }
5206  return false;
5207 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580

+ Here is the caller graph for this function:

void Executor::updateQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str,
const QuerySessionStatus::QueryStatus  new_query_status 
)

Definition at line 5075 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, and updateQuerySessionStatusWithLock().

Referenced by executeWorkUnitImpl().

5078  {
5079  // update the running query session's current status
5080  heavyai::unique_lock<heavyai::shared_mutex> session_write_lock(executor_session_mutex_);
5081  if (query_session.empty()) {
5082  return;
5083  }
5084  if (new_query_status == QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL) {
5085  current_query_session_ = query_session;
5086  }
5087  updateQuerySessionStatusWithLock(
5088  query_session, submitted_time_str, new_query_status, session_write_lock);
5089 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5158
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::updateQuerySessionStatusWithLock ( const QuerySessionId query_session,
const std::string &  submitted_time_str,
const QuerySessionStatus::QueryStatus  updated_query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5158 of file Execute.cpp.

References queries_session_map_.

Referenced by attachExecutorToQuerySession(), and updateQuerySessionStatus().

5162  {
5163  // an internal API that updates query session status
5164  if (query_session.empty()) {
5165  return false;
5166  }
5167  if (queries_session_map_.count(query_session)) {
5168  for (auto& query_status : queries_session_map_.at(query_session)) {
5169  auto target_submitted_t_str = query_status.second.getQuerySubmittedTime();
5170  // no time difference --> found the target query status
5171  if (submitted_time_str.compare(target_submitted_t_str) == 0) {
5172  auto prev_status = query_status.second.getQueryStatus();
5173  if (prev_status == updated_query_status) {
5174  return false;
5175  }
5176  query_status.second.setQueryStatus(updated_query_status);
5177  return true;
5178  }
5179  }
5180  }
5181  return false;
5182 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580

+ Here is the caller graph for this function:

int8_t Executor::warpSize ( ) const

Definition at line 4344 of file Execute.cpp.

References CHECK, cudaMgr(), and CudaMgr_Namespace::CudaMgr::getAllDeviceProperties().

4344  {
4345  const auto& dev_props = cudaMgr()->getAllDeviceProperties();
4346  CHECK(!dev_props.empty());
4347  return dev_props.front().warpSize;
4348 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
#define CHECK(condition)
Definition: Logger.h:291
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:134

+ Here is the call graph for this function:

Friends And Related Function Documentation

friend class BaselineJoinHashTable
friend

Definition at line 1630 of file Execute.h.

friend class BoundingBoxIntersectJoinHashTable
friend

Definition at line 1637 of file Execute.h.

friend class CodeGenerator
friend

Definition at line 1631 of file Execute.h.

friend class ColumnFetcher
friend

Definition at line 1632 of file Execute.h.

friend struct DiamondCodegen
friend

Definition at line 1633 of file Execute.h.

friend class ExecutionKernel
friend

Definition at line 1634 of file Execute.h.

friend class GroupByAndAggregate
friend

Definition at line 1639 of file Execute.h.

friend class HashJoin
friend

Definition at line 1636 of file Execute.h.

friend class InValuesBitmap
friend

Definition at line 1646 of file Execute.h.

friend class KernelSubtask
friend

Definition at line 1635 of file Execute.h.

friend class LeafAggregator
friend

Definition at line 1648 of file Execute.h.

friend class PendingExecutionClosure
friend

Definition at line 1651 of file Execute.h.

friend class PerfectJoinHashTable
friend

Definition at line 1649 of file Execute.h.

friend class QueryCompilationDescriptor
friend

Definition at line 1640 of file Execute.h.

friend class QueryExecutionContext
friend

Definition at line 1644 of file Execute.h.

friend class QueryFragmentDescriptor
friend

Definition at line 1643 of file Execute.h.

friend class QueryMemoryDescriptor
friend

Definition at line 1641 of file Execute.h.

Referenced by executeWorkUnitImpl(), reduceMultiDeviceResults(), and resultsUnion().

friend class QueryMemoryInitializer
friend

Definition at line 1642 of file Execute.h.

friend class QueryRewriter
friend

Definition at line 1650 of file Execute.h.

friend class RangeJoinHashTable
friend

Definition at line 1638 of file Execute.h.

friend class RelAlgExecutor
friend

Definition at line 1652 of file Execute.h.

friend class ResultSet
friend

Definition at line 1645 of file Execute.h.

friend class StringDictionaryTranslationMgr
friend

Definition at line 1647 of file Execute.h.

friend class TableFunctionCompilationContext
friend

Definition at line 1654 of file Execute.h.

friend class TableFunctionExecutionContext
friend

Definition at line 1655 of file Execute.h.

friend class TableOptimizer
friend

Definition at line 1653 of file Execute.h.

friend struct TargetExprCodegen
friend

Definition at line 1657 of file Execute.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 1656 of file Execute.h.

friend class WindowProjectNodeContext
friend

Definition at line 1658 of file Execute.h.

Member Data Documentation

WindowFunctionContext* Executor::active_window_function_ {nullptr}
private

Definition at line 1569 of file Execute.h.

AggregatedColRange Executor::agg_col_range_cache_
private
const size_t Executor::auto_cpu_mem_bytes {size_t(0)}
static

Definition at line 1626 of file Execute.h.

Referenced by DBHandler::init_executor_resource_mgr().

const size_t Executor::auto_num_threads {size_t(0)}
staticprivate

Definition at line 1536 of file Execute.h.

Referenced by launchKernelsImpl(), and launchKernelsLocked().

const size_t Executor::baseline_threshold
staticprivate
Initial value: {1000000}

Definition at line 1549 of file Execute.h.

Referenced by getBaselineThreshold(), and ResultSet::sort().

unsigned Executor::block_size_x_
private

Definition at line 1552 of file Execute.h.

Referenced by blockSize(), resetBlockSize(), and setBlockSize().

std::unordered_map< CardinalityCacheKey, size_t > Executor::cardinality_cache_
staticprivate
std::mutex Executor::compilation_mutex_

Definition at line 1618 of file Execute.h.

int64_t Executor::compilation_queue_time_ms_ = 0
private

Definition at line 1563 of file Execute.h.

Referenced by executeWorkUnit(), and nukeOldState().

std::unique_ptr<llvm::LLVMContext> Executor::context_
private

Definition at line 1477 of file Execute.h.

Referenced by getContext().

Data_Namespace::DataMgr* Executor::data_mgr_
private
const std::string Executor::debug_dir_
private

Definition at line 1555 of file Execute.h.

const std::string Executor::debug_file_
private

Definition at line 1556 of file Execute.h.

heavyai::shared_mutex Executor::execute_mutex_
staticprivate
std::map< int, std::shared_ptr< Executor > > Executor::executors_
staticprivate
heavyai::shared_mutex Executor::executors_cache_mutex_
staticprivate

Definition at line 1602 of file Execute.h.

Referenced by getExecutor(), nukeCacheOfExecutors(), and registerExtensionFunctions().

std::map< Executor::ExtModuleKinds, std::string > Executor::extension_module_sources
static
std::map<ExtModuleKinds, std::unique_ptr<llvm::Module> > Executor::extension_modules_
private

Definition at line 1517 of file Execute.h.

Referenced by get_extension_module(), and has_extension_module().

void * Executor::gpu_active_modules_
staticprivate

Definition at line 1541 of file Execute.h.

uint32_t Executor::gpu_active_modules_device_mask_ {0x0}
staticprivate

Definition at line 1540 of file Execute.h.

std::mutex Executor::gpu_active_modules_mutex_
staticprivate

Definition at line 1539 of file Execute.h.

std::mutex Executor::gpu_exec_mutex_[max_gpu_count]
private

Definition at line 1537 of file Execute.h.

unsigned Executor::grid_size_x_
private

Definition at line 1553 of file Execute.h.

Referenced by gridSize(), numBlocksPerMP(), resetGridSize(), and setGridSize().

InputTableInfoCache Executor::input_table_info_cache_
mutableprivate

Definition at line 1571 of file Execute.h.

Referenced by clearMetaInfoCache(), and getTableInfo().

std::atomic<bool> Executor::interrupted_ {false}
private
constexpr ExecutorId Executor::INVALID_EXECUTOR_ID = SIZE_MAX
static

Definition at line 424 of file Execute.h.

Referenced by CgenState::getExecutor().

std::mutex Executor::kernel_mutex_
static

Definition at line 1624 of file Execute.h.

Referenced by executeWorkUnitPerFragment(), and launchKernelsLocked().

int64_t Executor::kernel_queue_time_ms_ = 0
private
QueryPlanDAG Executor::latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
staticprivate

Definition at line 1612 of file Execute.h.

Referenced by getLatestQueryPlanDagExtracted(), and registerExtractedQueryPlanDag().

int const Executor::max_gpu_count
staticprivate

Definition at line 1535 of file Execute.h.

Referenced by ExecutionKernel::runImpl().

const size_t Executor::max_gpu_slab_size_
private

Definition at line 1554 of file Execute.h.

Referenced by maxGpuSlabSize().

std::unique_ptr<llvm::TargetMachine> Executor::nvptx_target_machine_
mutableprivate

Definition at line 1547 of file Execute.h.

QueryPlanDagCache Executor::query_plan_dag_cache_
staticprivate

Definition at line 1604 of file Execute.h.

Referenced by getQueryPlanDagCache().

std::mutex Executor::register_runtime_extension_functions_mutex_
static

Definition at line 1623 of file Execute.h.

Referenced by get_rt_udf_module(), and registerExtensionFunctions().

ResultSetRecyclerHolder Executor::resultset_recycler_holder_
staticprivate

Definition at line 1608 of file Execute.h.

Referenced by getResultSetRecyclerHolder().

std::shared_ptr<RowSetMemoryOwner> Executor::row_set_mem_owner_
private
std::mutex Executor::str_dict_mutex_
mutableprivate
TableGenerations Executor::table_generations_
private

Definition at line 1573 of file Execute.h.

Referenced by clearMetaInfoCache(), dumpCache(), getTableGeneration(), and setupCaching().

TableIdToNodeMap Executor::table_id_to_node_map_
private

Definition at line 1560 of file Execute.h.

const TemporaryTables* Executor::temporary_tables_
private

Definition at line 1559 of file Execute.h.

Referenced by getTemporaryTables().

constexpr ExecutorId Executor::UNITARY_EXECUTOR_ID = 0
static

Definition at line 423 of file Execute.h.

Referenced by acquireExecuteMutex(), checkNonKernelTimeInterrupted(), Parser::OptimizeTableStmt::execute(), Parser::CopyTableStmt::execute(), Parser::InsertValuesStmt::execute(), DBHandler::execute_rel_alg(), QueryRunner::QueryRunner::extractQueryPlanDag(), DBHandler::get_queries_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), QueryRunner::QueryRunner::getExecutor(), Parser::LocalQueryConnector::getOuterFragmentCount(), QueryRunner::QueryRunner::getParsedGlobalQueryHints(), QueryRunner::QueryRunner::getParsedQueryHint(), QueryRunner::QueryRunner::getParsedQueryHints(), DBHandler::getQueries(), QueryRunner::QueryRunner::getQueryInfoForDataRecyclerTest(), QueryRunner::QueryRunner::getRaExecutionSequence(), QueryRunner::QueryRunner::getRootNodeFromParsedQuery(), DBHandler::import_table(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), DBHandler::importGeoTableSingle(), DBHandler::interrupt(), DBHandler::interruptQuery(), DBHandler::invalidate_cur_session(), anonymous_namespace{DBHandler.cpp}::log_cache_size(), migrations::MigrationMgr::migrateDateInDaysMetadata(), Parser::InsertIntoTableAsSelectStmt::populateData(), Parser::LocalQueryConnector::query(), QueryRunner::anonymous_namespace{QueryRunner.cpp}::run_select_query_with_filter_push_down(), QueryRunner::QueryRunner::runSQLWithAllowingInterrupt(), DBHandler::set_cur_session(), DBHandler::sql_execute_impl(), and anonymous_namespace{DdlCommandExecutor.cpp}::vacuum_table_if_required().

std::unique_ptr<WindowProjectNodeContext> Executor::window_project_node_context_owned_
private

Definition at line 1567 of file Execute.h.


The documentation for this class was generated from the following files: