OmniSciDB
a5dc49c757
|
#include <QueryExecutionContext.h>
Public Member Functions | |
QueryExecutionContext (const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) | |
ResultSetPtr | getRowSet (const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const |
ResultSetPtr | groupBufferToResults (const size_t i) const |
std::vector< int64_t * > | launchGpuCode (const RelAlgExecutionUnit &ra_exe_unit, const CompilationContext *compilation_context, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const size_t shared_memory_size, int32_t *error_code, const uint32_t num_tables, const bool allow_runtime_interrupt, const std::vector< int8_t * > &join_hash_tables, RenderAllocatorMap *render_allocator_map, bool optimize_cuda_block_and_grid_sizes) |
std::vector< int64_t * > | launchCpuCode (const RelAlgExecutionUnit &ra_exe_unit, const CpuCompilationContext *fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t start_rowid, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, const int64_t num_rows_to_process=-1) |
int64_t | getAggInitValForIndex (const size_t index) const |
Private Types | |
enum | { ERROR_CODE, TOTAL_MATCHED, GROUPBY_BUF, NUM_FRAGMENTS, NUM_TABLES, ROW_INDEX_RESUME, COL_BUFFERS, LITERALS, NUM_ROWS, FRAG_ROW_OFFSETS, MAX_MATCHED, INIT_AGG_VALS, JOIN_HASH_TABLES, ROW_FUNC_MGR, KERN_PARAM_COUNT } |
using | KernelParamSizes = std::array< size_t, KERN_PARAM_COUNT > |
using | KernelParams = std::array< int8_t *, KERN_PARAM_COUNT > |
Private Member Functions | |
size_t | sizeofColBuffers (std::vector< std::vector< int8_t const * >> const &col_buffers) const |
void | copyColBuffersToDevice (int8_t *device_ptr, std::vector< std::vector< int8_t const * >> const &col_buffers) const |
template<typename T > | |
size_t | sizeofFlattened2dVec (uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const |
template<typename T > | |
void | copyFlattened2dVecToDevice (int8_t *device_ptr, uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const |
size_t | sizeofInitAggVals (bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const |
void | copyInitAggValsToDevice (int8_t *device_ptr, bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const |
size_t | sizeofJoinHashTables (std::vector< int8_t * > const &join_hash_tables) const |
int8_t * | copyJoinHashTablesToDevice (int8_t *device_ptr, std::vector< int8_t * > const &join_hash_tables) const |
size_t | sizeofLiterals (std::vector< int8_t > const &literal_buff) const |
int8_t * | copyLiteralsToDevice (int8_t *device_ptr, std::vector< int8_t > const &literal_buff) const |
template<typename T > | |
void | copyValueToDevice (int8_t *device_ptr, T const value) const |
template<typename T > | |
size_t | sizeofVector (std::vector< T > const &vec) const |
template<typename T > | |
void | copyVectorToDevice (int8_t *device_ptr, std::vector< T > const &vec) const |
KernelParams | prepareKernelParams (const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< int8_t > &literal_buff, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, const int32_t scan_limit, const std::vector< int64_t > &init_agg_vals, const std::vector< int32_t > &error_codes, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, Data_Namespace::DataMgr *data_mgr, const int device_id, const bool hoist_literals, const bool is_group_by) const |
ResultSetPtr | groupBufferToDeinterleavedResults (const size_t i) const |
Private Attributes | |
std::unique_ptr< DeviceAllocator > | gpu_allocator_ |
QueryMemoryDescriptor | query_mem_desc_ |
const Executor * | executor_ |
const ExecutorDeviceType | device_type_ |
const ExecutorDispatchMode | dispatch_mode_ |
std::shared_ptr < RowSetMemoryOwner > | row_set_mem_owner_ |
const bool | output_columnar_ |
std::unique_ptr < QueryMemoryInitializer > | query_buffers_ |
std::unique_ptr< ResultSet > | estimator_result_set_ |
Friends | |
class | Executor |
Definition at line 38 of file QueryExecutionContext.h.
using QueryExecutionContext::KernelParams = std::array< int8_t *, KERN_PARAM_COUNT > |
private |
Definition at line 120 of file QueryExecutionContext.h.
using QueryExecutionContext::KernelParamSizes = std::array< size_t, KERN_PARAM_COUNT > |
private |
Definition at line 119 of file QueryExecutionContext.h.
anonymous enum |
private |
Definition at line 102 of file QueryExecutionContext.h.
QueryExecutionContext::QueryExecutionContext | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const QueryMemoryDescriptor & | query_mem_desc, | ||
const Executor * | executor, | ||
const ExecutorDeviceType | device_type, | ||
const ExecutorDispatchMode | dispatch_mode, | ||
const int | device_id, | ||
const shared::TableKey & | outer_table_key, | ||
const int64_t | num_rows, | ||
const std::vector< std::vector< const int8_t * >> & | col_buffers, | ||
const std::vector< std::vector< uint64_t >> & | frag_offsets, | ||
std::shared_ptr< RowSetMemoryOwner > | row_set_mem_owner, | ||
const bool | output_columnar, | ||
const bool | sort_on_gpu, | ||
const size_t | thread_idx, | ||
RenderInfo * | render_info | ||
) |
Definition at line 33 of file QueryExecutionContext.cpp.
References CHECK, getQueryEngineCudaStreamForDevice(), GPU, gpu_allocator_, heavyai::InSituFlagsOwnerInterface::isInSitu(), query_buffers_, query_mem_desc, RenderInfo::render_allocator_map_ptr, and sort_on_gpu().
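void QueryExecutionContext::copyColBuffersToDevice (int8_t *device_ptr, std::vector< std::vector< int8_t const * >> const &col_buffers) const |
private |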
Definition at line 722 of file QueryExecutionContext.cpp.
References CHECK_EQ, and gpu_allocator_.
Referenced by prepareKernelParams().
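template<typename T > void QueryExecutionContext::copyFlattened2dVecToDevice (int8_t *device_ptr, uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const |
private |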
Definition at line 750 of file QueryExecutionContext.cpp.
References CHECK_EQ, gpu_allocator_, and heavydb.dtypes::T.
Referenced by prepareKernelParams().
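void QueryExecutionContext::copyInitAggValsToDevice (int8_t *device_ptr, bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const |
private |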
Definition at line 772 of file QueryExecutionContext.cpp.
References compact_init_vals(), copyVectorToDevice(), QueryMemoryDescriptor::getColsSize(), output_columnar_, and query_mem_desc_.
Referenced by prepareKernelParams().
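int8_t * QueryExecutionContext::copyJoinHashTablesToDevice (int8_t *device_ptr, std::vector< int8_t * > const &join_hash_tables) const |
private |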
Definition at line 789 of file QueryExecutionContext.cpp.
References copyVectorToDevice().
Referenced by prepareKernelParams().
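int8_t * QueryExecutionContext::copyLiteralsToDevice (int8_t *device_ptr, std::vector< int8_t > const &literal_buff) const |
private |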
Definition at line 812 of file QueryExecutionContext.cpp.
References CHECK, CHECK_EQ, gpu_allocator_, and query_buffers_.
Referenced by prepareKernelParams().
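template<typename T > void QueryExecutionContext::copyValueToDevice (int8_t *device_ptr, T const value) const |
private |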
Definition at line 843 of file QueryExecutionContext.cpp.
References gpu_allocator_, and heavydb.dtypes::T.
Referenced by prepareKernelParams().
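template<typename T > void QueryExecutionContext::copyVectorToDevice (int8_t *device_ptr, std::vector< T > const &vec) const |
private |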
Definition at line 852 of file QueryExecutionContext.cpp.
References gpu_allocator_, and heavydb.dtypes::T.
Referenced by copyInitAggValsToDevice(), copyJoinHashTablesToDevice(), and prepareKernelParams().
int64_t QueryExecutionContext::getAggInitValForIndex | ( | const size_t | index | ) | const |
Definition at line 153 of file QueryExecutionContext.cpp.
References CHECK, and query_buffers_.
Referenced by Executor::executePlanWithoutGroupBy().
ResultSetPtr QueryExecutionContext::getRowSet | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const QueryMemoryDescriptor & | query_mem_desc | ||
) | const |
Definition at line 158 of file QueryExecutionContext.cpp.
References CHECK, CHECK_EQ, CPU, DEBUG_TIMER, device_type_, executor_, GPU, groupBufferToResults(), QueryMemoryDescriptor::hasVarlenOutput(), query_buffers_, query_mem_desc_, row_set_mem_owner_, and QueryMemoryDescriptor::threadsShareMemory().
Referenced by Executor::executePlanWithGroupBy().
ResultSetPtr QueryExecutionContext::groupBufferToDeinterleavedResults (const size_t i) const |
private |
Definition at line 84 of file QueryExecutionContext.cpp.
References CHECK, CPU, executor_, ResultSet::fixupQueryMemoryDescriptor(), g_enable_non_kernel_time_query_interrupt, QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOffInBytesInNextBin(), QueryMemoryDescriptor::getSlotCount(), output_columnar_, query_buffers_, query_mem_desc_, ResultSetStorage::reduceSingleRow(), row_set_mem_owner_, and UNLIKELY.
Referenced by groupBufferToResults().
ResultSetPtr QueryExecutionContext::groupBufferToResults | ( | const size_t | i | ) | const |
Definition at line 181 of file QueryExecutionContext.cpp.
References device_type_, groupBufferToDeinterleavedResults(), QueryMemoryDescriptor::interleavedBins(), query_buffers_, and query_mem_desc_.
Referenced by getRowSet().
std::vector< int64_t * > QueryExecutionContext::launchCpuCode | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const CpuCompilationContext * | fn_ptrs, | ||
const bool | hoist_literals, | ||
const std::vector< int8_t > & | literal_buff, | ||
std::vector< std::vector< const int8_t * >> | col_buffers, | ||
const std::vector< std::vector< int64_t >> & | num_rows, | ||
const std::vector< std::vector< uint64_t >> & | frag_row_offsets, | ||
const int32_t | scan_limit, | ||
int32_t * | error_code, | ||
const uint32_t | start_rowid, | ||
const uint32_t | num_tables, | ||
const std::vector< int8_t * > & | join_hash_tables, | ||
const int64_t | num_rows_to_process = -1 | |
) |
Definition at line 563 of file QueryExecutionContext.cpp.
References align_to_int64(), CpuCompilationContext::call(), CHECK, CHECK_EQ, compact_init_vals(), CPU, DEBUG_TIMER, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, estimator_result_set_, executor_, QueryMemoryDescriptor::getColsSize(), QueryMemoryDescriptor::getQueryDescriptionType(), INJECT_TIMER, QueryMemoryDescriptor::isGroupBy(), CpuCompilationContext::name(), foreign_storage::num_rows_to_process(), heavyai::Projection, query_buffers_, query_mem_desc_, QueryMemoryDescriptor::useStreamingTopN(), and VLOG.
Referenced by Executor::executePlanWithGroupBy(), and Executor::executePlanWithoutGroupBy().
std::vector< int64_t * > QueryExecutionContext::launchGpuCode | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const CompilationContext * | compilation_context, | ||
const bool | hoist_literals, | ||
const std::vector< int8_t > & | literal_buff, | ||
std::vector< std::vector< const int8_t * >> | col_buffers, | ||
const std::vector< std::vector< int64_t >> & | num_rows, | ||
const std::vector< std::vector< uint64_t >> & | frag_row_offsets, | ||
const int32_t | scan_limit, | ||
Data_Namespace::DataMgr * | data_mgr, | ||
const unsigned | block_size_x, | ||
const unsigned | grid_size_x, | ||
const int | device_id, | ||
const size_t | shared_memory_size, | ||
int32_t * | error_code, | ||
const uint32_t | num_tables, | ||
const bool | allow_runtime_interrupt, | ||
const std::vector< int8_t * > & | join_hash_tables, | ||
RenderAllocatorMap * | render_allocator_map, | ||
bool | optimize_cuda_block_and_grid_sizes | ||
) |
Definition at line 207 of file QueryExecutionContext.cpp.
References anonymous_namespace{QueryExecutionContext.cpp}::aggregate_error_codes(), CHECK, CHECK_EQ, create_device_kernel(), DEBUG_TIMER, QueryMemoryDescriptor::didOutputColumnar(), dispatch_mode_, ERROR_CODE, RelAlgExecutionUnit::estimator, estimator_result_set_, executor_, g_dynamic_watchdog_time_limit, g_enable_dynamic_watchdog, get_num_allocated_rows_from_gpu(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), RenderAllocatorMap::getRenderAllocator(), GPU, gpu_allocator_, GROUPBY_BUF, QueryMemoryDescriptor::hasKeylessHash(), INIT_AGG_VALS, INJECT_TIMER, inplace_sort_gpu(), QueryMemoryDescriptor::isGroupBy(), KERN_PARAM_COUNT, LITERALS, MAX_MATCHED, SortInfo::order_entries, output_columnar_, prepareKernelParams(), heavyai::Projection, query_buffers_, query_mem_desc_, RelAlgExecutionUnit::sort_info, QueryMemoryDescriptor::sortOnGpu(), to_string(), TOTAL_MATCHED, RelAlgExecutionUnit::use_bump_allocator, use_speculative_top_n(), QueryMemoryDescriptor::useStreamingTopN(), QueryMemoryDescriptor::varlenOutputBufferElemSize(), and VLOG.
Referenced by Executor::executePlanWithGroupBy(), and Executor::executePlanWithoutGroupBy().
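KernelParams QueryExecutionContext::prepareKernelParams (const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< int8_t > &literal_buff, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, const int32_t scan_limit, const std::vector< int64_t > &init_agg_vals, const std::vector< int32_t > &error_codes, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, Data_Namespace::DataMgr *data_mgr, const int device_id, const bool hoist_literals, const bool is_group_by) const |
private |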
Definition at line 857 of file QueryExecutionContext.cpp.
References gpu_enabled::accumulate(), CHECK, CHECK_EQ, COL_BUFFERS, copyColBuffersToDevice(), copyFlattened2dVecToDevice(), copyInitAggValsToDevice(), copyJoinHashTablesToDevice(), copyLiteralsToDevice(), copyValueToDevice(), copyVectorToDevice(), ERROR_CODE, FRAG_ROW_OFFSETS, gpu_allocator_, GROUPBY_BUF, INIT_AGG_VALS, JOIN_HASH_TABLES, LITERALS, MAX_MATCHED, NUM_FRAGMENTS, NUM_ROWS, NUM_TABLES, report::params, ROW_FUNC_MGR, ROW_INDEX_RESUME, sizeofColBuffers(), sizeofFlattened2dVec(), sizeofInitAggVals(), sizeofJoinHashTables(), sizeofLiterals(), sizeofVector(), and TOTAL_MATCHED.
Referenced by launchGpuCode().
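size_t QueryExecutionContext::sizeofColBuffers (std::vector< std::vector< int8_t const * >> const &col_buffers) const |
private |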
Definition at line 711 of file QueryExecutionContext.cpp.
Referenced by prepareKernelParams().
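template<typename T > size_t QueryExecutionContext::sizeofFlattened2dVec (uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const |
private |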
Definition at line 744 of file QueryExecutionContext.cpp.
References heavydb.dtypes::T.
Referenced by prepareKernelParams().
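size_t QueryExecutionContext::sizeofInitAggVals (bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const |
private |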
Definition at line 762 of file QueryExecutionContext.cpp.
References QueryMemoryDescriptor::getColsSize(), output_columnar_, and query_mem_desc_.
Referenced by prepareKernelParams().
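size_t QueryExecutionContext::sizeofJoinHashTables (std::vector< int8_t * > const &join_hash_tables) const |
private |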
Definition at line 785 of file QueryExecutionContext.cpp.
Referenced by prepareKernelParams().
size_t QueryExecutionContext::sizeofLiterals (std::vector< int8_t > const &literal_buff) const |
private |
Definition at line 803 of file QueryExecutionContext.cpp.
References query_buffers_.
Referenced by prepareKernelParams().
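template<typename T > size_t QueryExecutionContext::sizeofVector (std::vector< T > const &vec) const |
private |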
Definition at line 848 of file QueryExecutionContext.cpp.
References heavydb.dtypes::T.
Referenced by prepareKernelParams().
friend class Executor |
friend |
Definition at line 187 of file QueryExecutionContext.h.
const ExecutorDeviceType QueryExecutionContext::device_type_ |
private |
Definition at line 180 of file QueryExecutionContext.h.
Referenced by getRowSet(), and groupBufferToResults().
const ExecutorDispatchMode QueryExecutionContext::dispatch_mode_ |
private |
Definition at line 181 of file QueryExecutionContext.h.
Referenced by launchGpuCode().
std::unique_ptr< ResultSet > QueryExecutionContext::estimator_result_set_ |
mutable private |
Definition at line 185 of file QueryExecutionContext.h.
Referenced by Executor::executePlanWithoutGroupBy(), launchCpuCode(), and launchGpuCode().
const Executor * QueryExecutionContext::executor_ |
private |
Definition at line 179 of file QueryExecutionContext.h.
Referenced by getRowSet(), groupBufferToDeinterleavedResults(), launchCpuCode(), and launchGpuCode().
std::unique_ptr< DeviceAllocator > QueryExecutionContext::gpu_allocator_ |
private |
Definition at line 175 of file QueryExecutionContext.h.
Referenced by copyColBuffersToDevice(), copyFlattened2dVecToDevice(), copyLiteralsToDevice(), copyValueToDevice(), copyVectorToDevice(), launchGpuCode(), prepareKernelParams(), and QueryExecutionContext().
const bool QueryExecutionContext::output_columnar_ |
private |
Definition at line 183 of file QueryExecutionContext.h.
Referenced by copyInitAggValsToDevice(), groupBufferToDeinterleavedResults(), launchGpuCode(), and sizeofInitAggVals().
std::unique_ptr< QueryMemoryInitializer > QueryExecutionContext::query_buffers_ |
private |
Definition at line 184 of file QueryExecutionContext.h.
Referenced by copyLiteralsToDevice(), Executor::executePlanWithGroupBy(), Executor::executePlanWithoutGroupBy(), getAggInitValForIndex(), getRowSet(), groupBufferToDeinterleavedResults(), groupBufferToResults(), launchCpuCode(), launchGpuCode(), QueryExecutionContext(), and sizeofLiterals().
QueryMemoryDescriptor QueryExecutionContext::query_mem_desc_ |
private |
Definition at line 178 of file QueryExecutionContext.h.
Referenced by copyInitAggValsToDevice(), Executor::executePlanWithGroupBy(), Executor::executePlanWithoutGroupBy(), getRowSet(), groupBufferToDeinterleavedResults(), groupBufferToResults(), launchCpuCode(), launchGpuCode(), and sizeofInitAggVals().
std::shared_ptr< RowSetMemoryOwner > QueryExecutionContext::row_set_mem_owner_ |
private |
Definition at line 182 of file QueryExecutionContext.h.
Referenced by getRowSet(), and groupBufferToDeinterleavedResults().