17 #ifndef QUERYENGINE_QUERYEXECUTIONCONTEXT_H
18 #define QUERYENGINE_QUERYEXECUTIONCONTEXT_H
27 #include <boost/core/noncopyable.hpp>
48 const int64_t num_rows,
49 const std::vector<std::vector<const int8_t*>>& col_buffers,
50 const std::vector<std::vector<uint64_t>>& frag_offsets,
51 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
52 const bool output_columnar,
54 const size_t thread_idx,
65 const bool hoist_literals,
66 const std::vector<int8_t>& literal_buff,
67 std::vector<std::vector<const int8_t*>> col_buffers,
68 const std::vector<std::vector<int64_t>>& num_rows,
69 const std::vector<std::vector<uint64_t>>& frag_row_offsets,
70 const int32_t scan_limit,
72 const unsigned block_size_x,
73 const unsigned grid_size_x,
75 const size_t shared_memory_size,
77 const uint32_t num_tables,
78 const bool allow_runtime_interrupt,
79 const std::vector<int8_t*>& join_hash_tables,
81 bool optimize_cuda_block_and_grid_sizes);
86 const bool hoist_literals,
87 const std::vector<int8_t>& literal_buff,
88 std::vector<std::vector<const int8_t*>> col_buffers,
89 const std::vector<std::vector<int64_t>>& num_rows,
90 const std::vector<std::vector<uint64_t>>& frag_row_offsets,
91 const int32_t scan_limit,
93 const uint32_t start_rowid,
94 const uint32_t num_tables,
95 const std::vector<int8_t*>& join_hash_tables,
123 std::vector<std::vector<int8_t const*>>
const& col_buffers)
const;
126 std::vector<std::vector<int8_t const*>>
const& col_buffers)
const;
128 template <
typename T>
130 std::vector<std::vector<T>>
const& vec2d)
const;
131 template <
typename T>
133 uint32_t
const expected_subvector_size,
134 std::vector<std::vector<T>>
const& vec2d)
const;
137 std::vector<int64_t>
const& init_agg_vals)
const;
139 bool const is_group_by,
140 std::vector<int64_t>
const& init_agg_vals)
const;
144 std::vector<int8_t*>
const& join_hash_tables)
const;
146 size_t sizeofLiterals(std::vector<int8_t>
const& literal_buff)
const;
148 std::vector<int8_t>
const& literal_buff)
const;
150 template <
typename T>
153 template <
typename T>
155 template <
typename T>
159 const std::vector<std::vector<const int8_t*>>& col_buffers,
160 const std::vector<int8_t>& literal_buff,
161 const std::vector<std::vector<int64_t>>& num_rows,
162 const std::vector<std::vector<uint64_t>>& frag_offsets,
163 const int32_t scan_limit,
164 const std::vector<int64_t>& init_agg_vals,
165 const std::vector<int32_t>& error_codes,
166 const uint32_t num_tables,
167 const std::vector<int8_t*>& join_hash_tables,
170 const bool hoist_literals,
171 const bool is_group_by)
const;
190 #endif // QUERYENGINE_QUERYEXECUTIONCONTEXT_H
const Executor * executor_
int8_t * copyJoinHashTablesToDevice(int8_t *device_ptr, std::vector< int8_t * > const &join_hash_tables) const
std::unique_ptr< DeviceAllocator > gpu_allocator_
QueryExecutionContext(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *)
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CpuCompilationContext *fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t start_rowid, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, const int64_t num_rows_to_process=-1)
const ExecutorDispatchMode dispatch_mode_
size_t num_rows_to_process(const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
std::shared_ptr< ResultSet > ResultSetPtr
std::array< int8_t *, KERN_PARAM_COUNT > KernelParams
void copyColBuffersToDevice(int8_t *device_ptr, std::vector< std::vector< int8_t const * >> const &col_buffers) const
const ExecutorDeviceType device_type_
const bool output_columnar_
std::unique_ptr< QueryMemoryInitializer > query_buffers_
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
int8_t * copyLiteralsToDevice(int8_t *device_ptr, std::vector< int8_t > const &literal_buff) const
void copyValueToDevice(int8_t *device_ptr, T const value) const
size_t sizeofFlattened2dVec(uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const
void copyFlattened2dVecToDevice(int8_t *device_ptr, uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const
int64_t getAggInitValForIndex(const size_t index) const
size_t sizeofInitAggVals(bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const
void copyVectorToDevice(int8_t *device_ptr, std::vector< T > const &vec) const
size_t sizeofJoinHashTables(std::vector< int8_t * > const &join_hash_tables) const
QueryMemoryDescriptor query_mem_desc_
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
ResultSetPtr groupBufferToDeinterleavedResults(const size_t i) const
std::array< size_t, KERN_PARAM_COUNT > KernelParamSizes
ResultSetPtr groupBufferToResults(const size_t i) const
KernelParams prepareKernelParams(const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< int8_t > &literal_buff, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, const int32_t scan_limit, const std::vector< int64_t > &init_agg_vals, const std::vector< int32_t > &error_codes, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, Data_Namespace::DataMgr *data_mgr, const int device_id, const bool hoist_literals, const bool is_group_by) const
void copyInitAggValsToDevice(int8_t *device_ptr, bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const
Basic constructors and methods of the row set interface.
Allocate GPU memory using GpuBuffers via DataMgr.
std::unique_ptr< ResultSet > estimator_result_set_
size_t sizeofVector(std::vector< T > const &vec) const
size_t sizeofColBuffers(std::vector< std::vector< int8_t const * >> const &col_buffers) const
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CompilationContext *compilation_context, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const size_t shared_memory_size, int32_t *error_code, const uint32_t num_tables, const bool allow_runtime_interrupt, const std::vector< int8_t * > &join_hash_tables, RenderAllocatorMap *render_allocator_map, bool optimize_cuda_block_and_grid_sizes)
size_t sizeofLiterals(std::vector< int8_t > const &literal_buff) const